Skip to content

Commit

Permalink
test: s3fs test using localstack
Browse files (browse the repository at this point in the history)
  • Loading branch information
raylim committed Sep 11, 2023
1 parent c893e95 commit 696bcbb
Show file tree
Hide file tree
Showing 30 changed files with 733 additions and 188 deletions.
12 changes: 10 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,23 @@ jobs:
build:

runs-on: ubuntu-20.04

services:
localstack:
image: localstack/localstack
ports:
- "4566:4566"
- "4510-4559:4510-4559"

container:
image: mambaorg/micromamba:1.0.0
options: --user root

steps:

- name: Install required packages
run: |
apt-get update
apt-get install -y build-essential libgdal-dev liblapack-dev libblas-dev gfortran libgl1 git curl make
apt-get install -y build-essential libgdal-dev liblapack-dev libblas-dev gfortran libgl1 git curl make python3-pip
git config --system --add safe.directory *
- uses: actions/checkout@v3
Expand Down Expand Up @@ -52,6 +59,7 @@ jobs:
run: |
. ./venv/bin/activate
export LUNA_HOME=$PWD
export LOCALSTACK_ENDPOINT_URL=http://localstack:4566
pytest -v --capture=tee-sys --show-capture=all tests --cov-report=xml --junitxml=./luna-tests/results.xml
- name: Build mkdocs
Expand Down
17 changes: 17 additions & 0 deletions docker/localstack/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Docker Compose definition that runs a single LocalStack container.
# NOTE(review): presumably used to provide a local S3 endpoint for tests — confirm against the test suite.
version: "3.8"

services:
localstack:
# Container name is taken from LOCALSTACK_DOCKER_NAME; "localstack_main" is used when that variable is unset.
container_name: "${LOCALSTACK_DOCKER_NAME-localstack_main}"
# No image tag is pinned here.
image: localstack/localstack
ports:
- "4566:4566" # LocalStack Gateway
- "4510-4559:4510-4559" # external services port range
environment:
# DEBUG is passed through from the invoking environment; it is empty when unset.
- DEBUG=${DEBUG-}
# Points at the Docker socket that is bind-mounted in the volumes section below.
- DOCKER_HOST=unix:///var/run/docker.sock
volumes:
# Host directory mounted at /var/lib/localstack; defaults to ./volume when LOCALSTACK_VOLUME_DIR is unset or empty.
- "${LOCALSTACK_VOLUME_DIR:-./volume}:/var/lib/localstack"
# Exposes the host Docker socket inside the container.
- "/var/run/docker.sock:/var/run/docker.sock"
healthcheck:
# Lists S3 buckets through the gateway on port 4566 using placeholder credentials;
# the check passes when this command exits successfully.
test: "bash -c 'AWS_ACCESS_KEY_ID=fake AWS_SECRET_ACCESS_KEY=fake aws --endpoint-url=http://localhost:4566 s3 ls'"
113 changes: 57 additions & 56 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ appdirs = "^1.4.4"
loguru = "^0.6.0"
pyvips = "^2.2.1"
tiffslide = "^2.1.0"
s3fs = "^2022.10.0"
s3fs = "^2023.6.0"
pandera = {extras = ["io"], version = "^0.14.5"}
multimethod = "^1.9.1"
trimesh = "^3.22.0"
Expand Down
17 changes: 9 additions & 8 deletions src/luna/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def validate_dask_address(addr: str) -> bool:
The typical format for this will be something like 'tcp://192.168.0.37:8786',
but there could be a hostname instead of an IP address, and maybe some other
URL schemes are supported. This function will be used to check whether a
URL schemes are supported. This function will be used to check whether a
user-defined dask scheduler address is plausible, or obviously invalid.
"""
HOSTPORT_RE = re.compile(
Expand All @@ -51,7 +51,7 @@ def validate_dask_address(addr: str) -> bool:
[A-Za-z][A-Za-z0-9.-]*[A-Za-z0-9] |
[A-Za-z])
: (?P<port>\d+)$""",
re.VERBOSE
re.VERBOSE,
)
return bool(HOSTPORT_RE.match(addr))

Expand Down Expand Up @@ -88,7 +88,6 @@ def wrapper(*args, **kwargs):
args_dict = _get_args_dict(func, args, kwargs)
new_args_dict = args_dict.copy()

filesystem = None
tmp_dir_dest = []
for key, write_mode in dir_key_write_mode.items():
if not args_dict[key]:
Expand All @@ -99,11 +98,11 @@ def wrapper(*args, **kwargs):
fs, dir = fsspec.core.url_to_fs(
args_dict[key], **args_dict.get(storage_options_key, {})
)
if fs.protocol != "file" and 'cache' not in fs.protocol:
if fs.protocol != "file" and "cache" not in fs.protocol:
new_args_dict[storage_options_key] = {"auto_mkdir": True}
tmp_dir = tempfile.TemporaryDirectory()
new_args_dict[key] = tmp_dir.name
tmp_dir_dest.append((tmp_dir, dir))
tmp_dir_dest.append((tmp_dir, dir, fs))

result = None
with ExitStack() as stack:
Expand All @@ -116,7 +115,7 @@ def wrapper(*args, **kwargs):
fs, path = fsspec.core.url_to_fs(
args_dict[key], **args_dict.get(storage_options_key, {})
)
if 'cache' not in fs.protocol:
if "cache" not in fs.protocol:
simplecache_fs = fsspec.filesystem("simplecache", fs=fs)

of = simplecache_fs.open(path, write_mode)
Expand All @@ -125,8 +124,8 @@ def wrapper(*args, **kwargs):

result = func(**new_args_dict)

for tmp_dir, dest in tmp_dir_dest:
copy_files(tmp_dir.name, dest, destination_filesystem=filesystem)
for tmp_dir, dest, fs in tmp_dir_dest:
copy_files(tmp_dir.name, dest, destination_filesystem=fs)

return result

Expand Down Expand Up @@ -204,6 +203,7 @@ def rebase_schema_numeric(df):

df[col] = df[col].astype(float, errors="ignore")


def rebase_schema_mixed(df):
"""
Tries to convert all columns with mixed types to strings.
Expand All @@ -220,6 +220,7 @@ def rebase_schema_mixed(df):
if df[col].dtype == list:
df[col] = df[col].astype(str)


def generate_uuid_binary(content, prefix):
"""
Returns hash of the binary, preceded by the prefix.
Expand Down
Loading

0 comments on commit 696bcbb

Please sign in to comment.