autogluon · suzhoum · Nov 14, 2023 · Nov 10, 2023 · Nov 10, 2023 · Nov 12, 2023
diff --git a/pyproject.toml b/pyproject.toml
@@ -41,10 +41,10 @@ dependencies = [
     "constructs >=10.0.0,<10.1.289",
     "fsspec >=2023.5.0,<=2023.6.0",
     "matplotlib >=3.4,<3.7",
-    "pandas >=1.2.5,<2.0",
+    "pandas >=1.4.1,<2.2.0",
     "pydantic>=1.10.4,<2.0", # https://github.com/ray-project/ray/issues/36990
     "pyyaml >=5.4,<=6.0",
-    "ray[default] >=2.3.0,<2.4.0",
+    "ray[default] >=2.6.3,<2.7",
     "s3fs >=2023.5.0,<=2023.6.0",
     "tqdm >=4.64.0,<=4.65.0",
     "typer >=0.9.0,<1.0.0",
@@ -108,4 +108,3 @@ xfail_strict = true
 
 [tool.setuptools_scm]
 write_to = "src/autogluon/bench/version.py"
-fallback_version = "0.0.1.dev0"
diff --git a/src/autogluon/bench/Dockerfile b/src/autogluon/bench/Dockerfile
@@ -3,7 +3,6 @@ FROM $AG_BENCH_BASE_IMAGE
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV RUNNING_IN_DOCKER=true
-ENV AGBENCH_BASE=src/autogluon/bench/
 
 # Install essential packages and Python 3.9
 RUN apt-get update && \
@@ -25,37 +24,46 @@ RUN apt-get install -y python3-pip unzip curl git pciutils && \
 
 # Application-specific steps
 ARG AG_BENCH_VERSION
+ARG AG_BENCH_BASE_DIR
 ARG CDK_DEPLOY_REGION
 ARG FRAMEWORK_PATH
 ARG GIT_URI
 ARG GIT_BRANCH
 ARG AMLB_FRAMEWORK
 ARG AMLB_USER_DIR
 
+ENV AG_BENCH_BASE_DIR=${AG_BENCH_BASE_DIR}
+
 WORKDIR /app/
+ENV VENV_BASE_DIR=/app/venv
+
+# Copying files for installing autogluon.bench from source; the user needs to be at the root of the autogluon.bench repo because pyproject.toml is required
+COPY . .
+COPY ${AG_BENCH_BASE_DIR}/entrypoint.sh .
+COPY ${AG_BENCH_BASE_DIR}/custom_configs custom_configs/
 
-# Copying necessary files for autogluon-bench package
-COPY . /app/
-COPY ${AGBENCH_BASE}entrypoint.sh /app/
-COPY ${AGBENCH_BASE}custom_configs /app/custom_configs/
+ENV VENV_PATH="$VENV_BASE_DIR/.venv"
+RUN python3 -m venv $VENV_PATH
+ENV PATH="$VENV_PATH/bin:$PATH"
+RUN python3 -m pip install --upgrade pip
 
 # check if autogluon.bench version contains "dev" tag
 RUN if echo "$AG_BENCH_VERSION" | grep -q "dev"; then \ 
         # install from local source
-        pip install /app/; \
+        python3 -m pip install .; \
     else \
-        pip install autogluon.bench==$AG_BENCH_VERSION; \
+        python3 -m pip install autogluon.bench==$AG_BENCH_VERSION; \
     fi
 
 RUN chmod +x entrypoint.sh \
     && if echo "$FRAMEWORK_PATH" | grep -q -E "tabular|timeseries"; then \
         if [ -n "$AMLB_USER_DIR" ]; then \
-            bash ${AGBENCH_BASE}${FRAMEWORK_PATH}setup.sh $GIT_URI $GIT_BRANCH "/home" $AMLB_FRAMEWORK $AMLB_USER_DIR; \
+            bash ${AG_BENCH_BASE_DIR}/${FRAMEWORK_PATH}/setup.sh $GIT_URI $GIT_BRANCH $VENV_BASE_DIR $AMLB_FRAMEWORK $AMLB_USER_DIR; \
         else \
-            bash ${AGBENCH_BASE}${FRAMEWORK_PATH}setup.sh $GIT_URI $GIT_BRANCH "/home" $AMLB_FRAMEWORK; \
+            bash ${AG_BENCH_BASE_DIR}/${FRAMEWORK_PATH}/setup.sh $GIT_URI $GIT_BRANCH $VENV_BASE_DIR $AMLB_FRAMEWORK; \
         fi; \
     elif echo "$FRAMEWORK_PATH" | grep -q "multimodal"; then \
-        bash ${AGBENCH_BASE}${FRAMEWORK_PATH}setup.sh $GIT_URI $GIT_BRANCH "/home" $AG_BENCH_VERSION; \
+        bash ${AG_BENCH_BASE_DIR}/${FRAMEWORK_PATH}/setup.sh $GIT_URI $GIT_BRANCH $VENV_BASE_DIR $AG_BENCH_VERSION; \
     fi \
     && echo "CDK_DEPLOY_REGION=$CDK_DEPLOY_REGION" >> /etc/environment
 

diff --git a/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py b/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py
@@ -197,10 +197,12 @@ def process_benchmark_runs(module_configs: dict, amlb_benchmark_search_dirs: lis
         module_configs["fold_to_run"].setdefault(benchmark, {})
         for task in module_configs["amlb_task"][benchmark]:
             if module_configs["fold_to_run"][benchmark].get(task):
-                tasks = module_configs["fold_to_run"][benchmark][task]
-                module_configs["fold_to_run"][benchmark][task] = [t for t in tasks if t < default_max_folds]
+                folds = module_configs["fold_to_run"][benchmark][task]
             else:
-                module_configs["fold_to_run"][benchmark][task] = amlb_task_folds[benchmark][task]
+                folds = amlb_task_folds[benchmark][task]
+            module_configs["fold_to_run"][benchmark][task] = [f for f in folds if f < default_max_folds]
+            if not module_configs["fold_to_run"][benchmark][task]:
+                del module_configs["fold_to_run"][benchmark][task]
 
 
 def get_cloudwatch_logs_url(region: str, job_id: str, log_group_name: str = "aws/batch/job"):
@@ -219,10 +221,14 @@ def generate_config_combinations(config, metrics_bucket, batch_job_queue, batch_
     else:
         raise ValueError("Invalid module. Choose either 'tabular', 'timeseries', or 'multimodal'.")
 
+    if len(job_configs) == 0:
+        return {parent_job_id: "No job submitted"}
+
     benchmark_name = config["benchmark_name"]
     config_s3_path = upload_config(config_list=job_configs, bucket=metrics_bucket, benchmark_name=benchmark_name)
     env = [{"name": "config_file", "value": config_s3_path}]
-    job_name = f"{benchmark_name}-array-job"
+    job_type = "array" if len(job_configs) > 1 else "single"
+    job_name = f"{benchmark_name}-{config['module']}-{job_type}-job"
     parent_job_id = submit_batch_job(
         env=env,
         job_name=job_name,

diff --git a/src/autogluon/bench/cloud/aws/batch_stack/stack.py b/src/autogluon/bench/cloud/aws/batch_stack/stack.py
@@ -37,10 +37,15 @@ def find_project_root_or_fallback(start_dir: str, root_identifier: str = "pyproj
     return start_dir
 
 
+# when pip installed, project_root is at $site_package_path/src/autogluon/bench, so agbench_base_dir should be ./
+# when installed from source, project_root is at ./, so agbench_base_dir should be ./src/autogluon/bench
 with importlib.resources.path("autogluon.bench", "Dockerfile") as file_path:
     docker_base_dir = os.path.dirname(file_path)
     project_root = find_project_root_or_fallback(docker_base_dir)
     docker_path = os.path.relpath(file_path, project_root)
+    agbench_base_dir = os.path.dirname(docker_path)
+    if agbench_base_dir == "":
+        agbench_base_dir = "."
 
 with importlib.resources.path("autogluon.bench.cloud.aws.batch_stack.lambdas", "lambda_function.py") as file_path:
     lambda_script_dir = os.path.dirname(file_path)
@@ -174,6 +179,8 @@ def __init__(self, scope: Construct, id: str, static_stack: StaticResourceStack,
         # Currently CDK can only push to the default repo aws-cdk/assets
         # https://github.com/aws/aws-cdk/issues/12597
         # TODO: use https://github.com/cdklabs/cdk-docker-image-deployment
+
+        logger.info(f"Building Dockerfile at {docker_path} with context at {project_root}")
         docker_image_asset = ecr_assets.DockerImageAsset(
             self,
             f"{prefix}-ecr-docker-image-asset",
@@ -182,6 +189,7 @@ def __init__(self, scope: Construct, id: str, static_stack: StaticResourceStack,
             follow_symlinks=core.SymlinkFollowMode.ALWAYS,
             build_args={
                 "AG_BENCH_BASE_IMAGE": os.environ["AG_BENCH_BASE_IMAGE"],
+                "AG_BENCH_BASE_DIR": agbench_base_dir,
                 "AG_BENCH_VERSION": os.getenv("AG_BENCH_VERSION", "latest"),
                 "CDK_DEPLOY_REGION": os.environ["CDK_DEPLOY_REGION"],
                 "FRAMEWORK_PATH": os.environ["FRAMEWORK_PATH"],

diff --git a/src/autogluon/bench/entrypoint.sh b/src/autogluon/bench/entrypoint.sh
@@ -1,6 +1,5 @@
 #!/bin/bash
 
 echo "Running hardware utilization monitoring in the background..."
-${AGBENCH_BASE}utils/hardware_utilization.sh &
-
+${AG_BENCH_BASE_DIR}/utils/hardware_utilization.sh &
 agbench run $config_file --skip-setup
diff --git a/src/autogluon/bench/frameworks/multimodal/exec.py b/src/autogluon/bench/frameworks/multimodal/exec.py
@@ -12,13 +12,7 @@
 from autogluon.core.metrics import make_scorer
 from autogluon.multimodal import MultiModalPredictor
 from autogluon.multimodal import __version__ as ag_version
-from autogluon.multimodal.constants import (
-    FEW_SHOT_CLASSIFICATION,
-    IMAGE_SIMILARITY,
-    IMAGE_TEXT_SIMILARITY,
-    OBJECT_DETECTION,
-    TEXT_SIMILARITY,
-)
+from autogluon.multimodal.constants import IMAGE_SIMILARITY, IMAGE_TEXT_SIMILARITY, OBJECT_DETECTION, TEXT_SIMILARITY
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)

diff --git a/src/autogluon/bench/frameworks/multimodal/multimodal_benchmark.py b/src/autogluon/bench/frameworks/multimodal/multimodal_benchmark.py
@@ -40,7 +40,7 @@ def setup(
         Returns:
             None
         """
-        setup_script_path = os.path.abspath(os.path.dirname(__file__)) + "/setup.sh"
+        setup_script_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "setup.sh")
         command = [setup_script_path, git_uri, git_branch, self.benchmark_dir, agbench_version]
         result = subprocess.run(command)
         if result.returncode != 0:
@@ -88,13 +88,14 @@ def run(
         Returns:
             None
         """
-        if os.environ.get("RUNNING_IN_DOCKER", False):
-            venv_base_dir = "/home/"
+        if os.environ.get("RUNNING_IN_DOCKER", "false") == "true":
+            venv_base_dir = os.environ["VENV_BASE_DIR"]
         else:
             venv_base_dir = self.benchmark_dir
         PY_EXC_PATH = os.path.join(venv_base_dir, ".venv/bin/python")
 
-        exec_path = os.path.abspath(os.path.dirname(__file__)) + "/exec.py"
+        exec_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "exec.py")
+        logger.info(f"Executing {exec_path} under {PY_EXC_PATH}")
         command = [
             PY_EXC_PATH,
             exec_path,

diff --git a/src/autogluon/bench/frameworks/multimodal/setup.sh b/src/autogluon/bench/frameworks/multimodal/setup.sh
@@ -24,9 +24,9 @@ python3 -m pip install --upgrade setuptools wheel
 
 if echo "$AG_BENCH_VERSION" | grep -q "dev"; then
   # install from local source or docker
-  pip install .
+  python3 -m pip install .
 else
-  pip install autogluon.bench==$AG_BENCH_VERSION
+  python3 -m pip install autogluon.bench==$AG_BENCH_VERSION
 fi
 
 cd $venv_base_dir/$repo_name

diff --git a/src/autogluon/bench/frameworks/tabular/exec.sh b/src/autogluon/bench/frameworks/tabular/exec.sh
@@ -30,7 +30,6 @@ if [ -n "$fold" ]; then
 fi
 
 if [ -n "$user_dir" ]; then
-    cp -r $user_dir $venv_base_dir
     amlb_args+=" -u $user_dir"
 fi
 

diff --git a/src/autogluon/bench/frameworks/tabular/tabular_benchmark.py b/src/autogluon/bench/frameworks/tabular/tabular_benchmark.py
@@ -2,7 +2,6 @@
 import os
 import subprocess
 import sys
-from typing import List
 
 from autogluon.bench.frameworks.benchmark import Benchmark
 
@@ -52,12 +51,12 @@ def run(
             None
         """
 
-        if os.environ.get("RUNNING_IN_DOCKER", False):
-            venv_base_dir = "/home/"
+        if os.environ.get("RUNNING_IN_DOCKER", "false") == "true":
+            venv_base_dir = os.environ["VENV_BASE_DIR"]
         else:
             venv_base_dir = self.benchmark_dir
 
-        exec_script_path = os.path.abspath(os.path.dirname(__file__)) + "/exec.sh"
+        exec_script_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "exec.sh")
         command = [
             exec_script_path,
             framework,

diff --git a/src/autogluon/bench/frameworks/timeseries/exec.sh b/src/autogluon/bench/frameworks/timeseries/exec.sh
@@ -30,7 +30,6 @@ if [ -n "$fold" ]; then
 fi
 
 if [ -n "$user_dir" ]; then
-    cp -r $user_dir $venv_base_dir
     amlb_args+=" -u $user_dir"
 fi
 

diff --git a/src/autogluon/bench/frameworks/timeseries/timeseries_benchmark.py b/src/autogluon/bench/frameworks/timeseries/timeseries_benchmark.py
@@ -50,8 +50,8 @@ def run(
         Returns:
             None
         """
-        if os.environ.get("RUNNING_IN_DOCKER", False):
-            venv_base_dir = "/home/"
+        if os.environ.get("RUNNING_IN_DOCKER", "false") == "true":
+            venv_base_dir = os.environ["VENV_BASE_DIR"]
         else:
             venv_base_dir = self.benchmark_dir
-Original file line number
+Diff line change
@@ Expand Up / @@ -30,7 +30,6 @@ if [ -n "$fold" ]; then @@
     fi
     if [ -n "$user_dir" ]; then
-        cp -r $user_dir $venv_base_dir
         amlb_args+=" -u $user_dir"
     fi
@@ Expand Down @@