From 6f4af0beede88eb993bf887cec6e53bf2db1afa2 Mon Sep 17 00:00:00 2001 From: ciaran28 Date: Thu, 8 Jun 2023 13:21:59 +0100 Subject: [PATCH 1/4] Developing DS Code --- .../src_nyc_taxi/training/__init__.py | 4 +- .../databricks_pipelines/hyper_params.yaml | 78 +++++++++++-------- 2 files changed, 49 insertions(+), 33 deletions(-) diff --git a/data_science/src_nyc_taxi/training/__init__.py b/data_science/src_nyc_taxi/training/__init__.py index 3abb4055..9998fee0 100644 --- a/data_science/src_nyc_taxi/training/__init__.py +++ b/data_science/src_nyc_taxi/training/__init__.py @@ -207,8 +207,8 @@ def set_mlflow( ): if namespace.env is not None: params = yaml.safe_load(pathlib.Path(namespace.env).read_text()) - experiment_name = params['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['EXPERIMENT_NAME'] - track_in_azure_ml = params['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['TRACK_IN_AZURE_ML'] + experiment_name = params['Global']['ExperimentName'] + track_in_azure_ml = params['Global']['AMLTraking'] if track_in_azure_ml: if track_in_azure_ml: diff --git a/mlOps/nyc_taxi/databricks_pipelines/hyper_params.yaml b/mlOps/nyc_taxi/databricks_pipelines/hyper_params.yaml index c3b59dbb..eec5c829 100644 --- a/mlOps/nyc_taxi/databricks_pipelines/hyper_params.yaml +++ b/mlOps/nyc_taxi/databricks_pipelines/hyper_params.yaml @@ -1,32 +1,48 @@ -ModelOne: - objective": "regression" - "metric": "rmse" - "num_leaves": 25 - "learning_rate": 0.2 - "bagging_fraction": 0.9 - "feature_fraction": 0.9 - "bagging_seed": 42 - "verbosity": -1 - "seed": 42 +Global: + ExperimentName: "nyc_taxi_dbx_job" + AMLTraking: False -ModelTwo: - objective": "regression" - "metric": "rmse" - "num_leaves": 27 - "learning_rate": 0.3 - "bagging_fraction": 0.9 - "feature_fraction": 0.9 - "bagging_seed": 42 - "verbosity": -1 - "seed": 42 - -ModelThree: - objective": "regression" - "metric": "rmse" - "num_leaves": 30 - "learning_rate": 0.4 - "bagging_fraction": 0.9 - "feature_fraction": 0.9 - "bagging_seed": 42 - "verbosity": -1 - "seed": 42 +ModelConfigs: [ + { + "ModelName": "ModelOne", + "ModelParams": { + "objective": "regression", + "metric": "rmse", + "num_leaves": 25, + "learning_rate": 0.2, + "bagging_fraction": 0.9, + "feature_fraction": 0.9, + "bagging_seed": 42, + "verbosity": -1, + "seed": 42 + } + }, + { + "ModelName": "ModelTwo", + "ModelParams": { + "objective": "regression", + "metric": "rmse", + "num_leaves": 27, + "learning_rate": 0.3, + "bagging_fraction": 0.9, + "feature_fraction": 0.9, + "bagging_seed": 42, + "verbosity": -1, + "seed": 42 + } + }, + { + "ModelName": "ModelThree", + "ModelParams": { + "objective": "regression", + "metric": "rmse", + "num_leaves": 30, + "learning_rate": 0.4, + "bagging_fraction": 0.9, + "feature_fraction": 0.9, + "bagging_seed": 42, + "verbosity": -1, + "seed": 42 + } + } +] From 19ef6126a92977e26b1d1db15af4e75eeac54785 Mon Sep 17 00:00:00 2001 From: ciaran28 Date: Thu, 8 Jun 2023 13:22:46 +0100 Subject: [PATCH 2/4] Developing DS Code --- .github/workflows/taskDatabricks.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/taskDatabricks.yaml b/.github/workflows/taskDatabricks.yaml index 60615a12..fa7c8ddf 100644 --- a/.github/workflows/taskDatabricks.yaml +++ b/.github/workflows/taskDatabricks.yaml @@ -90,10 +90,10 @@ jobs: #################################/ ## Deploy Azure Infrastructure. #################################/ - - name: Deploy Azure Resources - run: ${{ inputs.SCRIPT_LANGUAGE }} infrastructure/databricks/databricks_utils/${{ inputs.SCRIPT_LANGUAGE }}/utils_create_azure_resources.py - env: - ENVIRONMENT: ${{ inputs.ENVIRONMENT }} + #- name: Deploy Azure Resources + # run: ${{ inputs.SCRIPT_LANGUAGE }} infrastructure/databricks/databricks_utils/${{ inputs.SCRIPT_LANGUAGE }}/utils_create_azure_resources.py + # env: + # ENVIRONMENT: ${{ inputs.ENVIRONMENT }} From 48e84d12cea2668bb042e8b999d0359d37c04c12 Mon Sep 17 00:00:00 2001 From: ciaran28 Date: Thu, 8 Jun 2023 13:30:45 +0100 Subject: [PATCH 3/4] Developing DS Code --- mlOps/nyc_taxi/databricks_pipelines/workflow.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml b/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml index fda4aad6..2cec08af 100644 --- a/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml +++ b/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml @@ -2,7 +2,7 @@ custom: # Cluster configs for each environment default-cluster-spec: &default-cluster-spec - spark_version: '11.3.x-cpu-ml-scala2.12' + spark_version: '13.0.x-cpu-ml-scala2.1' node_type_id: 'Standard_DS3_v2' driver_node_type_id: 'Standard_DS3_v2' num_workers: 1 From bb013216ac487bd970f8681f7b61adb1f1dd4232 Mon Sep 17 00:00:00 2001 From: ciaran28 Date: Thu, 8 Jun 2023 15:47:03 +0100 Subject: [PATCH 4/4] Developing DS Code --- mlOps/nyc_taxi/databricks_pipelines/workflow.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml b/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml index 2cec08af..b74b5ee2 100644 --- a/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml +++ b/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml @@ -2,7 +2,7 @@ custom: # Cluster configs for each environment default-cluster-spec: &default-cluster-spec - spark_version: '13.0.x-cpu-ml-scala2.1' + spark_version: '13.0.x-cpu-ml-scala2.12' node_type_id: 'Standard_DS3_v2' driver_node_type_id: 'Standard_DS3_v2' num_workers: 1