diff --git a/.github/workflows/taskDatabricks.yaml b/.github/workflows/taskDatabricks.yaml index 60615a12..fa7c8ddf 100644 --- a/.github/workflows/taskDatabricks.yaml +++ b/.github/workflows/taskDatabricks.yaml @@ -90,10 +90,10 @@ jobs: #################################/ ## Deploy Azure Infrastructure. #################################/ - - name: Deploy Azure Resources - run: ${{ inputs.SCRIPT_LANGUAGE }} infrastructure/databricks/databricks_utils/${{ inputs.SCRIPT_LANGUAGE }}/utils_create_azure_resources.py - env: - ENVIRONMENT: ${{ inputs.ENVIRONMENT }} + #- name: Deploy Azure Resources + # run: ${{ inputs.SCRIPT_LANGUAGE }} infrastructure/databricks/databricks_utils/${{ inputs.SCRIPT_LANGUAGE }}/utils_create_azure_resources.py + # env: + # ENVIRONMENT: ${{ inputs.ENVIRONMENT }} diff --git a/data_science/src_nyc_taxi/training/__init__.py b/data_science/src_nyc_taxi/training/__init__.py index 3abb4055..9998fee0 100644 --- a/data_science/src_nyc_taxi/training/__init__.py +++ b/data_science/src_nyc_taxi/training/__init__.py @@ -207,8 +207,8 @@ def set_mlflow( ): if namespace.env is not None: params = yaml.safe_load(pathlib.Path(namespace.env).read_text()) - experiment_name = params['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['EXPERIMENT_NAME'] - track_in_azure_ml = params['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['TRACK_IN_AZURE_ML'] + experiment_name = params['Global']['ExperimentName'] + track_in_azure_ml = params['Global']['AMLTraking'] if track_in_azure_ml: if track_in_azure_ml: diff --git a/mlOps/nyc_taxi/databricks_pipelines/hyper_params.yaml b/mlOps/nyc_taxi/databricks_pipelines/hyper_params.yaml index c3b59dbb..eec5c829 100644 --- a/mlOps/nyc_taxi/databricks_pipelines/hyper_params.yaml +++ b/mlOps/nyc_taxi/databricks_pipelines/hyper_params.yaml @@ -1,32 +1,48 @@ -ModelOne: - objective": "regression" - "metric": "rmse" - "num_leaves": 25 - "learning_rate": 0.2 - "bagging_fraction": 0.9 - "feature_fraction": 0.9 - "bagging_seed": 42 - "verbosity": -1 - "seed": 42 +Global: + ExperimentName: "nyc_taxi_dbx_job" + AMLTraking: False -ModelTwo: - objective": "regression" - "metric": "rmse" - "num_leaves": 27 - "learning_rate": 0.3 - "bagging_fraction": 0.9 - "feature_fraction": 0.9 - "bagging_seed": 42 - "verbosity": -1 - "seed": 42 - -ModelThree: - objective": "regression" - "metric": "rmse" - "num_leaves": 30 - "learning_rate": 0.4 - "bagging_fraction": 0.9 - "feature_fraction": 0.9 - "bagging_seed": 42 - "verbosity": -1 - "seed": 42 +ModelConfigs: [ + { + "ModelName": "ModelOne", + "ModelParams": { + "objective": "regression", + "metric": "rmse", + "num_leaves": 25, + "learning_rate": 0.2, + "bagging_fraction": 0.9, + "feature_fraction": 0.9, + "bagging_seed": 42, + "verbosity": -1, + "seed": 42 + } + }, + { + "ModelName": "ModelTwo", + "ModelParams": { + "objective": "regression", + "metric": "rmse", + "num_leaves": 27, + "learning_rate": 0.3, + "bagging_fraction": 0.9, + "feature_fraction": 0.9, + "bagging_seed": 42, + "verbosity": -1, + "seed": 42 + } + }, + { + "ModelName": "ModelThree", + "ModelParams": { + "objective": "regression", + "metric": "rmse", + "num_leaves": 30, + "learning_rate": 0.4, + "bagging_fraction": 0.9, + "feature_fraction": 0.9, + "bagging_seed": 42, + "verbosity": -1, + "seed": 42 + } + } +] diff --git a/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml b/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml index fda4aad6..b74b5ee2 100644 --- a/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml +++ b/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml @@ -2,7 +2,7 @@ custom: # Cluster configs for each environment default-cluster-spec: &default-cluster-spec - spark_version: '11.3.x-cpu-ml-scala2.12' + spark_version: '13.0.x-cpu-ml-scala2.12' node_type_id: 'Standard_DS3_v2' driver_node_type_id: 'Standard_DS3_v2' num_workers: 1