From ef974cace12fb55fab07a011191e354d2e0f3f5f Mon Sep 17 00:00:00 2001 From: kr-arjun Date: Fri, 13 Jan 2023 22:37:31 -0600 Subject: [PATCH] Instance pool fix for handing driver_instance_pool_id This PR is to support the optional driver_instance_pool_id attribute that can be present with cluster pool specs. --- dbclient/ClustersClient.py | 26 +++++++++++++++++++++++++- migration_pipeline.py | 2 +- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/dbclient/ClustersClient.py b/dbclient/ClustersClient.py index c5ae7b69..9c4ccb9c 100644 --- a/dbclient/ClustersClient.py +++ b/dbclient/ClustersClient.py @@ -31,6 +31,7 @@ def __init__(self, configs, checkpoint_service): 'autotermination_minutes', 'enable_elastic_disk', 'instance_pool_id', + 'driver_instance_pool_id', 'policy_id', 'pinned_by_user_name', 'creator_user_name', @@ -58,7 +59,30 @@ def cleanup_cluster_pool_configs(self, cluster_json, cluster_creator, is_job_clu cluster_json.pop('enable_elastic_disk', None) # map old pool ids to new pool ids old_pool_id = cluster_json['instance_pool_id'] - cluster_json['instance_pool_id'] = pool_id_dict.get[old_pool_id] + new_pool_id = pool_id_dict.get(old_pool_id) + + if old_pool_id and new_pool_id: + cluster_json['instance_pool_id'] = new_pool_id + else: + logging.warning( + f"Instance pool mapped to src/dest :{old_pool_id}/{new_pool_id} is not available." + + "It may have been deleted; cluster will use defaults.") + cluster_json.pop("instance_pool_id") + + old_driver_pool_id = cluster_json.get('driver_instance_pool_id') + # driver_instance_pool_id is optional. if present, try to map new id. + if old_driver_pool_id: + new_driver_pool_id = pool_id_dict.get(old_driver_pool_id) + if new_driver_pool_id: + cluster_json['driver_instance_pool_id'] = new_driver_pool_id + else: + # if new driver pool for respective source driver pool id is not available, + # reset to default configs. + logging.warning( + f"Driver Instance pool mapped to src/dest :{old_driver_pool_id}/{new_driver_pool_id}" + + "is not available.It may have been deleted; cluster will use defaults.") + cluster_json.pop("instance_pool_id") + cluster_json.pop("driver_instance_pool_id") if not is_job_cluster: # add custom tag for original cluster creator for cost tracking diff --git a/migration_pipeline.py b/migration_pipeline.py index 2253fe84..8617e5ce 100644 --- a/migration_pipeline.py +++ b/migration_pipeline.py @@ -257,7 +257,7 @@ def add_dir_diff_task(name, dir_path, config, suffix=None, parents=None): # ClustersExportTask add_diff_task("validate-clusters", "clusters.log", DiffConfig( primary_key="cluster_name", - ignored_keys=["cluster_id", "policy_id", "instance_pool_id", "spark_version"], + ignored_keys=["cluster_id", "policy_id", "instance_pool_id", "driver_instance_pool_id", "spark_version"], children={ "aws_attributes": DiffConfig( ignored_keys=["zone_id"]