From f5af7db6b6965125f29d6c9f703d3389bd5f9306 Mon Sep 17 00:00:00 2001
From: Muhammad Samy
Date: Thu, 17 Oct 2024 10:23:34 +0300
Subject: [PATCH] update generate_config_file

Update the function to accept the workspace ID where the lakehouses
exist and the workspace ID where the warehouses exist.
---
 README.md                     |  72 +-----------
 pyproject.toml                |   2 +-
 updatefromgit/gitfunctions.py | 206 ++++++++++++++++++----------------
 3 files changed, 111 insertions(+), 169 deletions(-)

diff --git a/README.md b/README.md
index 6f96c59..ef6ef77 100644
--- a/README.md
+++ b/README.md
@@ -68,74 +68,4 @@ workspace_head = get_git_status(WORKSPACE_ID, access_token)
 commit_all_items_to_git(WORKSPACE_ID, workspace_head, access_token)
 logger.command("Program Completed")
 
-```
-
-### To edit pipeline connection while migrating the code between dev --> Uat --> Prod
-
-you can use `update_linked_services`
-the following refer to the full code of update from git
-
-```yml
-trigger:
-  branches:
-    include:
-      - dev # Change this to your development branch if different
-
-pr:
-  branches:
-    include:
-      - uat # Trigger on PRs to UAT
-
-pool:
-  vmImage: 'ubuntu-latest'
-
-steps:
-- task: AzureKeyVault@2
-  displayName: 'Azure Key Vault: da-dev-uaen-01-kv'
-  inputs:
-    azureSubscription: FabricSPNConnection
-    KeyVaultName: 'da-dev-uaen-01-kv'
-    SecretsFilter: 'CLIENTID, TENANTID, email, password, CLIENTSECRET'
-    RunAsPreJob: true
-
-- task: UsePythonVersion@0
-  inputs:
-    versionSpec: '3.x'
-    addToPath: true
-
-- bash: 'python -m pip install updatefromgit --no-cache-dir --upgrade'
-  displayName: 'Bash Script'
-
-- script: |
-    python3 -c "from update import update_linked_services; update_linked_services('$(Build.SourcesDirectory)/linkedservice-config.json', '$(Build.SourcesDirectory)')"
-  displayName: 'Run Python Script to Modify JSON Files'
-
-- task: Bash@3
-  inputs:
-    targetType: 'inline'
-    script: |
-      # Set up Git configuration for committing changes
-      git config --global user.email "your-email@example.com"
-      git config --global user.name "Your Name"
-
-      # Checkout the UAT branch
-      git checkout uat
-
-      # Stage all changes
-      git add "$BUILD_SOURCESDIRECTORY/**/*.json" # Adjust this pattern to your needs
-
-      # Commit changes
-      git commit -m "Automated update of pipeline-content.json files from PR"
-
-      # Push changes to the UAT branch
-      git push https://$(System.AccessToken)@dev.azure.com/your_org/your_project/_git/your_repo uat
-
-- task: PythonScript@0
-  displayName: 'Run a Python script'
-  inputs:
-    scriptSource: 'filePath'
-    scriptPath: '$(Build.SourcesDirectory)/update.py' #look to examples
-    arguments: '--WORKSPACE_ID $(WORKSPACE_ID) --CLIENT_ID $(CLIENTID) --TENANT_ID $(TENANTID) --USER_NAME $(email) --PASSWORD $(password) --CLIENT_SECRET $(CLIENTSECRET)'
-    workingDirectory: '$(Build.SourcesDirectory)'
-
-```
+```
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 55c0d1d..e5bce6b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "updatefromgit"
-version = "2.0.4"
+version = "2.0.5"
 dependencies = ["msal", "azlog"]
 requires-python = ">=3.6"
 authors = [{ name = "Muhammad Samy", email = "muhssamy@gmail.com" }]
diff --git a/updatefromgit/gitfunctions.py b/updatefromgit/gitfunctions.py
index abc60f8..04d0a4c 100644
--- a/updatefromgit/gitfunctions.py
+++ b/updatefromgit/gitfunctions.py
@@ -291,16 +291,53 @@ def commit_all_items_to_git(workspace_id: str, workspace_head: str, token):
         os._exit(1)
 
 
-def generate_config_file(workspace_id: str, token: str) -> None:
+def generate_lakehouse_config(workspace_id: str, token: str) -> dict:
     """
-    Fetch workspace items, filter by 'Warehouse' and 'Lakehouse', retrieve warehouse details,
-    and create a JSON file with formatted information for each relevant item.
+    Fetch only 'Lakehouse' items from the workspace and return a structured dictionary with relevant data.
+
+    Parameters
+    ----------
+    workspace_id : str
+        The ID of the workspace for which the items need to be fetched.
+    token : str
+        The authentication token to access the Microsoft Fabric API.
+
+    Returns
+    -------
+    dict
+        A dictionary containing Lakehouse items.
+    """
+
+    def fetch_workspace_items() -> list:
+        url = f"{FABRIC_API_URL}/workspaces/{workspace_id}/items"
+        headers = {"Authorization": f"Bearer {token}"}
+        response = requests.get(url, headers=headers, timeout=120)
+        response.raise_for_status()
+
+        items = response.json().get("value", [])
+        return [item for item in items if item["type"] == "Lakehouse"]
+
+    # Prepare data structure
+    lakehouse_data = {}
+    filtered_items = fetch_workspace_items()
+
+    for item in filtered_items:
+        lh_key = f"{item['displayName']}"
+        lakehouse_data[lh_key] = {
+            "typeProperties": {
+                "artifactId": item["id"],
+                "workspaceId": workspace_id,
+                "rootFolder": "Tables",
+            },
+            "name": item["displayName"],
+        }
+
+    return lakehouse_data
 
-    This function interacts with the Microsoft Fabric API to:
-    1. Fetch all items from a given workspace.
-    2. Filter for 'Warehouse' and 'Lakehouse' types.
-    3. Retrieve detailed information for each warehouse.
-    4. Structure the data and save it into a JSON file.
+
+def generate_warehouse_config(workspace_id: str, token: str) -> dict:
+    """
+    Fetch 'Warehouse' items from the workspace, retrieve their details, and return a structured dictionary.
 
     Parameters
     ----------
@@ -311,52 +348,20 @@ def generate_config_file(workspace_id: str, token: str) -> None:
 
     Returns
     -------
-    None
-        This function saves the formatted data to a JSON file named
-        `workspace__data.json`.
+    dict
+        A dictionary containing Warehouse items.
     """
 
     def fetch_workspace_items() -> list:
-        """
-        Fetch items from the Microsoft Fabric workspace.
-
-        This function sends a GET request to the Microsoft Fabric API to retrieve all
-        items within the specified workspace and filters the results to only include
-        'Warehouse' and 'Lakehouse' types.
-
-        Returns
-        -------
-        list
-            A list of dictionaries containing information about the filtered workspace items.
-        """
         url = f"{FABRIC_API_URL}/workspaces/{workspace_id}/items"
         headers = {"Authorization": f"Bearer {token}"}
         response = requests.get(url, headers=headers, timeout=120)
        response.raise_for_status()
 
         items = response.json().get("value", [])
-        filtered_items = [
-            item for item in items if item["type"] in ["Warehouse", "Lakehouse"]
-        ]
-        return filtered_items
+        return [item for item in items if item["type"] == "Warehouse"]
 
     def fetch_warehouse_details(warehouse_id: str) -> dict:
-        """
-        Fetch detailed information about a specific warehouse.
-
-        This function sends a GET request to the Microsoft Fabric API to retrieve detailed
-        information about a warehouse, including its connection string and metadata.
-
-        Parameters
-        ----------
-        warehouse_id : str
-            The ID of the warehouse for which details are required.
-
-        Returns
-        -------
-        dict
-            A dictionary containing detailed information about the warehouse.
-        """
         url = f"{FABRIC_API_URL}/workspaces/{workspace_id}/warehouses/{warehouse_id}"
         headers = {"Authorization": f"Bearer {token}"}
         response = requests.get(url, headers=headers, timeout=120)
         response.raise_for_status()
@@ -364,58 +369,65 @@ def fetch_warehouse_details(warehouse_id: str) -> dict:
         return response.json()
 
-    def create_json(filtered_items: list) -> None:
-        """
-        Create a JSON file from the filtered items (warehouses and lakehouses).
-
-        This function generates a structured JSON object from the filtered items,
-        retrieves warehouse details, and then saves the data to a file.
-
-        Parameters
-        ----------
-        filtered_items : list
-            A list of filtered workspace items containing warehouse and lakehouse details.
-
-        Returns
-        -------
-        None
-            The function saves the data to a JSON file named `workspace__data.json`.
-        """
-        result = {}
-
-        for item in filtered_items:
-            if item["type"] == "Warehouse":
-                warehouse = fetch_warehouse_details(item["id"])
-                wh_key = f"{item['displayName']}"
-                result[wh_key] = {
-                    "typeProperties": {
-                        "artifactId": warehouse["id"],
-                        "endpoint": warehouse["properties"]["connectionString"],
-                        "workspaceId": workspace_id,
-                    },
-                    "objectId": warehouse["id"],
-                    "name": warehouse["displayName"],
-                }
-            elif item["type"] == "Lakehouse":
-                lh_key = f"{item['displayName']}"
-                result[lh_key] = {
-                    "typeProperties": {
-                        "artifactId": item["id"],
-                        "workspaceId": workspace_id,
-                        "rootFolder": "Tables",
-                    },
-                    "name": item["displayName"],
-                }
-
-        # Delete the file if it exists
-        filename = "linkedservice-config.json"
-        if os.path.exists(filename):
-            os.remove(filename)
-
-        with open("linkedservice-config.json", "w") as outfile:
-            json.dump(result, outfile, indent=4)
-
-        print("linkedservice-config file created: linkedservice-config.json")
-
-    # Run the workflow
+    # Prepare data structure
+    warehouse_data = {}
     filtered_items = fetch_workspace_items()
-    create_json(filtered_items)
+
+    for item in filtered_items:
+        warehouse = fetch_warehouse_details(item["id"])
+        wh_key = f"{item['displayName']}"
+        warehouse_data[wh_key] = {
+            "typeProperties": {
+                "artifactId": warehouse["id"],
+                "endpoint": warehouse["properties"]["connectionString"],
+                "workspaceId": workspace_id,
+            },
+            "objectId": warehouse["id"],
+            "name": warehouse["displayName"],
+        }
+
+    return warehouse_data
+
+
+def generate_config_file(
+    lakehouse_workspace_id: str, warehouse_workspace_id: str, token: str
+) -> None:
+    """
+    Run `generate_lakehouse_config` for the Lakehouse workspace and `generate_warehouse_config`
+    for the Warehouse workspace, then merge the results into a single JSON object without
+    any outer keys, such as 'warehouse' or 'lakehouse'.
+
+    Parameters
+    ----------
+    lakehouse_workspace_id : str
+        The ID of the workspace that contains the 'Lakehouse' items.
+    warehouse_workspace_id : str
+        The ID of the workspace that contains the 'Warehouse' items.
+    token : str
+        The authentication token.
+
+    Returns
+    -------
+    None
+        Saves the merged result to `linkedservice-config.json`.
+    """
+
+    # Run the function for Lakehouse items
+    lakehouse_data = generate_lakehouse_config(lakehouse_workspace_id, token)
+
+    # Run the function for Warehouse items
+    warehouse_data = generate_warehouse_config(warehouse_workspace_id, token)
+
+    # Merge the two dictionaries into one (no outer keys like 'warehouse' or 'lakehouse')
+    merged_result = {**warehouse_data, **lakehouse_data}
+
+    # Delete the file if it exists
+    filename = "linkedservice-config.json"
+    if os.path.exists(filename):
+        os.remove(filename)
+
+    # Save the merged result to the JSON file
+    with open(filename, "w") as outfile:
+        json.dump(merged_result, outfile, indent=4)
+
+    logger.command(f"Linked service config file created: {filename}")
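
For reviewers, a minimal sketch of how the updated entry point is called after this change. The two workspace IDs and the token value below are placeholders, and the import path assumes the function is used from `updatefromgit.gitfunctions`; token acquisition follows whatever MSAL flow the caller already uses (see the README examples).

```python
# Hypothetical caller for the new two-workspace signature; IDs and token are placeholders.
from updatefromgit.gitfunctions import generate_config_file

LAKEHOUSE_WORKSPACE_ID = "00000000-0000-0000-0000-00000000000a"  # workspace holding the Lakehouses
WAREHOUSE_WORKSPACE_ID = "00000000-0000-0000-0000-00000000000b"  # workspace holding the Warehouses
access_token = "<bearer token acquired via msal>"  # placeholder

# Writes the merged Lakehouse and Warehouse entries to
# linkedservice-config.json in the current working directory.
generate_config_file(LAKEHOUSE_WORKSPACE_ID, WAREHOUSE_WORKSPACE_ID, access_token)
```

One behavioral note on the merge: because the config keys are item display names, `{**warehouse_data, **lakehouse_data}` lets a Lakehouse entry silently overwrite a Warehouse entry when both share a display name.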