From 7cb3a9d646672072b58d205f4849709b5c44c54d Mon Sep 17 00:00:00 2001 From: jsj Date: Tue, 3 Dec 2024 21:54:12 +0100 Subject: [PATCH] refactor: remove sempy dependency --- pyproject.toml | 4 +- src/msfabricutils/core/__init__.py | 12 ++++++ src/msfabricutils/core/auth.py | 25 +++++++++++++ src/msfabricutils/core/generic.py | 37 +++++++++++++++++++ src/msfabricutils/core/lakehouse.py | 15 ++++++++ src/msfabricutils/core/workspace.py | 14 +++++++ src/msfabricutils/fabric_duckdb_connection.py | 36 ++++++------------ 7 files changed, 115 insertions(+), 28 deletions(-) create mode 100644 src/msfabricutils/core/__init__.py create mode 100644 src/msfabricutils/core/auth.py create mode 100644 src/msfabricutils/core/generic.py create mode 100644 src/msfabricutils/core/lakehouse.py create mode 100644 src/msfabricutils/core/workspace.py diff --git a/pyproject.toml b/pyproject.toml index 782760e..12208a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ license.file = "LICENSE" readme = "README.md" requires-python = ">=3.10" dependencies = [ + "azure-identity>=1.17.1", "duckdb>=1.1.3", "deltalake>=0.22.0", "sqlglot>=25.32.1", @@ -31,9 +32,6 @@ dependencies = [ Repository = "https://github.com/mrjsj/msfabricutils" [project.optional-dependencies] -sempy = [ - "semantic-link>=0.8.0", -] docs = [ "mkdocs==1.6.1", "mkdocs-material==9.5.47", diff --git a/src/msfabricutils/core/__init__.py b/src/msfabricutils/core/__init__.py new file mode 100644 index 0000000..7ac75f2 --- /dev/null +++ b/src/msfabricutils/core/__init__.py @@ -0,0 +1,12 @@ +from .workspace import get_workspaces, get_workspace +from .lakehouse import get_workspace_lakehouse_tables, get_workspace_lakehouses +from .auth import get_fabric_bearer_token, get_onelake_access_token + +__all__ = ( + "get_workspace", + "get_workspaces", + "get_workspace_lakehouses", + "get_workspace_lakehouse_tables", + "get_onelake_access_token", + "get_fabric_bearer_token" +) diff --git a/src/msfabricutils/core/auth.py 
b/src/msfabricutils/core/auth.py
new file mode 100644
index 0000000..5c314ad
--- /dev/null
+++ b/src/msfabricutils/core/auth.py
@@ -0,0 +1,25 @@
+from azure.identity import DefaultAzureCredential
+
+
+def _get_token(audience: str) -> str:
+    """Return an access token for ``audience``.
+
+    ``notebookutils`` is used when it can be imported; otherwise the token is
+    requested through ``DefaultAzureCredential`` with the ``/.default`` scope.
+    """
+    try:
+        import notebookutils
+    except ModuleNotFoundError:
+        return DefaultAzureCredential().get_token(f"{audience}/.default").token
+
+    return notebookutils.credentials.getToken(audience)
+
+
+def get_onelake_access_token() -> str:
+    """Return an access token for the ``https://storage.azure.com`` audience."""
+    return _get_token("https://storage.azure.com")
+
+
+def get_fabric_bearer_token() -> str:
+    """Return the bearer token used for Fabric REST API calls."""
+    return _get_token("https://analysis.windows.net/powerbi/api")
diff --git a/src/msfabricutils/core/generic.py b/src/msfabricutils/core/generic.py
new file mode 100644
index 0000000..40357d3
--- /dev/null
+++ b/src/msfabricutils/core/generic.py
@@ -0,0 +1,62 @@
+import requests
+from msfabricutils.core.auth import get_fabric_bearer_token
+
+_BASE_URL = "https://api.fabric.microsoft.com/v1"
+
+
+def _auth_headers() -> dict[str, str]:
+    """Build the Authorization header from a Fabric bearer token."""
+    return {"Authorization": f"Bearer {get_fabric_bearer_token()}"}
+
+
+def get_paginated(endpoint: str, data_key: str) -> list[dict]:
+    """Collect the items from every page of a Fabric REST API listing.
+
+    Args:
+        endpoint: Path relative to the API base URL, e.g. ``"workspaces"``.
+        data_key: Key of the response body that holds the page's items.
+
+    Returns:
+        The items of all pages, in the order the API returned them.
+
+    Raises:
+        requests.HTTPError: If a page request returns an error status.
+    """
+    url = f"{_BASE_URL}/{endpoint}"
+    headers = _auth_headers()
+
+    items: list[dict] = []
+    continuation_token = None
+    while True:
+        params = {"continuationToken": continuation_token} if continuation_token else {}
+
+        response = requests.get(url, headers=headers, params=params)
+        response.raise_for_status()
+        data: dict = response.json()
+
+        # A missing or null key means "no items"; extend(None) would raise TypeError.
+        items.extend(data.get(data_key) or [])
+
+        continuation_token = data.get("continuationToken")
+        if not continuation_token:
+            break
+
+    return items
+
+
+def get_page(endpoint: str) -> dict:
+    """Fetch a single Fabric REST API resource and return its JSON body.
+
+    Args:
+        endpoint: Path relative to the API base URL, e.g. ``"workspaces/<id>"``.
+
+    Returns:
+        The decoded JSON response.
+
+    Raises:
+        requests.HTTPError: If the request returns an error status.
+    """
+    response = requests.get(f"{_BASE_URL}/{endpoint}", headers=_auth_headers())
+    response.raise_for_status()
+
+    return response.json()
diff --git a/src/msfabricutils/core/lakehouse.py b/src/msfabricutils/core/lakehouse.py
new file mode 100644
index 0000000..361de4d
--- /dev/null
+++ b/src/msfabricutils/core/lakehouse.py
@@ -0,0 +1,16 @@
+from msfabricutils.core.generic import get_paginated
+
+
+def get_workspace_lakehouses(workspace_id: str) -> list[dict]:
+    """Return every lakehouse in the workspace, following pagination."""
+    endpoint = f"workspaces/{workspace_id}/lakehouses"
+
+    return get_paginated(endpoint, data_key="value")
+
+
+def get_workspace_lakehouse_tables(workspace_id: str, lakehouse_id: str) -> list[dict]:
+    """Return every table of the lakehouse, following pagination."""
+    endpoint = f"workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables"
+
+    # This listing keeps its items under "data" rather than "value".
+    return get_paginated(endpoint, data_key="data")
diff --git a/src/msfabricutils/core/workspace.py b/src/msfabricutils/core/workspace.py
new file mode 100644
index 0000000..80e2f77
--- /dev/null
+++ b/src/msfabricutils/core/workspace.py
@@ -0,0 +1,11 @@
+from msfabricutils.core.generic import get_paginated, get_page
+
+
+def get_workspaces() -> list[dict]:
+    """Return all workspaces, following pagination."""
+    return get_paginated("workspaces", data_key="value")
+
+
+def get_workspace(workspace_id: str) -> dict:
+    """Return the JSON description of a single workspace."""
+    return get_page(f"workspaces/{workspace_id}")
diff --git a/src/msfabricutils/fabric_duckdb_connection.py b/src/msfabricutils/fabric_duckdb_connection.py
index 2f14177..673a8a2 100644
--- a/src/msfabricutils/fabric_duckdb_connection.py
+++ b/src/msfabricutils/fabric_duckdb_connection.py
@@ -1,6 +1,12 @@
 from typing import Any
 from msfabricutils.helpers import _separator_indices
+from msfabricutils.core import (
+    get_workspace,
+    get_workspace_lakehouses,
+    get_workspace_lakehouse_tables,
+)
+
 
 import duckdb
 from deltalake import write_deltalake
 
@@ -8,12 +14,6 @@ import sqlglot
 from sqlglot import exp
 
-# Avoid import errors outside Fabric environments
-try:
-    from sempy import fabric  # noqa: F401
-except ModuleNotFoundError:
-    pass
-
 
 class FabricDuckDBConnection:
     """A DuckDB connection wrapper for Microsoft Fabric Lakehouses.
 
@@ -74,9 +74,8 @@ def __getattr__(self, name): if name == "sql" or name == "execute": def wrapper(*args, **kwargs): - original_method = getattr(self._connection, name) - + # Modify the query/parameters here before passing to the actual method modified_args, modified_kwargs = self._modify_input_query(args, kwargs) @@ -328,14 +327,7 @@ def _create_or_replace_fabric_lakehouse_secret(self, catalog_name: str) -> None: def _register_lakehouse_tables( self, workspace_name: str, workspace_id: str, lakehouse_id: str, lakehouse_name: str ) -> None: - from sempy import fabric - - client = fabric.FabricRestClient() - - response = client.get(f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables") - response.raise_for_status() - - tables = response.json()["data"] + tables = get_workspace_lakehouse_tables(workspace_id, lakehouse_id) if not tables: table_information = { @@ -416,17 +408,11 @@ def register_workspace_lakehouses(self, workspace_id: str, lakehouses: str | lis if isinstance(lakehouses, str): lakehouses = [lakehouses] - from sempy import fabric - - workspaces = fabric.list_workspaces() - - workspace_name = workspaces[workspaces.Id == workspace_id]["Name"].iat[0] + workspace_info = get_workspace(workspace_id) - client = fabric.FabricRestClient() + workspace_name = workspace_info["displayName"] - response = client.get(f"v1/workspaces/{workspace_id}/lakehouses") - response.raise_for_status() - lakehouse_properties = response.json()["value"] + lakehouse_properties = get_workspace_lakehouses(workspace_id) selected_lakehouses = [ lakehouse