Skip to content

Commit

Permalink
Merge pull request #21 from mrjsj/refactor/removed-sempy-dependency
Browse files Browse the repository at this point in the history
refactor: remove sempy dependency
  • Loading branch information
mrjsj authored Dec 3, 2024
2 parents 96b26fc + 7cb3a9d commit 999e0ad
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 28 deletions.
4 changes: 1 addition & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ license.file = "LICENSE"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"azure-identity>=1.17.1",
"duckdb>=1.1.3",
"deltalake>=0.22.0",
"sqlglot>=25.32.1",
Expand All @@ -31,9 +32,6 @@ dependencies = [
Repository = "https://github.com/mrjsj/msfabricutils"

[project.optional-dependencies]
sempy = [
"semantic-link>=0.8.0",
]
docs = [
"mkdocs==1.6.1",
"mkdocs-material==9.5.47",
Expand Down
12 changes: 12 additions & 0 deletions src/msfabricutils/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from .workspace import get_workspaces, get_workspace
from .lakehouse import get_workspace_lakehouse_tables, get_workspace_lakehouses
from .auth import get_fabric_bearer_token, get_onelake_access_token

# Names re-exported by this package from its workspace, lakehouse and auth
# submodules; this tuple controls what `from ... import *` exposes.
__all__ = (
"get_workspace",
"get_workspaces",
"get_workspace_lakehouses",
"get_workspace_lakehouse_tables",
"get_onelake_access_token",
"get_fabric_bearer_token"
)
25 changes: 25 additions & 0 deletions src/msfabricutils/core/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from azure.identity import DefaultAzureCredential


def get_onelake_access_token():
    """Return an access token for the ``https://storage.azure.com`` audience.

    The token is requested from ``notebookutils.credentials`` when that module
    can be imported. If a ``ModuleNotFoundError`` is raised instead, the token
    is obtained through ``DefaultAzureCredential`` using the audience's
    ``/.default`` scope.

    Returns:
        The access token as returned by whichever provider was used.
    """
    resource = "https://storage.azure.com"
    try:
        import notebookutils

        return notebookutils.credentials.getToken(resource)
    except ModuleNotFoundError:
        # notebookutils is unavailable: fall back to azure-identity.
        credential = DefaultAzureCredential()
        scope = f"{resource}/.default"
        return credential.get_token(scope).token


def get_fabric_bearer_token():
    """Return a bearer token for ``https://analysis.windows.net/powerbi/api``.

    The token is requested from ``notebookutils.credentials`` when that module
    can be imported. If a ``ModuleNotFoundError`` is raised instead, the token
    is obtained through ``DefaultAzureCredential`` using the audience's
    ``/.default`` scope.

    Returns:
        The access token as returned by whichever provider was used.
    """
    resource = "https://analysis.windows.net/powerbi/api"
    try:
        import notebookutils

        return notebookutils.credentials.getToken(resource)
    except ModuleNotFoundError:
        # notebookutils is unavailable: fall back to azure-identity.
        credential = DefaultAzureCredential()
        scope = f"{resource}/.default"
        return credential.get_token(scope).token
37 changes: 37 additions & 0 deletions src/msfabricutils/core/generic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import requests
from msfabricutils.core.auth import get_fabric_bearer_token


def get_paginated(endpoint: str, data_key: str) -> list[dict]:
    """Fetch every page of a paginated Fabric REST API collection.

    Issues GET requests against ``https://api.fabric.microsoft.com/v1/<endpoint>``
    and keeps following the ``continuationToken`` returned in each response
    body until a response no longer carries one.

    Args:
        endpoint: Path relative to the API base URL, without a leading slash.
        data_key: Key in each response body that holds the list of items.

    Returns:
        The items of all pages concatenated in the order they were received.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    base_url = "https://api.fabric.microsoft.com/v1"
    token = get_fabric_bearer_token()
    headers = {"Authorization": f"Bearer {token}"}

    responses = []
    continuation_token = None
    while True:
        # Only send the token once the API has handed one back.
        params = {"continuationToken": continuation_token} if continuation_token else {}

        response = requests.get(f"{base_url}/{endpoint}", headers=headers, params=params)
        response.raise_for_status()
        data: dict = response.json()

        # A page that omits `data_key` (or carries null) is treated as empty;
        # extending with None would raise TypeError.
        responses.extend(data.get(data_key) or [])

        continuation_token = data.get("continuationToken")
        if not continuation_token:
            break

    return responses


def get_page(endpoint: str) -> dict:
    """Fetch a single, non-paginated resource from the Fabric REST API.

    Issues one GET request against
    ``https://api.fabric.microsoft.com/v1/<endpoint>``.

    Args:
        endpoint: Path relative to the API base URL, without a leading slash.

    Returns:
        The decoded JSON body of the response. The return annotation is
        ``dict`` rather than ``list[dict]`` because the body is handed back
        unmodified.
        NOTE(review): assumes single-resource endpoints answer with a JSON
        object — confirm against the API.

    Raises:
        requests.HTTPError: If the request returns an error status.
    """
    base_url = "https://api.fabric.microsoft.com/v1"
    token = get_fabric_bearer_token()
    headers = {"Authorization": f"Bearer {token}"}

    response = requests.get(f"{base_url}/{endpoint}", headers=headers)
    response.raise_for_status()

    return response.json()
15 changes: 15 additions & 0 deletions src/msfabricutils/core/lakehouse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from msfabricutils.core.generic import get_paginated


def get_workspace_lakehouses(workspace_id: str):
    """Return all lakehouses of a workspace.

    Delegates to ``get_paginated`` for the ``workspaces/<id>/lakehouses``
    endpoint, reading items from the ``"value"`` key of each page.

    Args:
        workspace_id: Identifier of the workspace, interpolated into the path.
    """
    return get_paginated(f"workspaces/{workspace_id}/lakehouses", "value")


def get_workspace_lakehouse_tables(workspace_id: str, lakehouse_id: str):
    """Return all tables of a lakehouse.

    Delegates to ``get_paginated`` for the
    ``workspaces/<id>/lakehouses/<id>/tables`` endpoint, reading items from
    the ``"data"`` key of each page.

    Args:
        workspace_id: Identifier of the workspace, interpolated into the path.
        lakehouse_id: Identifier of the lakehouse, interpolated into the path.
    """
    tables_path = f"workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables"
    return get_paginated(tables_path, "data")
14 changes: 14 additions & 0 deletions src/msfabricutils/core/workspace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from msfabricutils.core.generic import get_paginated, get_page


def get_workspaces():
    """Return all workspaces.

    Delegates to ``get_paginated`` for the ``workspaces`` endpoint, reading
    items from the ``"value"`` key of each page.
    """
    return get_paginated("workspaces", "value")


def get_workspace(workspace_id: str):
    """Return the API response for a single workspace.

    Delegates to ``get_page`` for the ``workspaces/<id>`` endpoint.

    Args:
        workspace_id: Identifier of the workspace, interpolated into the path.
    """
    return get_page(f"workspaces/{workspace_id}")
36 changes: 11 additions & 25 deletions src/msfabricutils/fabric_duckdb_connection.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from typing import Any

from msfabricutils.helpers import _separator_indices
from msfabricutils.core import (
get_workspace,
get_workspace_lakehouses,
get_workspace_lakehouse_tables,
)


import duckdb
from deltalake import write_deltalake

import sqlglot
from sqlglot import exp

# Avoid import errors outside Fabric environments
try:
from sempy import fabric # noqa: F401
except ModuleNotFoundError:
pass


class FabricDuckDBConnection:
"""A DuckDB connection wrapper for Microsoft Fabric Lakehouses.
Expand Down Expand Up @@ -74,9 +74,8 @@ def __getattr__(self, name):
if name == "sql" or name == "execute":

def wrapper(*args, **kwargs):

original_method = getattr(self._connection, name)

# Modify the query/parameters here before passing to the actual method
modified_args, modified_kwargs = self._modify_input_query(args, kwargs)

Expand Down Expand Up @@ -328,14 +327,7 @@ def _create_or_replace_fabric_lakehouse_secret(self, catalog_name: str) -> None:
def _register_lakehouse_tables(
self, workspace_name: str, workspace_id: str, lakehouse_id: str, lakehouse_name: str
) -> None:
from sempy import fabric

client = fabric.FabricRestClient()

response = client.get(f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables")
response.raise_for_status()

tables = response.json()["data"]
tables = get_workspace_lakehouse_tables(workspace_id, lakehouse_id)

if not tables:
table_information = {
Expand Down Expand Up @@ -416,17 +408,11 @@ def register_workspace_lakehouses(self, workspace_id: str, lakehouses: str | lis
if isinstance(lakehouses, str):
lakehouses = [lakehouses]

from sempy import fabric

workspaces = fabric.list_workspaces()

workspace_name = workspaces[workspaces.Id == workspace_id]["Name"].iat[0]
workspace_info = get_workspace(workspace_id)

client = fabric.FabricRestClient()
workspace_name = workspace_info["displayName"]

response = client.get(f"v1/workspaces/{workspace_id}/lakehouses")
response.raise_for_status()
lakehouse_properties = response.json()["value"]
lakehouse_properties = get_workspace_lakehouses(workspace_id)

selected_lakehouses = [
lakehouse
Expand Down

0 comments on commit 999e0ad

Please sign in to comment.