From 2fe0d518c0e1c33e8ccaba834ff1364bfa76683c Mon Sep 17 00:00:00 2001 From: jsj Date: Tue, 3 Dec 2024 22:59:33 +0100 Subject: [PATCH 1/8] add docstrings --- src/msfabricutils/core/auth.py | 28 ++++++++++++++++++++++-- src/msfabricutils/core/generic.py | 34 +++++++++++++++++++++++++++++ src/msfabricutils/core/lakehouse.py | 33 ++++++++++++++++++++++++++++ src/msfabricutils/core/workspace.py | 28 ++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 2 deletions(-) diff --git a/src/msfabricutils/core/auth.py b/src/msfabricutils/core/auth.py index 5c314ad..043bf84 100644 --- a/src/msfabricutils/core/auth.py +++ b/src/msfabricutils/core/auth.py @@ -1,7 +1,19 @@ from azure.identity import DefaultAzureCredential -def get_onelake_access_token(): +def get_onelake_access_token() -> str: + """ + Retrieves an access token for Azure OneLake storage. + + This function attempts to obtain an access token for accessing Azure storage. + It first checks if the code is running in a Microsoft Fabric notebook environment + and attempts to use the `notebookutils` library to get the token. If the library + is not available, it falls back to using the `DefaultAzureCredential` from the Azure SDK + to fetch the token. + + Returns: + str: The access token used for authenticating requests to Azure OneLake storage. + """ audience = "https://storage.azure.com" try: import notebookutils @@ -13,7 +25,19 @@ def get_onelake_access_token(): return token -def get_fabric_bearer_token(): +def get_fabric_bearer_token() -> str: + """ + Retrieves a bearer token for the Microsoft Fabric (Power BI) API. + + This function attempts to obtain a bearer token for authenticating requests to the + Microsoft Fabric (Power BI) API. It first checks if the code is running in a Microsoft Fabric + notebook environment and tries to use the `notebookutils` library to get the token. + If the library is not available, it falls back to using the `DefaultAzureCredential` + from the Azure SDK to fetch the token. 
+ + Returns: + str: The bearer token used for authenticating requests to the Microsoft Fabric (Power BI) API. + """ audience = "https://analysis.windows.net/powerbi/api" try: import notebookutils diff --git a/src/msfabricutils/core/generic.py b/src/msfabricutils/core/generic.py index 40357d3..af9fe5f 100644 --- a/src/msfabricutils/core/generic.py +++ b/src/msfabricutils/core/generic.py @@ -3,6 +3,24 @@ def get_paginated(endpoint: str, data_key: str) -> list[dict]: + """ + Retrieves paginated data from the specified API endpoint. + + This function makes repeated GET requests to the specified endpoint of the + Fabric REST API, handling pagination automatically. It uses a bearer token + for authentication and retrieves data from each page, appending the results + to a list. Pagination continues until no `continuationToken` is returned. + + Args: + endpoint (str): The API endpoint to retrieve data from. + data_key (str): The key in the response JSON that contains the list of data to be returned. + + Returns: + list[dict]: A list of dictionaries containing the data from all pages. + + Raises: + requests.exceptions.RequestException: If the HTTP request fails or returns an error. + """ base_url = "https://api.fabric.microsoft.com/v1" token = get_fabric_bearer_token() headers = {"Authorization": f"Bearer {token}"} @@ -26,6 +44,22 @@ def get_paginated(endpoint: str, data_key: str) -> list[dict]: def get_page(endpoint: str) -> list[dict]: + """ + Retrieves data from a specified API endpoint. + + This function makes a GET request to the specified endpoint of the Fabric REST API, + using a bearer token for authentication. It returns the JSON response as a list of + dictionaries containing the data returned by the API. + + Args: + endpoint (str): The API endpoint to send the GET request to. + + Returns: + list[dict]: A list of dictionaries containing the data returned from the API. + + Raises: + requests.exceptions.RequestException: If the HTTP request fails or returns an error. 
+ """ base_url = "https://api.fabric.microsoft.com/v1" token = get_fabric_bearer_token() headers = {"Authorization": f"Bearer {token}"} diff --git a/src/msfabricutils/core/lakehouse.py b/src/msfabricutils/core/lakehouse.py index 361de4d..e347976 100644 --- a/src/msfabricutils/core/lakehouse.py +++ b/src/msfabricutils/core/lakehouse.py @@ -2,6 +2,22 @@ def get_workspace_lakehouses(workspace_id: str): + """ + Retrieves lakehouses for a specified workspace. + + This function fetches a list of lakehouses from a specified workspace using the + `get_paginated` function. It constructs the appropriate endpoint and retrieves + paginated data associated with the workspace ID. + + Args: + workspace_id (str): The ID of the workspace to retrieve lakehouses from. + + Returns: + list[dict]: A list of dictionaries containing lakehouse data for the specified workspace. + + See Also: + get_paginated: A helper function that handles paginated API requests. + """ endpoint = f"workspaces/{workspace_id}/lakehouses" data_key = "value" @@ -9,6 +25,23 @@ def get_workspace_lakehouses(workspace_id: str): def get_workspace_lakehouse_tables(workspace_id: str, lakehouse_id: str): + """ + Retrieves tables for a specified lakehouse within a workspace. + + This function fetches a list of tables from a specific lakehouse within a given workspace + using the `get_paginated` function. It constructs the appropriate endpoint and retrieves + paginated data associated with the workspace and lakehouse IDs. + + Args: + workspace_id (str): The ID of the workspace containing the lakehouse. + lakehouse_id (str): The ID of the lakehouse to retrieve tables from. + + Returns: + list[dict]: A list of dictionaries containing table data for the specified lakehouse. + + See Also: + get_paginated: A helper function that handles paginated API requests. 
+ """ endpoint = f"workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables" data_key = "data" diff --git a/src/msfabricutils/core/workspace.py b/src/msfabricutils/core/workspace.py index 80e2f77..80ffd57 100644 --- a/src/msfabricutils/core/workspace.py +++ b/src/msfabricutils/core/workspace.py @@ -2,6 +2,19 @@ def get_workspaces(): + """ + Retrieves a list of workspaces. + + This function fetches a list of workspaces using the `get_paginated` function. + It constructs the appropriate endpoint and retrieves the paginated data associated + with workspaces. + + Returns: + list[dict]: A list of dictionaries containing data for the available workspaces. + + See Also: + get_paginated: A helper function that handles paginated API requests. + """ endpoint = "workspaces" data_key = "value" @@ -9,6 +22,21 @@ def get_workspaces(): def get_workspace(workspace_id: str): + """ + Retrieves details of a specified workspace. + + This function fetches the details of a specific workspace by using the `get_page` + function. It constructs the appropriate endpoint based on the provided workspace ID. + + Args: + workspace_id (str): The ID of the workspace to retrieve details for. + + Returns: + dict: A dictionary containing the details of the specified workspace. + + See Also: + get_page: A helper function that retrieves a single page of data from the API. 
+ """ endpoint = f"workspaces/{workspace_id}" return get_page(endpoint) From 816afb44b094daa7506c9cd8be37ba52aeb8bdc9 Mon Sep 17 00:00:00 2001 From: jsj Date: Tue, 3 Dec 2024 22:59:54 +0100 Subject: [PATCH 2/8] add to top level import --- src/msfabricutils/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/msfabricutils/__init__.py b/src/msfabricutils/__init__.py index 381df40..41aa9b0 100644 --- a/src/msfabricutils/__init__.py +++ b/src/msfabricutils/__init__.py @@ -1,5 +1,19 @@ from msfabricutils.fabric_duckdb_connection import FabricDuckDBConnection +from msfabricutils.core import ( + get_fabric_bearer_token, + get_onelake_access_token, + get_workspace, + get_workspace_lakehouse_tables, + get_workspace_lakehouses, + get_workspaces, +) __all__ = ( "FabricDuckDBConnection", + "get_fabric_bearer_token", + "get_onelake_access_token", + "get_workspace", + "get_workspace_lakehouse_tables", + "get_workspace_lakehouses", + "get_workspaces", ) From 35c902a5f4f3530e44b557f460493df48a236033 Mon Sep 17 00:00:00 2001 From: jsj Date: Tue, 3 Dec 2024 23:00:17 +0100 Subject: [PATCH 3/8] update docs --- README.md | 14 ++++++++++---- docs/index.md | 9 ++++++--- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1f7f98a..8df6daf 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,15 @@ A collection of Spark-free Python utilities for working with Microsoft Fabric in ![How to select Python Notebook](docs/images/select-python-notebooks.png) ## Features + +### Local development first +- Aims to provide a local development experience "within" Fabric + +### Fabric DuckDB Connection Seamless integration between DuckDB and Microsoft Fabric Lakehouses - Cross-workspace and cross-lakehouse querying capabilities -- Automatic table registration and authentication +- Automatic table registration +- Reading from and writing to OneLake, both inside and outside Fabric - Support for Delta Lake tables - Flexible table name referencing (1-part to 4-part 
names) ## Installation @@ -16,9 +21,10 @@ pip install msfabricutils ``` ## Quick Start ```python -from msfabricutils import FabricDuckDBConnection -# Initialize connection -access_token = notebookutils.credentials.getToken('storage') +from msfabricutils import FabricDuckDBConnection, get_onelake_access_token + +# Initialize connection +access_token = get_onelake_access_token() conn = FabricDuckDBConnection(access_token=access_token) # Register lakehouses from different workspaces diff --git a/docs/index.md b/docs/index.md index e0304ff..883daff 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,7 +4,10 @@ A collection of **Spark-free** Python utilities for working with Microsoft Fabri ## Features -### DuckDB Connection Wrapper +### Local development first +- Aims to provide local development support "within" Fabric + +### DuckDB Connection - Seamless integration between DuckDB and Microsoft Fabric Lakehouses - Cross-lakehouse and cross-workspace querying - Delta Lake writing features @@ -27,10 +30,10 @@ Ensure you are working in a Python Notebook: ![Select Python Notebook](images/select-python-notebooks.png) ```python -from msfabricutils import FabricDuckDBConnection +from msfabricutils import FabricDuckDBConnection, get_onelake_access_token #Initialize connection -access_token = notebookutils.credentials.getToken("storage") +access_token = get_onelake_access_token() conn = FabricDuckDBConnection(access_token=access_token) From b3e44c31bab5f28becc0a36adba394d7a6c39e6e Mon Sep 17 00:00:00 2001 From: jsj Date: Tue, 3 Dec 2024 23:03:07 +0100 Subject: [PATCH 4/8] type ignore --- src/msfabricutils/core/auth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/msfabricutils/core/auth.py b/src/msfabricutils/core/auth.py index 043bf84..96a8cac 100644 --- a/src/msfabricutils/core/auth.py +++ b/src/msfabricutils/core/auth.py @@ -16,7 +16,7 @@ def get_onelake_access_token() -> str: """ audience = "https://storage.azure.com" try: - import 
notebookutils + import notebookutils # type: ignore token = notebookutils.credentials.getToken(audience) except ModuleNotFoundError: @@ -40,7 +40,7 @@ def get_fabric_bearer_token() -> str: """ audience = "https://analysis.windows.net/powerbi/api" try: - import notebookutils + import notebookutils # type: ignore token = notebookutils.credentials.getToken(audience) except ModuleNotFoundError: From 5ef6d917a0b12d89c627c71d788fd84a48e1ad9d Mon Sep 17 00:00:00 2001 From: jsj Date: Tue, 3 Dec 2024 23:03:18 +0100 Subject: [PATCH 5/8] add type hints --- src/msfabricutils/core/lakehouse.py | 4 ++-- src/msfabricutils/core/workspace.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/msfabricutils/core/lakehouse.py b/src/msfabricutils/core/lakehouse.py index e347976..defe356 100644 --- a/src/msfabricutils/core/lakehouse.py +++ b/src/msfabricutils/core/lakehouse.py @@ -1,7 +1,7 @@ from msfabricutils.core.generic import get_paginated -def get_workspace_lakehouses(workspace_id: str): +def get_workspace_lakehouses(workspace_id: str) -> list[dict]: """ Retrieves lakehouses for a specified workspace. @@ -24,7 +24,7 @@ def get_workspace_lakehouses(workspace_id: str): return get_paginated(endpoint, data_key) -def get_workspace_lakehouse_tables(workspace_id: str, lakehouse_id: str): +def get_workspace_lakehouse_tables(workspace_id: str, lakehouse_id: str) -> list[dict]: """ Retrieves tables for a specified lakehouse within a workspace. diff --git a/src/msfabricutils/core/workspace.py b/src/msfabricutils/core/workspace.py index 80ffd57..a7261ea 100644 --- a/src/msfabricutils/core/workspace.py +++ b/src/msfabricutils/core/workspace.py @@ -1,7 +1,7 @@ from msfabricutils.core.generic import get_paginated, get_page -def get_workspaces(): +def get_workspaces() -> list[dict]: """ Retrieves a list of workspaces. 
@@ -21,7 +21,7 @@ def get_workspaces(): return get_paginated(endpoint, data_key) -def get_workspace(workspace_id: str): +def get_workspace(workspace_id: str) -> dict: """ Retrieves details of a specified workspace. From 3f678220434d153fb0032691278fe074fd5c09de Mon Sep 17 00:00:00 2001 From: jsj Date: Tue, 3 Dec 2024 23:10:17 +0100 Subject: [PATCH 6/8] nit --- src/msfabricutils/core/lakehouse.py | 4 ++-- src/msfabricutils/core/workspace.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/msfabricutils/core/lakehouse.py b/src/msfabricutils/core/lakehouse.py index defe356..ff0304f 100644 --- a/src/msfabricutils/core/lakehouse.py +++ b/src/msfabricutils/core/lakehouse.py @@ -16,7 +16,7 @@ def get_workspace_lakehouses(workspace_id: str) -> list[dict]: list[dict]: A list of dictionaries containing lakehouse data for the specified workspace. See Also: - get_paginated: A helper function that handles paginated API requests. + `get_paginated`: A helper function that handles paginated API requests. """ endpoint = f"workspaces/{workspace_id}/lakehouses" data_key = "value" @@ -40,7 +40,7 @@ def get_workspace_lakehouse_tables(workspace_id: str, lakehouse_id: str) -> list list[dict]: A list of dictionaries containing table data for the specified lakehouse. See Also: - get_paginated: A helper function that handles paginated API requests. + `get_paginated`: A helper function that handles paginated API requests. """ endpoint = f"workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables" data_key = "data" diff --git a/src/msfabricutils/core/workspace.py b/src/msfabricutils/core/workspace.py index a7261ea..f2d4119 100644 --- a/src/msfabricutils/core/workspace.py +++ b/src/msfabricutils/core/workspace.py @@ -13,7 +13,7 @@ def get_workspaces() -> list[dict]: list[dict]: A list of dictionaries containing data for the available workspaces. See Also: - get_paginated: A helper function that handles paginated API requests. 
+ `get_paginated`: A helper function that handles paginated API requests. """ endpoint = "workspaces" data_key = "value" @@ -35,7 +35,7 @@ def get_workspace(workspace_id: str) -> dict: dict: A dictionary containing the details of the specified workspace. See Also: - get_page: A helper function that retrieves a single page of data from the API. + `get_page`: A helper function that retrieves a single page of data from the API. """ endpoint = f"workspaces/{workspace_id}" From a93fe32105b4d9d08c1a07350db47b50005a1a82 Mon Sep 17 00:00:00 2001 From: jsj Date: Tue, 3 Dec 2024 23:10:40 +0100 Subject: [PATCH 7/8] add sites to nav --- mkdocs.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 7fc6043..555bed1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -49,9 +49,11 @@ nav: - Home: index.md - API Reference: - FabricDuckDBConnection: reference/msfabricutils/fabric_duckdb_connection.md - #- Helpers: - # - Overview: reference/msfabricutils/helpers/index.md - # - Separator Indices: reference/msfabricutils/helpers/separator_indices.md + - Core: + - Authentication: reference/msfabricutils/core/auth.md + - Utilities: + - Lakehouse: reference/msfabricutils/core/lakehouse.md + - Workspace: reference/msfabricutils/core/workspace.md # Formatting options markdown_extensions: From e2c1e1ec876ed9a839f52ccd160eb1248f037378 Mon Sep 17 00:00:00 2001 From: jsj Date: Tue, 3 Dec 2024 23:11:38 +0100 Subject: [PATCH 8/8] up version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 12208a2..934e8a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "msfabricutils" -version = "0.1.8" +version = "0.2.0" description = "A Python library exposes additional functionality to work with Python Notebooks in Microsoft Fabric." authors = [ { name = "Jimmy Jensen" },