diff --git a/README.md b/README.md index 1f7f98a..8df6daf 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,15 @@ A collection of Spark-free Python utilities for working with Microsoft Fabric in ![How to select Python Notebook](docs/images/select-python-notebooks.png) ## Features + +### Local development first +- Aim to provide a local development experience "within" Fabric + ### Fabric DuckDB Connection Seamless integration between DuckDB and Microsoft Fabric Lakehouses - Cross-workspace and cross-lakehouse querying capabilities -- Automatic table registration and authentication +- Automatic table registration +- Reading from and writing to OneLake, both inside and outside Fabric - Support for Delta Lake tables - Flexible table name referencing (1-part to 4-part names) ## Installation @@ -16,9 +21,10 @@ pip install msfabricutils ``` ## Quick Start ```python -from msfabricutils import FabricDuckDBConnection -# Initialize connection -access_token = notebookutils.credentials.getToken('storage') +from msfabricutils import FabricDuckDBConnection, get_onelake_access_token + +# Initialize connection +access_token = get_onelake_access_token() conn = FabricDuckDBConnection(access_token=access_token) # Register lakehouses from different workspaces diff --git a/docs/index.md b/docs/index.md index e0304ff..883daff 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,7 +4,10 @@ A collection of **Spark-free** Python utilities for working with Microsoft Fabri ## Features -### DuckDB Connection Wrapper +### Local development first +- Aim to provide a local development experience "within" Fabric + +### DuckDB Connection - Seamless integration between DuckDB and Microsoft Fabric Lakehouses - Cross-lakehouse and cross-workspace querying - Delta Lake writing features @@ -27,10 +30,10 @@ Ensure you are working in a Python Notebook: ![Select Python Notebook](images/select-python-notebooks.png) ```python -from msfabricutils import FabricDuckDBConnection +from msfabricutils import FabricDuckDBConnection, 
get_onelake_access_token #Initialize connection -access_token = notebookutils.credentials.getToken("storage") +access_token = get_onelake_access_token() conn = FabricDuckDBConnection(access_token=access_token) diff --git a/mkdocs.yml b/mkdocs.yml index 7fc6043..555bed1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -49,9 +49,11 @@ nav: - Home: index.md - API Reference: - FabricDuckDBConnection: reference/msfabricutils/fabric_duckdb_connection.md - #- Helpers: - # - Overview: reference/msfabricutils/helpers/index.md - # - Separator Indices: reference/msfabricutils/helpers/separator_indices.md + - Core: + - Authentication: reference/msfabricutils/core/auth.md + - Utilities: + - Lakehouse: reference/msfabricutils/core/lakehouse.md + - Workspace: reference/msfabricutils/core/workspace.md # Formatting options markdown_extensions: diff --git a/pyproject.toml b/pyproject.toml index 12208a2..934e8a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "msfabricutils" -version = "0.1.8" +version = "0.2.0" description = "A Python library exposes additional functionality to work with Python Notebooks in Microsoft Fabric." 
authors = [ { name = "Jimmy Jensen" }, diff --git a/src/msfabricutils/__init__.py b/src/msfabricutils/__init__.py index 381df40..41aa9b0 100644 --- a/src/msfabricutils/__init__.py +++ b/src/msfabricutils/__init__.py @@ -1,5 +1,19 @@ from msfabricutils.fabric_duckdb_connection import FabricDuckDBConnection +from msfabricutils.core import ( + get_fabric_bearer_token, + get_onelake_access_token, + get_workspace, + get_workspace_lakehouse_tables, + get_workspace_lakehouses, + get_workspaces, +) __all__ = ( "FabricDuckDBConnection", + "get_fabric_bearer_token", + "get_onelake_access_token", + "get_workspace", + "get_workspace_lakehouse_tables", + "get_workspace_lakehouses", + "get_workspaces", ) diff --git a/src/msfabricutils/core/auth.py b/src/msfabricutils/core/auth.py index 5c314ad..96a8cac 100644 --- a/src/msfabricutils/core/auth.py +++ b/src/msfabricutils/core/auth.py @@ -1,10 +1,22 @@ from azure.identity import DefaultAzureCredential -def get_onelake_access_token(): +def get_onelake_access_token() -> str: + """ + Retrieves an access token for Azure OneLake storage. + + This function attempts to obtain an access token for accessing Azure storage. + It first checks if the code is running in a Microsoft Fabric notebook environment + and attempts to use the `notebookutils` library to get the token. If the library + is not available, it falls back to using the `DefaultAzureCredential` from the Azure SDK + to fetch the token. + + Returns: + str: The access token used for authenticating requests to Azure OneLake storage. + """ audience = "https://storage.azure.com" try: - import notebookutils + import notebookutils # type: ignore token = notebookutils.credentials.getToken(audience) except ModuleNotFoundError: @@ -13,10 +25,22 @@ def get_onelake_access_token(): return token -def get_fabric_bearer_token(): +def get_fabric_bearer_token() -> str: + """ + Retrieves a bearer token for the Microsoft Fabric (Power BI) API. 
+ + This function attempts to obtain a bearer token for authenticating requests to the + Power BI API. It first checks if the code is running in a Microsoft Fabric + notebook environment and tries to use the `notebookutils` library to get the token. + If the library is not available, it falls back to using the `DefaultAzureCredential` + from the Azure SDK to fetch the token. + + Returns: + str: The bearer token used for authenticating requests to the Microsoft Fabric (Power BI) API. + """ audience = "https://analysis.windows.net/powerbi/api" try: - import notebookutils + import notebookutils # type: ignore token = notebookutils.credentials.getToken(audience) except ModuleNotFoundError: diff --git a/src/msfabricutils/core/generic.py b/src/msfabricutils/core/generic.py index 40357d3..af9fe5f 100644 --- a/src/msfabricutils/core/generic.py +++ b/src/msfabricutils/core/generic.py @@ -3,6 +3,24 @@ def get_paginated(endpoint: str, data_key: str) -> list[dict]: + """ + Retrieves paginated data from the specified API endpoint. + + This function makes repeated GET requests to the specified endpoint of the + Fabric REST API, handling pagination automatically. It uses a bearer token + for authentication and retrieves data from each page, appending the results + to a list. Pagination continues until no `continuationToken` is returned. + + Args: + endpoint (str): The API endpoint to retrieve data from. + data_key (str): The key in the response JSON that contains the list of data to be returned. + + Returns: + list[dict]: A list of dictionaries containing the data from all pages. + + Raises: + requests.exceptions.RequestException: If the HTTP request fails or returns an error. 
+ """ base_url = "https://api.fabric.microsoft.com/v1" token = get_fabric_bearer_token() headers = {"Authorization": f"Bearer {token}"} @@ -26,6 +44,22 @@ def get_paginated(endpoint: str, data_key: str) -> list[dict]: def get_page(endpoint: str) -> list[dict]: + """ + Retrieves data from a specified API endpoint. + + This function makes a GET request to the specified endpoint of the Fabric REST API, + using a bearer token for authentication. It returns the JSON response as a list of + dictionaries containing the data returned by the API. + + Args: + endpoint (str): The API endpoint to send the GET request to. + + Returns: + list[dict]: A list of dictionaries containing the data returned from the API. + + Raises: + requests.exceptions.RequestException: If the HTTP request fails or returns an error. + """ base_url = "https://api.fabric.microsoft.com/v1" token = get_fabric_bearer_token() headers = {"Authorization": f"Bearer {token}"} diff --git a/src/msfabricutils/core/lakehouse.py b/src/msfabricutils/core/lakehouse.py index 361de4d..ff0304f 100644 --- a/src/msfabricutils/core/lakehouse.py +++ b/src/msfabricutils/core/lakehouse.py @@ -1,14 +1,47 @@ from msfabricutils.core.generic import get_paginated -def get_workspace_lakehouses(workspace_id: str): +def get_workspace_lakehouses(workspace_id: str) -> list[dict]: + """ + Retrieves lakehouses for a specified workspace. + + This function fetches a list of lakehouses from a specified workspace using the + `get_paginated` function. It constructs the appropriate endpoint and retrieves + paginated data associated with the workspace ID. + + Args: + workspace_id (str): The ID of the workspace to retrieve lakehouses from. + + Returns: + list[dict]: A list of dictionaries containing lakehouse data for the specified workspace. + + See Also: + `get_paginated`: A helper function that handles paginated API requests. 
+ """ endpoint = f"workspaces/{workspace_id}/lakehouses" data_key = "value" return get_paginated(endpoint, data_key) -def get_workspace_lakehouse_tables(workspace_id: str, lakehouse_id: str): +def get_workspace_lakehouse_tables(workspace_id: str, lakehouse_id: str) -> list[dict]: + """ + Retrieves tables for a specified lakehouse within a workspace. + + This function fetches a list of tables from a specific lakehouse within a given workspace + using the `get_paginated` function. It constructs the appropriate endpoint and retrieves + paginated data associated with the workspace and lakehouse IDs. + + Args: + workspace_id (str): The ID of the workspace containing the lakehouse. + lakehouse_id (str): The ID of the lakehouse to retrieve tables from. + + Returns: + list[dict]: A list of dictionaries containing table data for the specified lakehouse. + + See Also: + `get_paginated`: A helper function that handles paginated API requests. + """ endpoint = f"workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables" data_key = "data" diff --git a/src/msfabricutils/core/workspace.py b/src/msfabricutils/core/workspace.py index 80e2f77..f2d4119 100644 --- a/src/msfabricutils/core/workspace.py +++ b/src/msfabricutils/core/workspace.py @@ -1,14 +1,42 @@ from msfabricutils.core.generic import get_paginated, get_page -def get_workspaces(): +def get_workspaces() -> list[dict]: + """ + Retrieves a list of workspaces. + + This function fetches a list of workspaces using the `get_paginated` function. + It constructs the appropriate endpoint and retrieves the paginated data associated + with workspaces. + + Returns: + list[dict]: A list of dictionaries containing data for the available workspaces. + + See Also: + `get_paginated`: A helper function that handles paginated API requests. 
+ """ endpoint = "workspaces" data_key = "value" return get_paginated(endpoint, data_key) -def get_workspace(workspace_id: str): +def get_workspace(workspace_id: str) -> dict: + """ + Retrieves details of a specified workspace. + + This function fetches the details of a specific workspace by using the `get_page` + function. It constructs the appropriate endpoint based on the provided workspace ID. + + Args: + workspace_id (str): The ID of the workspace to retrieve details for. + + Returns: + dict: A dictionary containing the details of the specified workspace. + + See Also: + `get_page`: A helper function that retrieves a single page of data from the API. + """ endpoint = f"workspaces/{workspace_id}" return get_page(endpoint)