Skip to content

Commit

Permalink
Merge pull request #22 from mrjsj/chore/prepare-release
Browse files Browse the repository at this point in the history
chore: updated docs, prepare for release 0.2.0
  • Loading branch information
mrjsj authored Dec 3, 2024
2 parents 999e0ad + e2c1e1e commit 963a375
Showing 9 changed files with 163 additions and 19 deletions.
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -4,10 +4,15 @@ A collection of Spark-free Python utilities for working with Microsoft Fabric in
![How to select Python Notebook](docs/images/select-python-notebooks.png)

## Features

### Local development first
- Aim to provide local development support "within" Fabric

### Fabric DuckDB Connection
Seamless integration between DuckDB and Microsoft Fabric Lakehouses
- Cross-workspace and cross-lakehouse querying capabilities
- Automatic table registration and authentication
- Automatic table registration
- Reading from and writing to OneLake, both outside and inside Fabric
- Support for Delta Lake tables
- Flexible table name referencing (1-part to 4-part names)
## Installation
@@ -16,9 +21,10 @@ pip install msfabricutils
```
## Quick Start
```python
from msfabricutils import FabricDuckDBConnection
# Initialize connection
access_token = notebookutils.credentials.getToken('storage')
from msfabricutils import FabricDuckDBConnection, get_onelake_access_token

# Initialize connection
access_token = get_onelake_access_token()
conn = FabricDuckDBConnection(access_token=access_token)

# Register lakehouses from different workspaces
9 changes: 6 additions & 3 deletions docs/index.md
Original file line number Diff line number Diff line change
@@ -4,7 +4,10 @@ A collection of **Spark-free** Python utilities for working with Microsoft Fabri

## Features

### DuckDB Connection Wrapper
### Local development first
- Aim to provide local development support "within" Fabric

### DuckDB Connection
- Seamless integration between DuckDB and Microsoft Fabric Lakehouses
- Cross-lakehouse and cross-workspace querying
- Delta Lake writing features
@@ -27,10 +30,10 @@ Ensure you are working in a Python Notebook:
![Select Python Notebook](images/select-python-notebooks.png)

```python
from msfabricutils import FabricDuckDBConnection
from msfabricutils import FabricDuckDBConnection, get_onelake_access_token

# Initialize connection
access_token = notebookutils.credentials.getToken("storage")
access_token = get_onelake_access_token()

conn = FabricDuckDBConnection(access_token=access_token)

8 changes: 5 additions & 3 deletions mkdocs.yml
Original file line number Diff line number Diff line change
@@ -49,9 +49,11 @@ nav:
- Home: index.md
- API Reference:
- FabricDuckDBConnection: reference/msfabricutils/fabric_duckdb_connection.md
#- Helpers:
# - Overview: reference/msfabricutils/helpers/index.md
# - Separator Indices: reference/msfabricutils/helpers/separator_indices.md
- Core:
- Authentication: reference/msfabricutils/core/auth.md
- Utilities:
- Lakehouse: reference/msfabricutils/core/lakehouse.md
- Workspace: reference/msfabricutils/core/workspace.md

# Formatting options
markdown_extensions:
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "msfabricutils"
version = "0.1.8"
version = "0.2.0"
description = "A Python library that exposes additional functionality to work with Python Notebooks in Microsoft Fabric."
authors = [
{ name = "Jimmy Jensen" },
14 changes: 14 additions & 0 deletions src/msfabricutils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
from msfabricutils.fabric_duckdb_connection import FabricDuckDBConnection
from msfabricutils.core import (
get_fabric_bearer_token,
get_onelake_access_token,
get_workspace,
get_workspace_lakehouse_tables,
get_workspace_lakehouses,
get_workspaces,
)

# Public API of the package: the names imported above that are re-exported
# from the top-level `msfabricutils` namespace (and picked up by
# `from msfabricutils import *`).
__all__ = (
"FabricDuckDBConnection",
"get_fabric_bearer_token",
"get_onelake_access_token",
"get_workspace",
"get_workspace_lakehouse_tables",
"get_workspace_lakehouses",
"get_workspaces",
)
32 changes: 28 additions & 4 deletions src/msfabricutils/core/auth.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,22 @@
from azure.identity import DefaultAzureCredential


def get_onelake_access_token():
def get_onelake_access_token() -> str:
"""
Retrieves an access token for Azure OneLake storage.
This function attempts to obtain an access token for accessing Azure storage.
It first checks if the code is running in a Microsoft Fabric notebook environment
and attempts to use the `notebookutils` library to get the token. If the library
is not available, it falls back to using the `DefaultAzureCredential` from the Azure SDK
to fetch the token.
Returns:
str: The access token used for authenticating requests to Azure OneLake storage.
"""
audience = "https://storage.azure.com"
try:
import notebookutils
import notebookutils # type: ignore

token = notebookutils.credentials.getToken(audience)
except ModuleNotFoundError:
@@ -13,10 +25,22 @@ def get_onelake_access_token():
return token


def get_fabric_bearer_token():
def get_fabric_bearer_token() -> str:
"""
Retrieves a bearer token for the Microsoft Fabric (Power BI) API.
This function attempts to obtain a bearer token for authenticating requests to the
Microsoft Fabric (Power BI) API. It first checks if the code is running in a Microsoft Fabric
notebook environment and tries to use the `notebookutils` library to get the token.
If the library is not available, it falls back to using the `DefaultAzureCredential`
from the Azure SDK to fetch the token.
Returns:
str: The bearer token used for authenticating requests to the Microsoft Fabric (Power BI) API.
"""
audience = "https://analysis.windows.net/powerbi/api"
try:
import notebookutils
import notebookutils # type: ignore

token = notebookutils.credentials.getToken(audience)
except ModuleNotFoundError:
34 changes: 34 additions & 0 deletions src/msfabricutils/core/generic.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,24 @@


def get_paginated(endpoint: str, data_key: str) -> list[dict]:
"""
Retrieves paginated data from the specified API endpoint.
This function makes repeated GET requests to the specified endpoint of the
Fabric REST API, handling pagination automatically. It uses a bearer token
for authentication and retrieves data from each page, appending the results
to a list. Pagination continues until no `continuationToken` is returned.
Args:
endpoint (str): The API endpoint to retrieve data from.
data_key (str): The key in the response JSON that contains the list of data to be returned.
Returns:
list[dict]: A list of dictionaries containing the data from all pages.
Raises:
requests.exceptions.RequestException: If the HTTP request fails or returns an error.
"""
base_url = "https://api.fabric.microsoft.com/v1"
token = get_fabric_bearer_token()
headers = {"Authorization": f"Bearer {token}"}
@@ -26,6 +44,22 @@ def get_paginated(endpoint: str, data_key: str) -> list[dict]:


def get_page(endpoint: str) -> list[dict]:
"""
Retrieves data from a specified API endpoint.
This function makes a GET request to the specified endpoint of the Fabric REST API,
using a bearer token for authentication. It returns the JSON response as a list of
dictionaries containing the data returned by the API.
Args:
endpoint (str): The API endpoint to send the GET request to.
Returns:
list[dict]: A list of dictionaries containing the data returned from the API.
Raises:
requests.exceptions.RequestException: If the HTTP request fails or returns an error.
"""
base_url = "https://api.fabric.microsoft.com/v1"
token = get_fabric_bearer_token()
headers = {"Authorization": f"Bearer {token}"}
37 changes: 35 additions & 2 deletions src/msfabricutils/core/lakehouse.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,47 @@
from msfabricutils.core.generic import get_paginated


def get_workspace_lakehouses(workspace_id: str):
def get_workspace_lakehouses(workspace_id: str) -> list[dict]:
    """
    List the lakehouses of a workspace.

    Builds the ``workspaces/{workspace_id}/lakehouses`` endpoint and delegates
    to `get_paginated`, which reads the items of each page from the
    ``"value"`` key of the response.

    Args:
        workspace_id (str): The ID of the workspace to retrieve lakehouses from.

    Returns:
        list[dict]: The result of `get_paginated` for the lakehouses endpoint,
            i.e. one dictionary per lakehouse in the workspace.

    See Also:
        `get_paginated`: A helper function that handles paginated API requests.
    """
    return get_paginated(f"workspaces/{workspace_id}/lakehouses", "value")


def get_workspace_lakehouse_tables(workspace_id: str, lakehouse_id: str):
def get_workspace_lakehouse_tables(workspace_id: str, lakehouse_id: str) -> list[dict]:
"""
Retrieves tables for a specified lakehouse within a workspace.
This function fetches a list of tables from a specific lakehouse within a given workspace
using the `get_paginated` function. It constructs the appropriate endpoint and retrieves
paginated data associated with the workspace and lakehouse IDs.
Args:
workspace_id (str): The ID of the workspace containing the lakehouse.
lakehouse_id (str): The ID of the lakehouse to retrieve tables from.
Returns:
list[dict]: A list of dictionaries containing table data for the specified lakehouse.
See Also:
`get_paginated`: A helper function that handles paginated API requests.
"""
endpoint = f"workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables"
data_key = "data"

32 changes: 30 additions & 2 deletions src/msfabricutils/core/workspace.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,42 @@
from msfabricutils.core.generic import get_paginated, get_page


def get_workspaces():
def get_workspaces() -> list[dict]:
    """
    List the available workspaces.

    Delegates to `get_paginated` with the ``workspaces`` endpoint, reading the
    items of each page from the ``"value"`` key of the response.

    Returns:
        list[dict]: The result of `get_paginated` for the workspaces endpoint,
            i.e. one dictionary per workspace.

    See Also:
        `get_paginated`: A helper function that handles paginated API requests.
    """
    return get_paginated("workspaces", "value")


def get_workspace(workspace_id: str):
def get_workspace(workspace_id: str) -> dict:
    """
    Fetch the details of a single workspace.

    Builds the ``workspaces/{workspace_id}`` endpoint and returns whatever
    `get_page` yields for it.

    Args:
        workspace_id (str): The ID of the workspace to retrieve details for.

    Returns:
        dict: A dictionary containing the details of the specified workspace.

    See Also:
        `get_page`: A helper function that retrieves a single page of data from the API.
    """
    # NOTE(review): `get_page` is annotated as returning `list[dict]` while this
    # function is annotated `dict` -- confirm which annotation matches the API.
    return get_page(f"workspaces/{workspace_id}")

0 comments on commit 963a375

Please sign in to comment.