This repository has been archived by the owner on Mar 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 736
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add the initial version of Couchbase documents reader * Update the Readme * Add doc strings * Update the examples link --------- Co-authored-by: Nithish Raghunandanan <[email protected]>
- Loading branch information
Showing
5 changed files
with
180 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# Couchbase Loader | ||
|
||
This loader loads documents from a Couchbase cluster. | ||
The user specifies a Couchbase client or credentials to initialize the reader. They can specify the SQL++ query to | ||
fetch the relevant docs. | ||
|
||
## Usage | ||
|
||
Here's an example usage of the CouchbaseReader. | ||
|
||
```python | ||
from llama_index import download_loader | ||
import os | ||
|
||
CouchbaseLoader = download_loader('CouchbaseReader') | ||
|
||
connection_string = "couchbase://localhost" # valid Couchbase connection string | ||
db_username = "<valid_database_user_with_read_access_to_bucket_with_data>" | ||
db_password = "<password_for_database_user>" | ||
|
||
# query is a valid SQL++ query that is passed to client.query() | ||
query = """ | ||
SELECT h.* FROM `travel-sample`.inventory.hotel h | ||
WHERE h.country = 'United States' | ||
LIMIT 5 | ||
""" | ||
|
||
reader = CouchbaseLoader( | ||
connection_string=connection_string, | ||
db_username=db_username, | ||
db_password=db_password | ||
) | ||
|
||
# It is also possible to pass an initialized Couchbase client to the document loader | ||
# from couchbase.auth import PasswordAuthenticator # noqa: E402 | ||
# from couchbase.cluster import Cluster # noqa: E402 | ||
# from couchbase.options import ClusterOptions # noqa: E402 | ||
|
||
# auth = PasswordAuthenticator( | ||
# db_username, | ||
# db_password, | ||
# ) | ||
|
||
# couchbase_client = Cluster(connection_string, ClusterOptions(auth)) | ||
# reader = CouchbaseLoader(client=couchbase_client) | ||
|
||
# fields to be written to the document | ||
text_fields=["name", "title", "address", "reviews"] | ||
|
||
# metadata fields to be written to the document's metadata | ||
metadata_fields=["country", "city"] | ||
|
||
documents = reader.load_data(query=query, text_fields=text_fields, metadata_fields=metadata_fields) | ||
``` | ||
|
||
This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/run-llama/llama_index/tree/main/llama_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/run-llama/llama-hub/tree/main) for examples. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
"""Init file.""" | ||
from llama_hub.couchbase.base import ( | ||
CouchbaseReader, | ||
) | ||
|
||
__all__ = ["CouchbaseReader"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
"""Couchbase document loader""" | ||
|
||
from typing import Any, Iterable, List, Optional | ||
from llama_index.readers.base import BaseReader | ||
from llama_index.readers.schema.base import Document | ||
|
||
|
||
class CouchbaseReader(BaseReader):
    """Couchbase document loader.

    Loads data from a Couchbase cluster into Document used by LlamaIndex.

    Args:
        client (Optional[Any]): A Couchbase client to use.
            If not provided, the client will be created based on the
            connection_string and database credentials.
        connection_string (Optional[str]): The connection string to the
            Couchbase cluster.
        db_username (Optional[str]): The username to connect to the
            Couchbase cluster.
        db_password (Optional[str]): The password to connect to the
            Couchbase cluster.

    Raises:
        ImportError: If the `couchbase` package is not installed.
        ValueError: If no client is given and the connection string or
            either credential is missing.
    """

    def __init__(
        self,
        client: Optional[Any] = None,
        connection_string: Optional[str] = None,
        db_username: Optional[str] = None,
        db_password: Optional[str] = None,
    ) -> None:
        """Initialize Couchbase document loader."""
        import_err_msg = "`couchbase` package not found, please run `pip install --upgrade couchbase`"
        try:
            from couchbase.auth import PasswordAuthenticator
            from couchbase.cluster import Cluster
            from couchbase.options import ClusterOptions
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(import_err_msg) from err

        if not client:
            if not connection_string or not db_username or not db_password:
                raise ValueError(
                    "You need to pass either a couchbase client or connection_string and credentials must be provided."
                )

            auth = PasswordAuthenticator(
                db_username,
                db_password,
            )
            self._client: Cluster = Cluster(connection_string, ClusterOptions(auth))
        else:
            self._client = client

    def lazy_load_data(
        self,
        query: str,
        text_fields: Optional[List[str]] = None,
        metadata_fields: Optional[List[str]] = None,
    ) -> Iterable[Document]:
        """Load data from the Couchbase cluster lazily.

        This is a generator: validation, the readiness check and the query
        only run once iteration starts.

        Args:
            query (str): The SQL++ query to execute.
            text_fields (Optional[List[str]]): The columns to write into the
                `text` field of the document. By default, all columns of each
                row are written.
            metadata_fields (Optional[List[str]]): The columns to write into the
                `metadata` field of the document. By default, no columns are
                written.

        Yields:
            Document: One document per row returned by the query.

        Raises:
            ValueError: If `query` is empty.
            KeyError: If a requested metadata field is missing from a row.
        """
        from datetime import timedelta

        if not query:
            raise ValueError("Query must be provided.")

        # Treat None (the default, and what load_data forwards) as "no metadata"
        # instead of failing when iterating over it.
        metadata_keys = list(metadata_fields) if metadata_fields else []

        # None means "keep every column of every row". The selection is
        # deliberately not frozen from the first row, because rows returned
        # by a query may not all share the same keys.
        selected_fields = set(text_fields) if text_fields else None

        # Ensure connection to Couchbase cluster
        self._client.wait_until_ready(timedelta(seconds=5))

        # Run SQL++ Query
        for row in self._client.query(query):
            metadata = {field: row[field] for field in metadata_keys}

            text = "\n".join(
                f"{key}: {value}"
                for key, value in row.items()
                if selected_fields is None or key in selected_fields
            )

            yield Document(text=text, metadata=metadata)

    def load_data(
        self,
        query: str,
        text_fields: Optional[List[str]] = None,
        metadata_fields: Optional[List[str]] = None,
    ) -> List[Document]:
        """Load data from the Couchbase cluster.

        Eagerly collects everything produced by `lazy_load_data`.

        Args:
            query (str): The SQL++ query to execute.
            text_fields (Optional[List[str]]): The columns to write into the
                `text` field of the document. By default, all columns are
                written.
            metadata_fields (Optional[List[str]]): The columns to write into the
                `metadata` field of the document. By default, no columns are
                written.

        Returns:
            List[Document]: One document per row returned by the query.
        """
        return list(self.lazy_load_data(query, text_fields, metadata_fields))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
couchbase |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters