Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Add Couchbase Reader #892

Merged
merged 5 commits into from
Jan 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions llama_hub/couchbase/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Couchbase Loader

This loader loads documents from a Couchbase cluster.
The user specifies a Couchbase client or credentials to initialize the reader. They can specify the SQL++ query to
fetch the relevant docs.

## Usage

Here's an example usage of the CouchbaseReader.

```python
from llama_index import download_loader
import os

CouchbaseLoader = download_loader('CouchbaseReader')

connection_string = "couchbase://localhost" # valid Couchbase connection string
db_username = "<valid_database_user_with_read_access_to_bucket_with_data>"
db_password = "<password_for_database_user>"

# query is a valid SQL++ query that is passed to client.query()
query = """
SELECT h.* FROM `travel-sample`.inventory.hotel h
WHERE h.country = 'United States'
LIMIT 5
"""

reader = CouchbaseLoader(
connection_string=connection_string,
db_username=db_username,
db_password=db_password
)

# It is also possible to pass an initialized Couchbase client to the document loader
# from couchbase.auth import PasswordAuthenticator # noqa: E402
# from couchbase.cluster import Cluster # noqa: E402
# from couchbase.options import ClusterOptions # noqa: E402

# auth = PasswordAuthenticator(
# db_username,
# db_password,
# )

# couchbase_client = Cluster(connection_string, ClusterOptions(auth))
# reader = CouchbaseLoader(client=couchbase_client)

# fields to be written to the document
text_fields=["name", "title", "address", "reviews"]

# metadata fields to be written to the document's metadata
metadata_fields=["country", "city"]

documents = reader.load_data(query=query, text_fields=text_fields, metadata_fields=metadata_fields)
```

This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/run-llama/llama_index/tree/main/llama_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/run-llama/llama-hub/tree/main) for examples.
6 changes: 6 additions & 0 deletions llama_hub/couchbase/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Init file."""
from llama_hub.couchbase.base import (
CouchbaseReader,
)

# Public API of the package: only the reader class is re-exported.
__all__ = ["CouchbaseReader"]
107 changes: 107 additions & 0 deletions llama_hub/couchbase/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""Couchbase document loader"""

from typing import Any, Iterable, List, Optional
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document


class CouchbaseReader(BaseReader):
    """Couchbase document loader.

    Loads data from a Couchbase cluster into Document used by LlamaIndex.

    Args:
        client (Optional[Any]): A Couchbase client to use.
            If not provided, the client will be created based on the
            connection_string and database credentials.
        connection_string (Optional[str]): The connection string to the
            Couchbase cluster.
        db_username (Optional[str]): The username to connect to the
            Couchbase cluster.
        db_password (Optional[str]): The password to connect to the
            Couchbase cluster.

    Raises:
        ImportError: If the `couchbase` package is not installed.
        ValueError: If neither a client nor a complete set of
            connection_string, db_username and db_password is given.
    """

    def __init__(
        self,
        client: Optional[Any] = None,
        connection_string: Optional[str] = None,
        db_username: Optional[str] = None,
        db_password: Optional[str] = None,
    ) -> None:
        """Initialize Couchbase document loader."""
        import_err_msg = "`couchbase` package not found, please run `pip install --upgrade couchbase`"
        # The SDK is imported lazily so that merely importing this module
        # does not require `couchbase` to be installed.
        try:
            from couchbase.auth import PasswordAuthenticator
            from couchbase.cluster import Cluster
            from couchbase.options import ClusterOptions
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(import_err_msg) from err

        if not client:
            if not connection_string or not db_username or not db_password:
                raise ValueError(
                    "You need to pass either a couchbase client or connection_string and credentials must be provided."
                )

            auth = PasswordAuthenticator(
                db_username,
                db_password,
            )
            self._client: Cluster = Cluster(connection_string, ClusterOptions(auth))
        else:
            # Reuse the caller-supplied, already configured client as-is.
            self._client = client

    def lazy_load_data(
        self,
        query: str,
        text_fields: Optional[List[str]] = None,
        metadata_fields: Optional[List[str]] = None,
    ) -> Iterable[Document]:
        """Load data from the Couchbase cluster lazily.

        This is a generator: nothing (including argument validation and the
        query itself) runs until the result is iterated.

        Args:
            query (str): The SQL++ query to execute.
            text_fields (Optional[List[str]]): The columns to write into the
                `text` field of the document. By default, all columns of
                each row are written.
            metadata_fields (Optional[List[str]]): The columns to write into the
                `metadata` field of the document. By default, no columns are
                written. Every listed field is looked up with `row[field]`,
                so it must be present in each returned row.

        Yields:
            Document: One document per row returned by the query. The text is
            made of `key: value` lines joined by newlines.

        Raises:
            ValueError: If `query` is empty.
        """
        from datetime import timedelta

        if not query:
            raise ValueError("Query must be provided.")

        # `load_data` forwards None when the caller gives no metadata fields;
        # normalise it here instead of relying on a shared mutable default.
        metadata_keys = metadata_fields if metadata_fields else []

        # Ensure connection to Couchbase cluster
        self._client.wait_until_ready(timedelta(seconds=5))

        # Run SQL++ Query
        result = self._client.query(query)
        for row in result:
            metadata = {field: row[field] for field in metadata_keys}

            # Without an explicit selection every column of *this* row is
            # written; the selection is not frozen to the first row's keys,
            # so rows with differing schemas keep all of their columns.
            if text_fields:
                selected = ((k, v) for k, v in row.items() if k in text_fields)
            else:
                selected = row.items()

            document = "\n".join(f"{k}: {v}" for k, v in selected)

            yield Document(text=document, metadata=metadata)

    def load_data(
        self,
        query: str,
        text_fields: Optional[List[str]] = None,
        metadata_fields: Optional[List[str]] = None,
    ) -> List[Document]:
        """Load data from the Couchbase cluster.

        Eager counterpart of `lazy_load_data`: runs the query and collects
        every resulting document into a list.

        Args:
            query (str): The SQL++ query to execute.
            text_fields (Optional[List[str]]): The columns to write into the
                `text` field of the document. By default, all columns are
                written.
            metadata_fields (Optional[List[str]]): The columns to write into the
                `metadata` field of the document. By default, no columns are written.

        Returns:
            List[Document]: One document per row returned by the query.

        Raises:
            ValueError: If `query` is empty.
        """
        return list(self.lazy_load_data(query, text_fields, metadata_fields))
1 change: 1 addition & 0 deletions llama_hub/couchbase/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
couchbase
11 changes: 10 additions & 1 deletion llama_hub/library.json
Original file line number Diff line number Diff line change
Expand Up @@ -1223,5 +1223,14 @@
"XMLReader": {
"id": "file/xml",
"author": "mmaatouk"
},
"CouchbaseReader": {
"id": "couchbase",
"author": "nithishr",
"keywords": [
"Couchbase",
"Capella",
"NoSQL"
]
}
}
}
Loading