Commit 787c38a: initial commit
kmmahaj committed Jun 26, 2024
1 parent 29d9833
Showing 64 changed files with 11,880 additions and 9 deletions.
Binary file added .DS_Store
Binary file not shown.
40 changes: 31 additions & 9 deletions README.md
@@ -1,17 +1,39 @@
# RAG with ACLs on Amazon FSx for NetApp ONTAP data using Amazon Bedrock and LangChain
## Overview
This reference architecture implements a RAG chat engine on top of Amazon FSx for NetApp ONTAP (FSxN) data whose access permissions are predefined through Active Directory ACLs.

## Infrastructure Allocation
Terraform creates all the required AWS and application resources in your AWS account. You can read about it [here](/terraform/).

![general architecture](/images/architecture.png)
## Embedding Engine
This process takes the documents stored on the FSxN filesystem and embeds them, together with their Access Control Lists, into the OpenSearch vector DB. You can read about the process in more detail [here](/embed/).

![embedding](/images/embedding.png)
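
The actual embed service is Node-based (see [/embed/](/embed/)); purely as an illustration, a Python LangChain sketch of the same idea, storing each chunk together with the SIDs allowed to read it, could look like the following (the embedding model id and index name come from the embed config; the path, SID, and endpoint are hypothetical):

```
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.vectorstores import OpenSearchVectorSearch
from langchain_core.documents import Document

embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v2:0")

# One chunk of an FSxN file, carrying its ACL SIDs as metadata so that
# retrieval can later filter results on the caller's SID.
docs = [
    Document(
        page_content="chunk of text extracted from an FSxN file",
        metadata={"source": "/fsxn/share/report.pdf",          # hypothetical path
                  "acl_sids": ["S-1-5-21-111-222-333-1001"]},  # hypothetical SID
    )
]

OpenSearchVectorSearch.from_documents(
    docs,
    embeddings,
    opensearch_url="https://<collection-endpoint>",  # placeholder endpoint
    index_name="rag",
)
```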

## Retrieval Engine
The retrieval engine is a Lambda function that receives a prompt and model parameters and uses RAG to answer based on the embeddings of the FSxN data. It also filters data access based on the SID (ACL) provided. You can read about the process in more detail [here](/lambda/).
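
As an illustration, a minimal Python client for this API, mirroring the payload that the chat app (``chatapp/chatapi.py``) sends, might look like this sketch (the URL and SID are placeholders):

```
import requests

payload = {
    "session_id": "12345",
    "prompt": "What do my documents say about Evander Holyfield?",
    "bedrock_model_id": "anthropic.claude-3-sonnet-20240229-v1:0",
    "model_kwargs": {"temperature": 1.0, "top_p": 1.0, "top_k": 500, "max_tokens": 1024},
    "metadata": "S-1-5-21-111-222-333-1001",  # caller's SID; the chat app sends "NA" when no filter is set
    "memory_window": 10,
}

resp = requests.post("https://<api-gw-url>", json=payload, timeout=60)
body = resp.json().get("body")
print(body["answer"])
print(body["documents"])
```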

## Chatbot
The chatbot is a simple chat UI that simplifies access to the retrieval engine. You can read more about it [here](/chatapp/).

## Test
### Prerequisites
* [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) installed on host
* AWS [Credentials configured](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-authentication.html)
* [Terraform](https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli) installed on host
* [Docker engine](https://docs.docker.com/engine/install/) installed on host

### Start the environment
Use the following to start the environment:
```
terraform init
terraform apply --auto-approve
```

### Clear the environment
Use the following to clear the environment:
```
terraform destroy --auto-approve
```
3 changes: 3 additions & 0 deletions chatapp/.streamlit/config.toml
@@ -0,0 +1,3 @@
[theme]
base="dark"
primaryColor="#FF9900"
20 changes: 20 additions & 0 deletions chatapp/Dockerfile
@@ -0,0 +1,20 @@
FROM python:3.9-slim

WORKDIR /app

RUN apt-get update && apt-get install -y \
build-essential \
curl \
software-properties-common \
git \
&& rm -rf /var/lib/apt/lists/*

COPY ./ ./

RUN pip3 install -r requirements.txt

EXPOSE 8501

HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

ENTRYPOINT ["streamlit", "run", "chatapi.py", "--server.port=8501", "--server.address=0.0.0.0"]
194 changes: 194 additions & 0 deletions chatapp/chatapi.py
@@ -0,0 +1,194 @@
import json
import time
import streamlit as st
import random
import requests
import os

from config import config
from typing import Tuple, Dict

INIT_MESSAGE = {"role": "assistant",
                "content": "Hi! I'm Claude on Bedrock. I can help you with queries on your FSxN data. \n What would you like to know?",
                "documents": []}

def new_chat() -> None:
st.session_state["sessionId"] = str(random.randint(1, 1000000))
st.session_state["messages"] = [INIT_MESSAGE]
st.session_state["langchain_messages"] = []

def set_page_config() -> None:
st.set_page_config(page_title="🤖 Chat with your FSxN data", layout="wide")
st.title("🤖 Chat with your FSxN data")

def render_sidebar() -> Tuple[str, Dict, int, str, str]:
with st.sidebar:
# st.markdown("## Inference Parameters")
model_name_select = st.selectbox(
'Model',
list(config["models"].keys()),
key=f"{st.session_state['sessionId']}_Model_Id",
)

st.session_state["model_name"] = model_name_select

model_config = config["models"][model_name_select]

metadata = st.text_input(
'User (SID) filter search',
key=f"{st.session_state['sessionId']}_Metadata",
)
with st.container():
col1, col2 = st.columns(2)
with col1:
temperature = st.slider(
"Temperature",
min_value=0.0,
max_value=1.0,
value=model_config.get("temperature", 1.0),
step=0.1,
key=f"{st.session_state['sessionId']}_Temperature",
)
with col2:
max_tokens = st.slider(
"Max Token",
min_value=0,
max_value=4096,
value=model_config.get("max_tokens", 4096),
step=8,
key=f"{st.session_state['sessionId']}_Max_Token",
)
with st.container():
col1, col2 = st.columns(2)
with col1:
top_p = st.slider(
"Top-P",
min_value=0.0,
max_value=1.0,
value=model_config.get("top_p", 1.0),
step=0.01,
key=f"{st.session_state['sessionId']}_Top-P",
)
with col2:
top_k = st.slider(
"Top-K",
min_value=1,
max_value=model_config.get("max_top_k", 500),
value=model_config.get("top_k", 500),
step=5,
key=f"{st.session_state['sessionId']}_Top-K",
)
with st.container():
col1, col2 = st.columns(2)
with col1:
memory_window = st.slider(
"Memory Window",
min_value=0,
max_value=10,
value=model_config.get("memory_window", 10),
step=1,
key=f"{st.session_state['sessionId']}_Memory_Window",
)
with st.container():
with st.expander("Chat URL"):
form = st.form("chat_form")
                url = form.text_input("Chat Url", os.environ.get("CHAT_URL", ""))  # empty if unset, so the check below can flag it
form.form_submit_button("Submit")
if not url:
st.error("Please enter a valid URL")

st.sidebar.button("New Chat", on_click=new_chat, type="primary")

model_kwargs = {
"temperature": temperature,
"top_p": top_p,
"top_k": top_k,
"max_tokens": max_tokens,
}


    return model_config['model_id'], model_kwargs, memory_window, metadata, url

def init_conversationchain(prompt, bedrock_model_id, model_kwargs, metadata, memory_window, url):
try:
if not metadata:
metadata = "NA"

payload = json.dumps({
"session_id": st.session_state["sessionId"],
"prompt": prompt,
"bedrock_model_id": bedrock_model_id,
"model_kwargs": model_kwargs,
"metadata": metadata,
"memory_window": memory_window
})

headers = {
'Content-Type': 'application/json'
}

print(payload)
result = requests.post(url, headers=headers, data=payload)
print(result.text)

if result.status_code == 200:
answer = json.loads(result.text).get("body")
return answer
else:
st.error(f"Request failed with status code: {result.status_code}",icon="🚨")
return None
except Exception as e:
st.error(f"An error occurred: {str(e)}",icon="🚨")
return None

def stream_data(response):
for word in response.split(" "):
yield word + " "
time.sleep(0.02)

def main():
history = []

set_page_config()
# Generate a unique widget key only once
if "sessionId" not in st.session_state:
st.session_state["sessionId"] = str(random.randint(1, 1000000))

bedrock_model_id, model_kwargs, memory_window, metadata, url = render_sidebar()

if "messages" not in st.session_state:
st.session_state.messages = []
st.session_state["messages"] = [INIT_MESSAGE]

for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.markdown(message["content"])
if message["documents"]:
with st.expander("Sources"):
for source in set(message["documents"]):
st.write(f"Source: {str(source)}")

# User-provided prompt
prompt = st.chat_input()

if prompt:
with st.chat_message("user"):
st.markdown(prompt)
# Add user message to chat history
st.session_state.messages.append({"role": "user", "content": prompt, "documents": []})

# with st.spinner("Thinking..."):
response = init_conversationchain(prompt,bedrock_model_id,model_kwargs,metadata,memory_window,url)

# Add assistant message to chat history
if response is not None:
with st.chat_message("assistant"):
st.write_stream(stream_data(response["answer"]))
with st.expander("Sources"):
for source in set(response["documents"]):
st.write(f"Source: {str(source)}")
st.session_state.messages.append({"role": "assistant", "content": response["answer"], "documents": response["documents"]})


if __name__ == "__main__":
main()
8 changes: 8 additions & 0 deletions chatapp/config.py
@@ -0,0 +1,8 @@
import os
import yaml

file_dir = os.path.dirname(os.path.abspath(__file__))
config_file = os.path.join(file_dir, "config.yml")

with open(config_file, "r") as file:
config = yaml.safe_load(file)
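
The chat app reads model settings out of this dict; for example, assuming the models defined in ``config.yml``:

```
from config import config

# Bedrock model id and default inference parameters for one configured model.
model_cfg = config["models"]["Claude 3 Sonnet"]
print(model_cfg["model_id"])              # anthropic.claude-3-sonnet-20240229-v1:0
print(model_cfg.get("max_tokens", 4096))  # 1024
```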
19 changes: 19 additions & 0 deletions chatapp/config.yml
@@ -0,0 +1,19 @@
models:
Claude 2:
model_id: "anthropic.claude-v2:1"
input_format: "list_of_dicts"
temperature: 1.0
top_p: 1.0
top_k: 500
max_tokens: 1024
memory_window: 10
max_top_k: 500
Claude 3 Sonnet:
model_id: "anthropic.claude-3-sonnet-20240229-v1:0"
input_format: "list_of_dicts"
temperature: 1.0
top_p: 1.0
top_k: 500
max_tokens: 1024
memory_window: 10
max_top_k: 500
47 changes: 47 additions & 0 deletions chatapp/readme.md
@@ -0,0 +1,47 @@
# Chatbot Application
## Overview
This is a chatbot client built using [streamlit](https://streamlit.io). It interacts with the RAG Lambda function and displays inputs and outputs in a chat-like interface.



## Input
This is what the chat window looks like:
![Chat window](../images/chat.png)

### Chat Input
* **Your message**: user prompt/question for the model

### Sidebar Properties:
You can set up most input parameters in the sidebar on the left:
* **Model**: AWS Bedrock model used for inference. Currently supported models are ``Anthropic Claude v2`` and ``Anthropic Claude 3 Sonnet``.
* **User (SID) Filter Search (str)**: User SID used to filter which data can be accessed, based on the original ACL SID authorization.
* **Model Arguments**: list of parameters for the foundation model (see the sketch after this list)
  * **Temperature**: The amount of randomness injected into the response. Ranges from 0 to 1. Use a temperature closer to 0 for analytical / multiple-choice tasks, and closer to 1 for creative and generative tasks.
  * **Top P**: Use nucleus sampling. In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the probability specified by ``top_p``. You should alter either ``temperature`` or ``top_p``, but not both.
  * **Top K**: Only sample from the top K options for each subsequent token. Use ``top_k`` to remove long-tail, low-probability responses.
  * **Max Tokens**: The maximum number of tokens to generate before stopping.
* **Memory Window**: Keep only the last K interactions in the chat memory.
* **Chat URL**: URL of the API Gateway endpoint that fronts the RAG Lambda function.
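
These sidebar values are assembled into the ``model_kwargs`` dict that the app sends with each request (see ``chatapi.py``); a representative example with hypothetical slider values:

```
model_kwargs = {
    "temperature": 0.5,  # Temperature slider
    "top_p": 1.0,        # Top-P slider
    "top_k": 250,        # Top-K slider
    "max_tokens": 1024,  # Max Token slider
}
```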

### ENV Variable Inputs
* **CHAT_URL**: URL of the API Gateway endpoint that fronts the RAG Lambda function.

## Testing
#### Prerequisites:
* Docker engine

#### Build:
Use the following to build the app:
```
docker build -t <image name> --platform linux/amd64 .
```

#### Run
Use the following to run the app:
```
docker run -d -p 8501:8501 -e CHAT_URL='<lambda-api-gtw-url>' <docker-image>
```




2 changes: 2 additions & 0 deletions chatapp/requirements.txt
@@ -0,0 +1,2 @@
streamlit
pyyaml
Binary file added embed/.DS_Store
Binary file not shown.
13 changes: 13 additions & 0 deletions embed/.env
@@ -0,0 +1,13 @@
PROFILE = 'xcp' # used for non-AWS environments
REGION = 'us-west-2' # used for non-AWS environments
OPEN_SEARCH_SERVERLESS_COLLECTION_NAME = 'rag'
BEDROCK_EMBEDDING_MODEL_ID = 'amazon.titan-embed-text-v2:0' #'amazon.titan-embed-text-v1'
BEDROCK_EMBEDDING_MODEL_OUTPUT_VECTOR_SIZE = 1024 #1536
FILES_PROCESSING_CONCURRENCY = 3
EMBEDDING_CONCURRENCY = 3
DOCUMENTS_INDEXING_CONCURRENCY = 10
TEXT_SPLITTER_CHUNK_SIZE = 500
TEXT_SPLITTER_CHUNK_OVERLAP = 10
DATA_DIRECTORY = './data'
INTERNAL_DB = './db/internal.db'
SCANNER_INTERVAL = '5m'
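
The embed service itself is Node-based, but for illustration the two chunking settings above map onto a LangChain text splitter along these lines (Python; the input text is hypothetical):

```
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Mirrors TEXT_SPLITTER_CHUNK_SIZE / TEXT_SPLITTER_CHUNK_OVERLAP above.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=10)

document_text = "...extracted text of one FSxN document..."
chunks = splitter.split_text(document_text)
```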
26 changes: 26 additions & 0 deletions embed/Dockerfile
@@ -0,0 +1,26 @@
FROM rockylinux:9.3-minimal

WORKDIR /opt/netapp/ai

RUN microdnf install -y xz cifs-utils tar make gcc gcc-c++

ENV NODE_VERSION 22.2.0

# install the Node version pinned above via n
RUN curl -fsSL https://raw.githubusercontent.com/tj/n/master/bin/n | bash -s $NODE_VERSION

ADD package.json ./
ADD package-lock.json ./
ADD src/ ./
ADD tsconfig.json ./
RUN npm i --force
RUN npm run build

ADD migrations ./migrations
ADD .env ./

CMD ["node", "--import", "./dist/register-hooks.js", "./dist/index.js"]
Binary file added embed/data/Boxing.pdf
Binary file not shown.
Binary file added embed/data/Evander_Holyfield.pdf
Binary file not shown.
