From 386a9e49a8b6b81fa5f24cbb5d89775abde6e174 Mon Sep 17 00:00:00 2001
From: KimJammer <41841812+kimjammer@users.noreply.github.com>
Date: Wed, 22 May 2024 23:34:47 -0700
Subject: [PATCH] Update Documentation

---
 README.md          | 17 ++++++++++-------
 constants.py       |  9 ++++++++-
 main.py            |  2 +-
 memories/readme.md |  2 +-
 requirements.txt   | Bin 9824 -> 9504 bytes
 5 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 4bea840..565e56f 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ The original version was also created in only 7 days, so it is not exactly very
 - Audio File playback (for pre-generated songs/covers created with something like [RVC](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
 - Vtube Studio Plugin & Model/Prop control
 - Flexible LLM - Load any model into text-generation-webui (tested) or use any openai-compatible endpoint (not tested).
-- Memory - Long-term (persists across restarts) memories can be manually added, but they will also be
+- Memory/RAG - Long-term (persists across restarts) memories can be manually added, but they will also be
   automatically generated as the AI talks. (See memories/readme.md for details)
 
 ## Architecture
@@ -112,20 +112,23 @@ documentation [here](https://pytwitchapi.dev/en/stable/index.html#user-authentic
 
 ### This Project
 
-A virtual environment of some sort is recommended (Python 3.11); this project was developed with venv.
+A virtual environment of some sort is recommended (Python 3.11 required); this project was developed with venv.
 
-Install requirements.txt (This is just a pip freeze, so if you're not on windows watch out)
+Install the CUDA 11.8 version of PyTorch 2.2.2 first.
 
-DeepSpeed (For TTS) will probably need to be installed separately, I was using instructions
-from [AllTalkTTS](https://github.com/erew123/alltalk_tts?#-deepspeed-installation-options) , and using their
+Then install requirements.txt (this is just a pip freeze, so watch out if you're not on Windows).
+
+Finally, DeepSpeed (for TTS) will need to be installed separately. I was using instructions
+from [AllTalkTTS](https://github.com/erew123/alltalk_tts?#-deepspeed-installation-options), and using their
 [provided wheels](https://github.com/erew123/alltalk_tts/releases/tag/DeepSpeed-14.0).
 
-Create an .env file using .env.example as reference. You need your Twitch app id and secret.
+Create a .env file using .env.example as reference. You need your Twitch app ID and secret, along with your
+Hugging Face token if you use a gated model (like Llama 3).
 
 Place a voice reference wav file in the voices directory. It should be 5~30 seconds long. For details see the
 RealtimeTTS repository.
 
-Find your desired microphone and speaker device numbers by running utils/listAudioDevices.py and note its number.
+Find your desired microphone and speaker devices by running utils/listAudioDevices.py and note their numbers.
 
 Configure constants.py.
 
diff --git a/constants.py b/constants.py
index edf4dcb..11a88fe 100644
--- a/constants.py
+++ b/constants.py
@@ -1,10 +1,13 @@
 # This file holds various constants used in the program
+# Variables marked with #UNIQUE# are unique to your setup and NEED to be changed, or the program will not work correctly.
 
 # CORE SECTION: All constants in this section are necessary
 
+# Microphone/Speaker device indices
 # Use utils/listAudioDevices.py to find the correct device ID
+#UNIQUE#
 INPUT_DEVICE_INDEX = 1
-OUTPUT_DEVICE_INDEX = 12
+OUTPUT_DEVICE_INDEX = 7
 
 # How many seconds to wait before prompting AI
 PATIENCE = 60
@@ -17,9 +20,11 @@ TWITCH_MAX_MESSAGE_LENGTH = 300
 
 # Twitch channel for bot to join
+#UNIQUE#
 TWITCH_CHANNEL = "lunasparkai"
 
 # Voice reference file for TTS
+#UNIQUE#
 VOICE_REFERENCE = "neuro.wav"
 
 # MULTIMODAL SPECIFIC SECTION: Not needed when not using multimodal capabilities
@@ -34,12 +39,14 @@
 # LLM SPECIFIC SECTION: Below are constants that are specific to the LLM you are using
 
 # The model you are using, to calculate how many tokens the current message is
+# Ensure this is correct! Used for token count estimation
 MODEL = "meta-llama/Meta-Llama-3-8B"
 
 # Context size (maximum number of tokens in the prompt) Will target upto 90% usage of this limit
 CONTEXT_SIZE = 8192
 
 # This is your name
+#UNIQUE#
 HOST_NAME = "John"
 
 # This is the AI's name
diff --git a/main.py b/main.py
index 519b943..a688d32 100644
--- a/main.py
+++ b/main.py
@@ -46,7 +46,7 @@ def signal_handler(sig, frame):
     stt = STT(signals)
     # Create TTS
     tts = TTS(signals)
-    # Create LLMController
+    # Create LLMWrapper
     llm_wrapper = LLMWrapper(signals, tts, modules)
     # Create Prompter
     prompter = Prompter(signals, llm_wrapper)
diff --git a/memories/readme.md b/memories/readme.md
index ad2a573..525e2cb 100644
--- a/memories/readme.md
+++ b/memories/readme.md
@@ -5,7 +5,7 @@ will be automatically injected into the prompt. Memories will also persist acros
 the frontend or the database is deleted.
 
 The automatically generated memories are based off of
-("Generative Agents: Interactive Simulacra of Human Behavior")[https://arxiv.org/abs/2304.03442]. Essentially,
+[Generative Agents: Interactive Simulacra of Human Behavior](https://arxiv.org/abs/2304.03442). Essentially,
 every handful of messages, the LLM will be prompted to review the recent messages and come up with the 3 most
 high level questions that encapsulate the conversation and also provide the answer. These question/answer pairs
 are then each stored as a (short-term) memory. These short-term memories will persists across restarts unless deleted.
diff --git a/requirements.txt b/requirements.txt
index d074df4bdcd9187b7cf27e6964ac9a5ce561d376..237ab1dcd747e3f5769e4611b9253eb273c8cf7e 100644
GIT binary patch
delta 12
TcmaFhv%qVE72D=2_GfYcB}xT9

delta 330
zcmZ9HI|{-;5QhKCV~8M@>Si?$1AI>JkO#^nE;p^QfIG^H91
WVNK@j^ZiO5L{_eJKB;KKUiIA%Yrjw12^EIf^+(uS)g0_=yO&kL^Ckp;E**wk+-U^
ynI^2V_#s+|7x6Nn8s{0!XAWBaGsQ5}JywHrGz-f>IV~yKG^Y58
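
To pick working values for the #UNIQUE# device constants, the patch points at utils/listAudioDevices.py. A minimal sketch of what such a script plausibly looks like, assuming the project enumerates devices through PyAudio (the script's actual contents are not shown in this patch):

```python
# Hypothetical device-listing sketch (assumes PyAudio); not necessarily the
# project's actual utils/listAudioDevices.py.
import pyaudio

p = pyaudio.PyAudio()
for i in range(p.get_device_count()):
    info = p.get_device_info_by_index(i)
    kinds = []
    if info["maxInputChannels"] > 0:
        kinds.append("input")
    if info["maxOutputChannels"] > 0:
        kinds.append("output")
    # The printed index is what goes into INPUT_DEVICE_INDEX / OUTPUT_DEVICE_INDEX
    print(f"{i}: {info['name']} ({'/'.join(kinds)})")
p.terminate()
```

Run it once, then copy the printed indices into INPUT_DEVICE_INDEX and OUTPUT_DEVICE_INDEX in constants.py.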
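
The new comment above MODEL ("Used for token count estimation") together with the CONTEXT_SIZE comment ("Will target upto 90% usage of this limit") implies a budget check along these lines. The tokenizer choice and the fits_in_budget helper are assumptions, not code from this repository:

```python
# Hedged sketch of token-count estimation against CONTEXT_SIZE.
from transformers import AutoTokenizer

MODEL = "meta-llama/Meta-Llama-3-8B"  # gated model: needs the Hugging Face token from .env
CONTEXT_SIZE = 8192

tokenizer = AutoTokenizer.from_pretrained(MODEL)

def fits_in_budget(prompt: str) -> bool:
    # Target up to 90% usage of the context window, leaving headroom for the reply.
    return len(tokenizer.encode(prompt)) <= int(CONTEXT_SIZE * 0.9)
```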
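
memories/readme.md describes a Generative Agents-style reflection loop: every handful of messages, the LLM distills the 3 most high-level question/answer pairs from the recent conversation, and each pair is stored as a short-term memory. A rough sketch of that loop, where ask_llm, store_memory, the cadence, and the response format are hypothetical stand-ins for the project's real LLM call and vector store:

```python
# Rough sketch of the automatic memory generation described in memories/readme.md.
REFLECTION_INTERVAL = 10  # "every handful of messages" -- exact cadence assumed

def maybe_generate_memories(history: list[str], ask_llm, store_memory) -> None:
    if len(history) == 0 or len(history) % REFLECTION_INTERVAL != 0:
        return
    recent = "\n".join(history[-REFLECTION_INTERVAL:])
    prompt = (
        "Review the conversation below. Give the 3 most high-level questions "
        "that encapsulate it, and answer each one.\n\n" + recent
    )
    # One question/answer pair per blank-line-separated block (assumed format).
    for qa_pair in ask_llm(prompt).split("\n\n"):
        store_memory(qa_pair)  # persisted, so it survives restarts
```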