From c4a48daa2e4c6f2dd4e02a400ff59649d2c3440b Mon Sep 17 00:00:00 2001 From: Yaliang Wu Date: Sat, 17 Feb 2024 17:23:01 -0800 Subject: [PATCH 1/3] add tutorial for semantic search with byte quantized vector and Cohere embedding model Signed-off-by: Yaliang Wu --- ...ation_search_with_byte_quantized_vector.md | 278 ++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md diff --git a/docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md b/docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md new file mode 100644 index 0000000000..b7ec73a82c --- /dev/null +++ b/docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md @@ -0,0 +1,278 @@ +# Topic + +> This tutorial doesn't explain details about byte-quantized vectors. Read this [Byte-quantized vectors in OpenSearch](https://opensearch.org/blog/byte-quantized-vectors-in-opensearch/) to learn more details. + +This tutorial explains how to build semantic search with [Cohere Embedding model](https://docs.cohere.com/reference/embed) and byte-quantized vectors in OpenSearch. + +Cohere embedding model v3 supports more `embedding_types`, this tutorial uses `"int8"` to work with byte-quantized vectors. + +Note: Replace the placeholders that start with `your_` with your own values. + +# Steps + +Cohere embedding model v3 supports different `input_type`. From Cohere [doc](https://docs.cohere.com/reference/embed) +> - `"search_document"`: Used for embeddings stored in a vector database for search use-cases. +> - `"search_query"`: Used for embeddings of search queries run against a vector DB to find relevant documents. + +We will create two models in this tutorial: +- one model with `search_document` input type for ingestion +- one model with `search_query` input type for search + +## 1. 
Create embedding model for ingestion + +Create connector with `search_document` input type + +``` +POST /_plugins/_ml/connectors/_create +{ + "name": "Cohere embedding connector with int8 embedding type for ingestion", + "description": "Test connector for Cohere embedding model", + "version": 1, + "protocol": "http", + "credential": { + "cohere_key": "your_cohere_api_key" + }, + "parameters": { + "model": "embed-english-v3.0", + "embedding_types": ["int8"], + "input_type": "search_document" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "headers": { + "Authorization": "Bearer ${credential.cohere_key}", + "Request-Source": "unspecified:opensearch" + }, + "url": "https://api.cohere.ai/v1/embed", + "request_body": "{ \"model\": \"${parameters.model}\", \"texts\": ${parameters.texts}, \"input_type\":\"${parameters.input_type}\", \"embedding_types\": ${parameters.embedding_types} }", + "pre_process_function": "connector.pre_process.cohere.embedding", + "post_process_function": "\n def name = \"sentence_embedding\";\n def data_type = \"FLOAT32\";\n def result;\n if (params.embeddings.int8 != null) {\n data_type = \"INT8\";\n result = params.embeddings.int8;\n } else if (params.embeddings.uint8 != null) {\n data_type = \"UINT8\";\n result = params.embeddings.uint8;\n } else if (params.embeddings.float != null) {\n data_type = \"FLOAT32\";\n result = params.embeddings.float;\n }\n \n if (result == null) {\n return \"Invalid embedding result\";\n }\n \n def embedding_list = new StringBuilder(\"[\");\n \n for (int m=0; m Date: Mon, 19 Feb 2024 13:13:53 -0800 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Signed-off-by: Yaliang Wu --- ...ation_search_with_byte_quantized_vector.md | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md b/docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md index b7ec73a82c..361da25415 100644 --- a/docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md +++ b/docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md @@ -1,26 +1,26 @@ # Topic -> This tutorial doesn't explain details about byte-quantized vectors. Read this [Byte-quantized vectors in OpenSearch](https://opensearch.org/blog/byte-quantized-vectors-in-opensearch/) to learn more details. +> This tutorial doesn't explain byte-quantized vectors in detail. For more information, see [Byte-quantized vectors in OpenSearch](https://opensearch.org/blog/byte-quantized-vectors-in-opensearch/). -This tutorial explains how to build semantic search with [Cohere Embedding model](https://docs.cohere.com/reference/embed) and byte-quantized vectors in OpenSearch. +This tutorial shows how to build semantic search using the [Cohere Embed model](https://docs.cohere.com/reference/embed) and byte-quantized vectors in OpenSearch. -Cohere embedding model v3 supports more `embedding_types`, this tutorial uses `"int8"` to work with byte-quantized vectors. +The Cohere Embed v3 model supports several `embedding_types`. This tutorial uses the `int8` type for byte-quantized vectors. Note: Replace the placeholders that start with `your_` with your own values. # Steps -Cohere embedding model v3 supports different `input_type`. 
From Cohere [doc](https://docs.cohere.com/reference/embed) -> - `"search_document"`: Used for embeddings stored in a vector database for search use-cases. +The Cohere Embed v3 model supports several input types. This tutorial uses the following input types (from the Cohere [documentation](https://docs.cohere.com/reference/embed)): +> - `search_document`: Used for embeddings stored in a vector database for search use cases. > - `"search_query"`: Used for embeddings of search queries run against a vector DB to find relevant documents. -We will create two models in this tutorial: -- one model with `search_document` input type for ingestion -- one model with `search_query` input type for search +You will create two models in this tutorial: +- A model used for ingestion with the `search_document` input type +- A model used for search with the `search_query` input type ## 1. Create embedding model for ingestion -Create connector with `search_document` input type +Create a connector with the `search_document` input type: ``` POST /_plugins/_ml/connectors/_create @@ -53,7 +53,7 @@ POST /_plugins/_ml/connectors/_create ] } ``` -Use connector id from the response to create model +Use the connector ID from the response to create a model: ``` POST /_plugins/_ml/models/_register?deploy=true { @@ -63,9 +63,9 @@ POST /_plugins/_ml/models/_register?deploy=true "connector_id": "your_connector_id" } ``` -Note the model id, it will be used in step 2.1 +Note the model ID; you'll use it in step 2.1. -Test model +Test the model: ``` POST /_plugins/_ml/models/your_embedding_model_id/_predict { @@ -74,7 +74,7 @@ POST /_plugins/_ml/models/your_embedding_model_id/_predict } } ``` -Sample response +Sample response: ``` { @@ -138,7 +138,7 @@ PUT /_ingest/pipeline/pipeline-cohere ``` ### 2.2 Create KNN index with byte-quantized vector -Refer to https://opensearch.org/blog/byte-quantized-vectors-in-opensearch/ +For more information, refer to [this blog](https://opensearch.org/blog/byte-quantized-vectors-in-opensearch/). ``` PUT my_test_data @@ -174,7 +174,7 @@ PUT my_test_data } ``` -Ingest test data +Ingest test data: ``` POST _bulk @@ -187,7 +187,7 @@ POST _bulk ## 3. Semantic search -Let's create another embedding model with `search_query` input type +Create another embedding model with the `search_query` input type: ``` POST /_plugins/_ml/connectors/_create { @@ -219,7 +219,7 @@ POST /_plugins/_ml/connectors/_create ] } ``` -Use the connector id from response to create model +Use the connector ID from the response to create a model: ``` POST /_plugins/_ml/models/_register?deploy=true { From 20b79fef4f2796d7d95da559d932ebd7367c853e Mon Sep 17 00:00:00 2001 From: Yaliang Wu Date: Wed, 21 Feb 2024 18:09:38 -0800 Subject: [PATCH 3/3] address comments Signed-off-by: Yaliang Wu --- ...conversation_search_with_byte_quantized_vector.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md b/docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md index 361da25415..1b510abaab 100644 --- a/docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md +++ b/docs/tutorials/semantic_search/conversation_search_with_byte_quantized_vector.md @@ -10,9 +10,9 @@ Note: Replace the placeholders that start with `your_` with your own values. # Steps -The Cohere Embed v3 model supports several input types. 
This tutorial uses the following input types (from the Cohere [documentation](https://docs.cohere.com/reference/embed)):
-> - `search_document`: Used for embeddings stored in a vector database for search use cases.
-> - `"search_query"`: Used for embeddings of search queries run against a vector DB to find relevant documents.
+The Cohere Embed v3 model supports several input types. This tutorial uses the following input types (from the Cohere [documentation](https://docs.cohere.com/docs/embed-api#the-input_type-parameter)):
+> - `input_type="search_document"`: Use this when you have texts (documents) that you want to store in a vector database.
+> - `input_type="search_query"`: Use this when structuring search queries to find the most relevant documents in your vector database.
 
 You will create two models in this tutorial:
 - A model used for ingestion with the `search_document` input type
 - A model used for search with the `search_query` input type
 
 ## 1. Create embedding model for ingestion
@@ -63,9 +63,7 @@ POST /_plugins/_ml/models/_register?deploy=true
   "connector_id": "your_connector_id"
 }
 ```
-Note the model ID; you'll use it in step 2.1.
-
-Test the model:
+Use the model ID from the response to test the Predict API (you'll also use this model ID in step 2.1):
 ```
 POST /_plugins/_ml/models/your_embedding_model_id/_predict
 {
@@ -76,6 +74,7 @@ POST /_plugins/_ml/models/your_embedding_model_id/_predict
   }
 }
 ```
 Sample response:
+Note: The `data_type` in `inference_results.output` is set to `FLOAT32` to remain compatible with the Neural Search plugin; the embedding values are actually `INT8`.
 ```
 {
   "inference_results": [
@@ -136,6 +135,7 @@ PUT /_ingest/pipeline/pipeline-cohere
   ]
 }
 ```
+The response simply acknowledges that the request has been executed.
 ### 2.2 Create KNN index with byte-quantized vector
 For more information, refer to [this blog](https://opensearch.org/blog/byte-quantized-vectors-in-opensearch/).
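
For reference, the byte-quantized index in step 2.2 relies on the k-NN plugin's `byte` vector support, which requires the Lucene engine. The following is a minimal sketch of such a mapping, not necessarily the exact mapping from the tutorial: the `question_knn` field name is an illustrative assumption, and the dimension of 1024 matches the `embed-english-v3.0` model:

```
PUT my_test_data
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "question_knn": {
        "type": "knn_vector",
        "dimension": 1024,
        "data_type": "byte",
        "method": {
          "name": "hnsw",
          "space_type": "l2",
          "engine": "lucene"
        }
      }
    }
  }
}
```

Because the field's `data_type` is `byte`, each dimension is stored as a signed 8-bit integer, which is why the connector requests the `int8` embedding type from Cohere.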
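
At search time, queries should be embedded with the second model (the one created with the `search_query` input type). A minimal sketch of what a semantic search request could look like using the Neural Search plugin's `neural` query, again assuming the illustrative `question_knn` field and the search model ID from step 3:

```
GET /my_test_data/_search
{
  "_source": {
    "excludes": ["question_knn"]
  },
  "query": {
    "neural": {
      "question_knn": {
        "query_text": "your natural language question",
        "model_id": "your_search_model_id",
        "k": 2
      }
    }
  }
}
```

The `neural` query calls the model at search time to embed `query_text`, so be sure to reference the search model here rather than the ingestion model; the two input types produce embeddings optimized for different sides of retrieval.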