forked from opensearch-project/ml-commons
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Yaliang Wu <[email protected]>
- Loading branch information
Showing
23 changed files
with
674 additions
and
146 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
...search/ml/common/connector/functions/postprocess/BedrockEmbeddingPostProcessFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.ml.common.connector.functions.postprocess; | ||
|
||
import org.opensearch.ml.common.output.model.MLResultDataType; | ||
import org.opensearch.ml.common.output.model.ModelTensor; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
public class BedrockEmbeddingPostProcessFunction extends ConnectorPostProcessFunction<List<Float>> { | ||
|
||
@Override | ||
public void validate(Object input) { | ||
if (!(input instanceof List)) { | ||
throw new IllegalArgumentException("Post process function input is not a List."); | ||
} | ||
|
||
List<?> outerList = (List<?>) input; | ||
|
||
if (!outerList.isEmpty() && !(((List<?>)input).get(0) instanceof Number)) { | ||
throw new IllegalArgumentException("The embedding should be a non-empty List containing Float values."); | ||
} | ||
} | ||
|
||
@Override | ||
public List<ModelTensor> process(List<Float> embedding) { | ||
List<ModelTensor> modelTensors = new ArrayList<>(); | ||
modelTensors.add( | ||
ModelTensor | ||
.builder() | ||
.name("sentence_embedding") | ||
.dataType(MLResultDataType.FLOAT32) | ||
.shape(new long[]{embedding.size()}) | ||
.data(embedding.toArray(new Number[0])) | ||
.build()); | ||
return modelTensors; | ||
} | ||
} |
57 changes: 57 additions & 0 deletions
57
...opensearch/ml/common/connector/functions/postprocess/CohereRerankPostProcessFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.ml.common.connector.functions.postprocess; | ||
|
||
import org.opensearch.ml.common.output.model.MLResultDataType; | ||
import org.opensearch.ml.common.output.model.ModelTensor; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
public class CohereRerankPostProcessFunction extends ConnectorPostProcessFunction<List<Map<String, Object>>> { | ||
|
||
@Override | ||
public void validate(Object input) { | ||
if (!(input instanceof List)) { | ||
throw new IllegalArgumentException("Post process function input is not a List."); | ||
} | ||
List<?> outerList = (List<?>) input; | ||
if (!outerList.isEmpty()) { | ||
if (!(outerList.get(0) instanceof Map)) { | ||
throw new IllegalArgumentException("Post process function input is not a List of Map."); | ||
} | ||
Map innerMap = (Map) outerList.get(0); | ||
|
||
if (innerMap.isEmpty() || !innerMap.containsKey("index") || !innerMap.containsKey("relevance_score")) { | ||
throw new IllegalArgumentException("The rerank result should contain index and relevance_score."); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public List<ModelTensor> process(List<Map<String, Object>> rerankResults) { | ||
List<ModelTensor> modelTensors = new ArrayList<>(); | ||
|
||
if (rerankResults.size() > 0) { | ||
Double[] scores = new Double[rerankResults.size()]; | ||
for (int i = 0; i < rerankResults.size(); i++) { | ||
Integer index = (Integer) rerankResults.get(i).get("index"); | ||
scores[index] = (Double) rerankResults.get(i).get("relevance_score"); | ||
} | ||
|
||
for (int i = 0; i < scores.length; i++) { | ||
modelTensors.add(ModelTensor.builder() | ||
.name("similarity") | ||
.shape(new long[]{1}) | ||
.data(new Number[]{scores[i]}) | ||
.dataType(MLResultDataType.FLOAT32) | ||
.build()); | ||
} | ||
} | ||
return modelTensors; | ||
} | ||
} |
27 changes: 27 additions & 0 deletions
27
...rg/opensearch/ml/common/connector/functions/postprocess/ConnectorPostProcessFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.ml.common.connector.functions.postprocess; | ||
|
||
import org.opensearch.ml.common.output.model.ModelTensor; | ||
|
||
import java.util.List; | ||
import java.util.function.Function; | ||
|
||
public abstract class ConnectorPostProcessFunction<T> implements Function<Object, List<ModelTensor>> { | ||
|
||
@Override | ||
public List<ModelTensor> apply(Object input) { | ||
if (input == null) { | ||
throw new IllegalArgumentException("Can't run post process function as model output is null"); | ||
} | ||
validate(input); | ||
return process((T)input); | ||
} | ||
|
||
public abstract void validate(Object input); | ||
|
||
public abstract List<ModelTensor> process(T input); | ||
} |
50 changes: 50 additions & 0 deletions
50
...rg/opensearch/ml/common/connector/functions/postprocess/EmbeddingPostProcessFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.ml.common.connector.functions.postprocess; | ||
|
||
import org.opensearch.ml.common.output.model.MLResultDataType; | ||
import org.opensearch.ml.common.output.model.ModelTensor; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
public class EmbeddingPostProcessFunction extends ConnectorPostProcessFunction<List<List<Float>>> { | ||
|
||
@Override | ||
public void validate(Object input) { | ||
if (!(input instanceof List)) { | ||
throw new IllegalArgumentException("Post process function input is not a List."); | ||
} | ||
|
||
List<?> outerList = (List<?>) input; | ||
|
||
if (!outerList.isEmpty()) { | ||
if (!(outerList.get(0) instanceof List)) { | ||
throw new IllegalArgumentException("Post process function input is not a List of List."); | ||
} | ||
List<?> innerList = (List<?>) outerList.get(0); | ||
|
||
if (innerList.isEmpty() || !(innerList.get(0) instanceof Number)) { | ||
throw new IllegalArgumentException("The embedding should be a non-empty List containing Float values."); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public List<ModelTensor> process(List<List<Float>> embeddings) { | ||
List<ModelTensor> modelTensors = new ArrayList<>(); | ||
embeddings.forEach(embedding -> modelTensors.add( | ||
ModelTensor | ||
.builder() | ||
.name("sentence_embedding") | ||
.dataType(MLResultDataType.FLOAT32) | ||
.shape(new long[]{embedding.size()}) | ||
.data(embedding.toArray(new Number[0])) | ||
.build() | ||
)); | ||
return modelTensors; | ||
} | ||
} |
34 changes: 34 additions & 0 deletions
34
...ensearch/ml/common/connector/functions/preprocess/BedrockEmbeddingPreProcessFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.ml.common.connector.functions.preprocess; | ||
|
||
import org.opensearch.ml.common.dataset.TextDocsInputDataSet; | ||
import org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet; | ||
import org.opensearch.ml.common.input.MLInput; | ||
|
||
import java.util.Map; | ||
|
||
import static org.opensearch.ml.common.utils.StringUtils.convertScriptStringToJsonString; | ||
|
||
|
||
public class BedrockEmbeddingPreProcessFunction extends ConnectorPreProcessFunction { | ||
|
||
public BedrockEmbeddingPreProcessFunction() { | ||
this.returnDirectlyForRemoteInferenceInput = true; | ||
} | ||
|
||
@Override | ||
public void validate(MLInput mlInput) { | ||
validateTextDocsInput(mlInput); | ||
} | ||
|
||
@Override | ||
public RemoteInferenceInputDataSet process(MLInput mlInput) { | ||
TextDocsInputDataSet inputData = (TextDocsInputDataSet) mlInput.getInputDataset(); | ||
Map<String, Object> processedResult = Map.of("parameters", Map.of("inputText", processTextDocs(inputData).get(0))); | ||
return RemoteInferenceInputDataSet.builder().parameters(convertScriptStringToJsonString(processedResult)).build(); | ||
} | ||
} |
Oops, something went wrong.