From f78109c7942d0e2eb024ab89f3231d783b28668d Mon Sep 17 00:00:00 2001
From: Varun Jain
Date: Thu, 11 Jan 2024 20:56:53 -0800
Subject: [PATCH] Adding extra check to ensure model gets undeployed and then gets deleted

Signed-off-by: Varun Jain
---
Review notes (not part of the commit message): restored the generic type
arguments on Set<MLModelState> and Map<String, Object>, fixed the grammar of
the polling failure message, and added Javadoc to the two new helpers. The
index hashes below are the ones recorded with the original commit.

 .../neuralsearch/BaseNeuralSearchIT.java      | 63 ++++++++++++++++++-
 .../opensearch/neuralsearch/TestUtils.java    |  2 +
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java
index e882e4d95..4133e905f 100644
--- a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java
+++ b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java
@@ -4,6 +4,7 @@
  */
 package org.opensearch.neuralsearch;
 
+import org.opensearch.ml.common.model.MLModelState;
 import static org.opensearch.neuralsearch.common.VectorUtil.vectorAsListToArray;
 
 import java.io.IOException;
@@ -61,6 +62,8 @@
 import static org.opensearch.neuralsearch.TestUtils.DEFAULT_NORMALIZATION_METHOD;
 import static org.opensearch.neuralsearch.TestUtils.DEFAULT_COMBINATION_METHOD;
 import static org.opensearch.neuralsearch.TestUtils.PARAM_NAME_WEIGHTS;
+import static org.opensearch.neuralsearch.TestUtils.MAX_RETRY;
+import static org.opensearch.neuralsearch.TestUtils.MAX_TIME_OUT_INTERVAL;
 
 import lombok.AllArgsConstructor;
 import lombok.Getter;
@@ -664,8 +667,10 @@ protected void deleteModel(String modelId) {
             ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, DEFAULT_USER_AGENT))
         );
 
-        // after model undeploy returns, the max interval to update model status is 3s in ml-commons CronJob.
-        Thread.sleep(3000);
+        // wait for model undeploy to complete.
+        // Sometimes the undeploy action results in a DEPLOY_FAILED state. But this does not block the model from being deleted.
+        // So set both UNDEPLOYED and DEPLOY_FAILED as exit state.
+        pollForModelState(modelId, Set.of(MLModelState.UNDEPLOYED, MLModelState.DEPLOY_FAILED));
 
         makeRequest(
             client(),
@@ -677,6 +682,60 @@ protected void deleteModel(String modelId) {
         );
     }
 
+    /**
+     * Polls the model state until it reaches one of the given exit states, failing the test otherwise.
+     * Waits MAX_TIME_OUT_INTERVAL ms before every check, for at most MAX_RETRY checks.
+     *
+     * @param modelId id of the model to poll
+     * @param exitModelStates states that end the polling successfully
+     * @throws InterruptedException if the thread is interrupted while waiting between checks
+     */
+    protected void pollForModelState(String modelId, Set<MLModelState> exitModelStates) throws InterruptedException {
+        MLModelState currentState = null;
+        for (int i = 0; i < MAX_RETRY; i++) {
+            Thread.sleep(MAX_TIME_OUT_INTERVAL);
+            currentState = getModelState(modelId);
+            if (exitModelStates.contains(currentState)) {
+                return;
+            }
+        }
+        fail(
+            String.format(
+                LOCALE,
+                "Model state did not reach exit states %s after %d attempts with interval of %d ms, latest model state: %s.",
+                StringUtils.join(exitModelStates, ","),
+                MAX_RETRY,
+                MAX_TIME_OUT_INTERVAL,
+                currentState
+            )
+        );
+    }
+
+    /**
+     * Fetches the model via GET /_plugins/_ml/models/{modelId} and returns its current state.
+     *
+     * @param modelId id of the model to look up
+     * @return state parsed from the "model_state" field of the response body
+     */
+    @SneakyThrows
+    protected MLModelState getModelState(String modelId) {
+        Response getModelResponse = makeRequest(
+            client(),
+            "GET",
+            String.format(LOCALE, "/_plugins/_ml/models/%s", modelId),
+            null,
+            toHttpEntity(""),
+            ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, DEFAULT_USER_AGENT))
+        );
+        Map<String, Object> getModelResponseJson = XContentHelper.convertToMap(
+            XContentType.JSON.xContent(),
+            EntityUtils.toString(getModelResponse.getEntity()),
+            false
+        );
+        String modelState = (String) getModelResponseJson.get("model_state");
+        return MLModelState.valueOf(modelState);
+    }
+
     public boolean isUpdateClusterSettings() {
         return true;
     }
diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java b/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java
index 525fb813c..ae155a0f9 100644
--- a/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java
+++ b/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java
@@ -61,6 +61,8 @@ public class TestUtils {
     public static final String DEFAULT_COMBINATION_METHOD = "arithmetic_mean";
     public static final String PARAM_NAME_WEIGHTS = "weights";
     public static final String SPARSE_ENCODING_PROCESSOR = "sparse_encoding";
+    public static final int MAX_TIME_OUT_INTERVAL = 3000;
+    public static final int MAX_RETRY = 3;
 
     /**
      * Convert an xContentBuilder to a map