From dc366c6d92a22b04d630a0a0402489a7dc550449 Mon Sep 17 00:00:00 2001 From: Michael Froh Date: Wed, 18 Sep 2024 13:58:47 -0700 Subject: [PATCH 1/4] Avoid infinite loop in flat_object parsing We had logic in flat_object parsing that would: 1. Try parsing a flat object field that is not an object or null. 2. Would see an END_ARRAY token, ignore it, and not advance the parser. Combined, this would create a scenario where passing an array of strings for a flat_object would parse the string values, then loop infinitely on the END_ARRAY token. Signed-off-by: Michael Froh --- CHANGELOG.md | 1 + .../test/index/90_flat_object.yml | 11 ++++ .../xcontent/JsonToStringXContentParser.java | 5 +- .../index/mapper/FlatObjectFieldMapper.java | 57 +++++++++++-------- 4 files changed, 48 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 636684af939bf..5b2e5d522fa77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix wildcard query containing escaped character ([#15737](https://github.com/opensearch-project/OpenSearch/pull/15737)) - Fix case-insensitive query on wildcard field ([#15882](https://github.com/opensearch-project/OpenSearch/pull/15882)) - Add validation for the search backpressure cancellation settings ([#15501](https://github.com/opensearch-project/OpenSearch/pull/15501)) +- Avoid infinite loop when `flat_object` field contains invalid token ([#15985](https://github.com/opensearch-project/OpenSearch/pull/15985)) - Fix infinite loop in nested agg ([#15931](https://github.com/opensearch-project/OpenSearch/pull/15931)) ### Security diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml index 83d3d273ebd93..b2c1e04d096aa 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml @@ -62,6 +62,17 @@ setup: }, "required_matches": 1 } + # The following document is invalid. + - do: + catch: /parsing_exception/ + index: + index: test + id: 3 + body: { + "ISBN13": "V12154942123242", + "catalog": ["Arrays in Action"], + "required_matches": 1 + } # Do index refresh - do: diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 2f60fc8f69f87..b9c0fa84f5ba7 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -9,6 +9,7 @@ package org.opensearch.common.xcontent; import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.common.ParsingException; import org.opensearch.core.common.Strings; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.AbstractXContentParser; @@ -117,8 +118,6 @@ private boolean parseToken(Deque path, String currentFieldName) throws I isChildrenValueValid |= parseToken(path, currentFieldName); } this.parser.nextToken(); - } else if (this.parser.currentToken() == Token.END_ARRAY) { - // skip } else if (this.parser.currentToken() == Token.START_OBJECT) { parser.nextToken(); while (this.parser.currentToken() != Token.END_OBJECT) { @@ -172,7 +171,7 @@ private String parseValue() throws IOException { return this.parser.textOrNull(); // Handle other token types as needed default: - throw new IOException("Unsupported value token type [" + parser.currentToken() + "]"); + throw new ParsingException(parser.getTokenLocation(), "Unexpected value token type [" + parser.currentToken() + "]"); } } diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index bf8f83e1b95df..738efcfafdca1 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -30,6 +30,7 @@ import org.opensearch.common.lucene.Lucene; import org.opensearch.common.lucene.search.AutomatonQueries; import org.opensearch.common.xcontent.JsonToStringXContentParser; +import org.opensearch.core.common.ParsingException; import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; @@ -568,31 +569,41 @@ protected void parseCreateField(ParseContext context) throws IOException { if (context.externalValueSet()) { String value = context.externalValue().toString(); parseValueAddFields(context, value, fieldType().name()); - } else if (context.parser().currentToken() != XContentParser.Token.VALUE_NULL) { - JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser( - NamedXContentRegistry.EMPTY, - DeprecationHandler.IGNORE_DEPRECATIONS, - context.parser(), - fieldType().name() - ); - /* - JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser - It reads the JSON object and parsed to a list of string - */ - XContentParser parser = jsonToStringParser.parseObject(); - - XContentParser.Token currentToken; - while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - switch (currentToken) { - case FIELD_NAME: - fieldName = parser.currentName(); - break; - case VALUE_STRING: - String value = parser.textOrNull(); - parseValueAddFields(context, value, fieldName); - break; + } else { + XContentParser ctxParser = context.parser(); + if (ctxParser.currentToken() != XContentParser.Token.VALUE_NULL) { + if (ctxParser.currentToken() != XContentParser.Token.START_OBJECT) { + throw new ParsingException( + ctxParser.getTokenLocation(), + "[" + this.name() + "] unexpected token [" + ctxParser.currentToken() + "] in flat_object field value" + ); } + JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + ctxParser, + fieldType().name() + ); + /* + JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser + It reads the JSON object and parsed to a list of string + */ + XContentParser parser = jsonToStringParser.parseObject(); + + XContentParser.Token currentToken; + while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + switch (currentToken) { + case FIELD_NAME: + fieldName = parser.currentName(); + break; + case VALUE_STRING: + String value = parser.textOrNull(); + parseValueAddFields(context, value, fieldName); + break; + } + + } } } From 5a74dace0b1a75c0002e6308be7d8c01c80d2c15 Mon Sep 17 00:00:00 2001 From: Michael Froh Date: Wed, 18 Sep 2024 18:45:00 -0700 Subject: [PATCH 2/4] Remove some unused code and add more tests The removed code does not actually seem to affect the logic. Also, I want to be 100% sure that every call to parseToken is guaranteed to call parser.nextToken() at some point. Signed-off-by: Michael Froh --- .../test/index/90_flat_object.yml | 33 +++++++++++++++++-- .../xcontent/JsonToStringXContentParser.java | 18 ---------- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml index b2c1e04d096aa..c87f86eb89388 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml @@ -62,7 +62,7 @@ setup: }, "required_matches": 1 } - # The following document is invalid. + # The following documents are invalid. - do: catch: /parsing_exception/ index: @@ -73,7 +73,36 @@ setup: "catalog": ["Arrays in Action"], "required_matches": 1 } - + - do: + catch: /parsing_exception/ + index: + index: test + id: 3 + body: { + "ISBN13": "V12154942123242", + "catalog": "Strings in Action", + "required_matches": 1 + } + - do: + catch: /parsing_exception/ + index: + index: test + id: 3 + body: { + "ISBN13": "V12154942123242", + "catalog": 12345, + "required_matches": 1 + } + - do: + catch: /parsing_exception/ + index: + index: test + id: 3 + body: { + "ISBN13": "V12154942123242", + "catalog": [ 12345 ], + "required_matches": 1 + } # Do index refresh - do: indices.refresh: diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index b9c0fa84f5ba7..a5be79f28dd2d 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -89,9 +89,6 @@ public XContentParser parseObject() throws IOException { * @return true if the child object contains no_null value, false otherwise */ private boolean parseToken(Deque path, String currentFieldName) throws IOException { - if (path.size() == 1 && processNoNestedValue()) { - return true; - } boolean isChildrenValueValid = false; boolean visitFieldName = false; if (this.parser.currentToken() == Token.FIELD_NAME) { @@ -147,21 +144,6 @@ public void removeKeyOfNullValue() { this.keyList.remove(keyList.size() - 1); } - private boolean processNoNestedValue() throws IOException { - if (parser.currentToken() == Token.VALUE_NULL) { - return true; - } else if (this.parser.currentToken() == Token.VALUE_STRING - || this.parser.currentToken() == Token.VALUE_NUMBER - || this.parser.currentToken() == Token.VALUE_BOOLEAN) { - String value = this.parser.textOrNull(); - if (value != null) { - this.valueList.add(value); - } - return true; - } - return false; - } - private String parseValue() throws IOException { switch (this.parser.currentToken()) { case VALUE_BOOLEAN: From dff498dfc4243900ff2e7c3690960f931853f329 Mon Sep 17 00:00:00 2001 From: Michael Froh Date: Thu, 19 Sep 2024 13:41:38 -0700 Subject: [PATCH 3/4] Remove unused parameter from parseToken Thanks for the reminder, @kkewwei! Signed-off-by: Michael Froh --- .../xcontent/JsonToStringXContentParser.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index a5be79f28dd2d..95a8d9c9495f2 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -74,7 +74,7 @@ public XContentParser parseObject() throws IOException { builder.startObject(); LinkedList path = new LinkedList<>(Collections.singleton(fieldTypeName)); while (currentToken() != Token.END_OBJECT) { - parseToken(path, null); + parseToken(path); } // deduplication the fieldName,valueList,valueAndPathList builder.field(this.fieldTypeName, new HashSet<>(keyList)); @@ -88,11 +88,11 @@ public XContentParser parseObject() throws IOException { /** * @return true if the child object contains no_null value, false otherwise */ - private boolean parseToken(Deque path, String currentFieldName) throws IOException { + private boolean parseToken(Deque path) throws IOException { boolean isChildrenValueValid = false; boolean visitFieldName = false; if (this.parser.currentToken() == Token.FIELD_NAME) { - currentFieldName = this.parser.currentName(); + final String currentFieldName = this.parser.currentName(); path.addLast(currentFieldName); // Pushing onto the stack *must* be matched by pop visitFieldName = true; String parts = currentFieldName; @@ -104,21 +104,21 @@ private boolean parseToken(Deque path, String currentFieldName) throws I } this.keyList.add(parts); // parts has no dot, so either it's the original fieldName or it's the last part this.parser.nextToken(); // advance to the value of fieldName - isChildrenValueValid = parseToken(path, currentFieldName); // parse the value for fieldName (which will be an array, an object, - // or a primitive value) + isChildrenValueValid = parseToken(path); // parse the value for fieldName (which will be an array, an object, + // or a primitive value) path.removeLast(); // Here is where we pop fieldName from the stack (since we're done with the value of fieldName) // Note that whichever other branch we just passed through has already ended with nextToken(), so we // don't need to call it. } else if (this.parser.currentToken() == Token.START_ARRAY) { parser.nextToken(); while (this.parser.currentToken() != Token.END_ARRAY) { - isChildrenValueValid |= parseToken(path, currentFieldName); + isChildrenValueValid |= parseToken(path); } this.parser.nextToken(); } else if (this.parser.currentToken() == Token.START_OBJECT) { parser.nextToken(); while (this.parser.currentToken() != Token.END_OBJECT) { - isChildrenValueValid |= parseToken(path, currentFieldName); + isChildrenValueValid |= parseToken(path); } this.parser.nextToken(); } else { From 6ddd18b5950cc80a8142cf716822a857813a5f73 Mon Sep 17 00:00:00 2001 From: Michael Froh Date: Thu, 19 Sep 2024 18:51:12 -0700 Subject: [PATCH 4/4] Add skip for newly-added test The test fails on MixedClusterClientYamlTestSuiteIT because 2.x still has the infinite loop until backport. Signed-off-by: Michael Froh --- .../test/index/90_flat_object.yml | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml index c87f86eb89388..c0fc0090abedf 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml @@ -62,6 +62,22 @@ setup: }, "required_matches": 1 } + # Do index refresh + - do: + indices.refresh: + index: test + +--- +# Delete Index when connection is teardown +teardown: + - do: + indices.delete: + index: test +--- +"Invalid docs": + - skip: + version: "- 2.99.99" + reason: "parsing of these objects would infinite loop prior to 2.18" # The following documents are invalid. - do: catch: /parsing_exception/ @@ -70,7 +86,7 @@ setup: id: 3 body: { "ISBN13": "V12154942123242", - "catalog": ["Arrays in Action"], + "catalog": [ "Arrays in Action" ], "required_matches": 1 } - do: @@ -103,18 +119,6 @@ setup: "catalog": [ 12345 ], "required_matches": 1 } - # Do index refresh - - do: - indices.refresh: - index: test - ---- -# Delete Index when connection is teardown -teardown: - - do: - indices.delete: - index: test - --- # Verify that mappings under the catalog field did not expand # and no dynamic fields were created.