Skip to content

Commit

Permalink
ESQL: Change "substring" function to not return null on empty string (e…
Browse files Browse the repository at this point in the history
…lastic#109174)

Currently, `substring("", 1, 1)` returns null instead of an empty string. This PR changes it to return an empty string, which is the expected behaviour and matches other languages and databases.

Separately from this PR's intent, returning null currently causes the function to fail with an NPE. Such queries therefore already raise an error, so this change doesn't affect any existing working queries.

Related issue: elastic#109095
  • Loading branch information
ivancea authored May 30, 2024
1 parent d26be67 commit 0ce54d7
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 4 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/109174.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 109174
summary: "ESQL: Change \"substring\" function to not return null on empty string"
area: ES|QL
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ public static ExpectedResults loadCsvSpecValues(String csv) {
for (int i = 0; i < row.size(); i++) {
String value = row.get(i);
if (value == null) {
rowValues.add(null);
// Empty cells are converted to null by SuperCSV. We convert them back to empty strings.
rowValues.add("");
continue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,14 @@ emp_no:integer | last_name:keyword | x:keyword | z:keyword
10010 | Piveteau | P | a
;

substring empty string
required_capability: fn_substring_empty_null
row sub = substring("", 1, 3);

sub:keyword
""
;

substring Emoji#[skip:-8.13.99,reason:bug fix in 8.14]
row a = "🐱Meow!🐶Woof!" | eval sub1 = substring(a, 2) | eval sub2 = substring(a, 2, 100);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ public class EsqlCapabilities {
*/
private static final String FN_IP_PREFIX = "fn_ip_prefix";

/**
* Fix for the {@code SUBSTRING} function: it now returns an empty string, rather than null, when the input string is empty.
*/
private static final String FN_SUBSTRING_EMPTY_NULL = "fn_substring_empty_null";

/**
* Optimization for ST_CENTROID changed some results in cartesian data. #108713
*/
Expand All @@ -53,6 +58,7 @@ private static Set<String> capabilities() {
List<String> caps = new ArrayList<>();
caps.add(FN_CBRT);
caps.add(FN_IP_PREFIX);
caps.add(FN_SUBSTRING_EMPTY_NULL);
caps.add(ST_CENTROID_AGG_OPTIMIZED);
caps.add(METADATA_IGNORED_FIELD);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,12 @@ static BytesRef process(BytesRef str, int start) {

@Evaluator
static BytesRef process(BytesRef str, int start, int length) {
if (str.length == 0) {
return null;
}
if (length < 0) {
throw new IllegalArgumentException("Length parameter cannot be negative, found [" + length + "]");
}
if (str.length == 0) {
return str;
}
int codePointCount = UnicodeUtil.codePointCount(str);
int indexStart = indexStart(codePointCount, start);
int indexEnd = Math.min(codePointCount, indexStart + length);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,24 @@ public static Iterable<Object[]> parameters() {
equalTo(new BytesRef(text.substring(start - 1, start + length - 1)))
);
}
),
new TestCaseSupplier(
"Substring empty string",
List.of(DataTypes.TEXT, DataTypes.INTEGER, DataTypes.INTEGER),
() -> {
int start = between(1, 8);
int length = between(1, 10 - start);
return new TestCaseSupplier.TestCase(
List.of(
new TestCaseSupplier.TypedData(new BytesRef(""), DataTypes.TEXT, "str"),
new TestCaseSupplier.TypedData(start, DataTypes.INTEGER, "start"),
new TestCaseSupplier.TypedData(length, DataTypes.INTEGER, "end")
),
"SubstringEvaluator[str=Attribute[channel=0], start=Attribute[channel=1], length=Attribute[channel=2]]",
DataTypes.KEYWORD,
equalTo(new BytesRef(""))
);
}
)
)
)
Expand Down

0 comments on commit 0ce54d7

Please sign in to comment.