diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 642b74690a11..2b6bf1f57ee0 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -569,6 +569,14 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding,
   @LibraryOperator(libraries = {BIG_QUERY, MYSQL, ORACLE, REDSHIFT})
   public static final SqlFunction REGEXP_REPLACE = new SqlRegexpReplaceFunction();
 
+  /** The PostgreSQL variant of the
+   * "REGEXP_REPLACE(value, regexp, rep [, pos [, occurrence]] [, matchType])"
+   * function. Replaces all substrings of {@code value} that match {@code regexp}
+   * with {@code rep} and returns the modified value. */
+  @LibraryOperator(libraries = {POSTGRESQL})
+  public static final SqlFunction PG_REGEXP_REPLACE = new SqlPgRegexpReplaceFunction();
+
+
   /** The "REGEXP_SUBSTR(value, regexp[, position[, occurrence]])" function.
    * Returns the substring in value that matches the regexp. Returns NULL if there is no match. */
   @LibraryOperator(libraries = {BIG_QUERY})
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlPgRegexpReplaceFunction.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlPgRegexpReplaceFunction.java
new file mode 100644
index 000000000000..080b582d7635
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlPgRegexpReplaceFunction.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.sql.fun;
+
+import org.apache.calcite.sql.SqlCallBinding;
+import org.apache.calcite.sql.type.OperandTypes;
+import org.apache.calcite.sql.type.SqlTypeFamily;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * The PostgreSQL
+ * REGEXP_REPLACE(source_string, pattern, replacement [, pos [, occurrence]] [, match_type])
+ * searches for a regular expression pattern and replaces every occurrence of the pattern
+ * with the specified string. It differs from the standard REGEXP_REPLACE in that there is
+ * no type inference for the position or occurrence parameters, and the match_type parameter
+ * may be given as the 4th, 5th, or 6th parameter (operand index 3, 4, or 5).
+ */
+public class SqlPgRegexpReplaceFunction extends SqlRegexpReplaceFunction {
+
+  @Override public String getAllowedSignatures(String opNameToUse) {
+    return opNameToUse + "(VARCHAR, VARCHAR, VARCHAR [, INTEGER [, INTEGER]] [, VARCHAR])";
+  }
+
+  /** Validates operands as (STRING, STRING, STRING [, pos [, occurrence]] [, flags]);
+   * a string at index 3 or 4 is taken to be the flags and must be the last operand. */
+  @Override public boolean checkOperandTypes(SqlCallBinding callBinding,
+      boolean throwOnFailure) {
+    final int operandCount = callBinding.getOperandCount();
+    assert operandCount >= 3;
+    if (operandCount == 3) {
+      return OperandTypes.STRING_STRING_STRING
+          .checkOperandTypes(callBinding, throwOnFailure);
+    }
+    final List<SqlTypeFamily> families = new ArrayList<>();
+    families.add(SqlTypeFamily.STRING);
+    families.add(SqlTypeFamily.STRING);
+    families.add(SqlTypeFamily.STRING);
+    for (int i = 3; i < operandCount; i++) {
+      // The argument type at index 3 and 4 can be either integer or string.
+      // Index 3 can either be the start pos or the flags.
+      // Index 4 can either be the occurrence or the flags.
+      // If the flags get used at index 3 or 4, there can be no more arguments, since index 5
+      // can only be flags.
+      if (i == 3 || i == 4) {
+        if (SqlTypeFamily.STRING.contains(callBinding.getOperandType(i))) {
+          families.add(SqlTypeFamily.STRING);
+          break;
+        }
+        families.add(SqlTypeFamily.INTEGER);
+      } else if (i == 5) {
+        families.add(SqlTypeFamily.STRING);
+      }
+    }
+
+    // Operands left over after the flags were consumed make the call invalid.
+    if (operandCount != families.size()) {
+      if (throwOnFailure) {
+        throw callBinding.newValidationSignatureError();
+      }
+      return false;
+    }
+    return OperandTypes.family(families.toArray(new SqlTypeFamily[0]))
+        .checkOperandTypes(callBinding, throwOnFailure);
+  }
+}
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlRegexpReplaceFunction.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlRegexpReplaceFunction.java
index e56e74e2eca9..22017feb39f0 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlRegexpReplaceFunction.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlRegexpReplaceFunction.java
@@ -46,6 +46,10 @@ public SqlRegexpReplaceFunction() {
     return SqlOperandCountRanges.between(3, 6);
   }
 
+  @Override public String getAllowedSignatures(String opNameToUse) {
+    return opNameToUse + "(VARCHAR, VARCHAR, VARCHAR [, INTEGER [, INTEGER [, VARCHAR]]])";
+  }
+
   @Override public boolean checkOperandTypes(SqlCallBinding callBinding,
       boolean throwOnFailure) {
     final int operandCount = callBinding.getOperandCount();
@@ -59,16 +63,20 @@ public SqlRegexpReplaceFunction() {
     families.add(SqlTypeFamily.STRING);
     families.add(SqlTypeFamily.STRING);
     for (int i = 3; i < operandCount; i++) {
+      // The argument type at index 3 and 4 can be either integer or string.
+      // Index 3 can either be the start pos or the flags.
+      // Index 4 can either be the occurrence or the flags.
+      // NOTE: the PostgreSQL variant (SqlPgRegexpReplaceFunction) accepts flags at index 3 or 4;
+      // the checks below require INTEGER at index 3 and 4, and STRING (the flags) at index 5.
       if (i == 3) {
         families.add(SqlTypeFamily.INTEGER);
-      }
-      if (i == 4) {
+      } else if (i == 4) {
         families.add(SqlTypeFamily.INTEGER);
-      }
-      if (i == 5) {
+      } else if (i == 5) {
         families.add(SqlTypeFamily.STRING);
       }
     }
+
     return OperandTypes.family(families.toArray(new SqlTypeFamily[0]))
         .checkOperandTypes(callBinding, throwOnFailure);
   }
diff --git a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
index bb4dc7b7831e..984fd563ecf7 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
@@ -11715,6 +11715,51 @@ private void checkCustomColumnResolving(String table) {
         .columnType("VARCHAR NOT NULL");
   }
 
+  @Test void testPgRegexpReplace() {
+    final SqlOperatorTable opTable = operatorTableFor(SqlLibrary.POSTGRESQL);
+
+    expr("REGEXP_REPLACE('a b c', 'a', 'X')")
+        .withOperatorTable(opTable)
+        .columnType("VARCHAR NOT NULL");
+    expr("REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 2)")
+        .withOperatorTable(opTable)
+        .columnType("VARCHAR NOT NULL");
+    expr("REGEXP_REPLACE('abc def GHI', '[a-z]+', 'X', 'c')")
+        .withOperatorTable(opTable)
+        .columnType("VARCHAR NOT NULL");
+    expr("REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 1, 3)")
+        .withOperatorTable(opTable)
+        .columnType("VARCHAR NOT NULL");
+    expr("REGEXP_REPLACE('abc def GHI', '[a-z]+', 'X', 1, 'c')")
+        .withOperatorTable(opTable)
+        .columnType("VARCHAR NOT NULL");
+    expr("REGEXP_REPLACE('abc def GHI', '[a-z]+', 'X', 1, 3, 'c')")
+        .withOperatorTable(opTable)
+        .columnType("VARCHAR NOT NULL");
+    // Implicit type coercion: a NULL literal is accepted as a string operand; result is nullable.
+    expr("REGEXP_REPLACE(null, '(-)', '###')")
+        .withOperatorTable(opTable)
+        .columnType("VARCHAR");
+    expr("REGEXP_REPLACE('100-200', null, '###')")
+        .withOperatorTable(opTable)
+        .columnType("VARCHAR");
+    expr("REGEXP_REPLACE('100-200', '(-)', null)")
+        .withOperatorTable(opTable)
+        .columnType("VARCHAR");
+
+    // A string argument at index 3 or later must be the flags parameter.
+    // No other arguments may follow it.
+    expr("^REGEXP_REPLACE('abc def GHI', '[a-z]+', 'X', 'c', 1)^")
+        .withOperatorTable(opTable)
+        .fails("Cannot apply 'REGEXP_REPLACE' to arguments of type .*");
+    expr("^REGEXP_REPLACE('abc def GHI', '[a-z]+', 'X', 'c', 'c')^")
+        .withOperatorTable(opTable)
+        .fails("Cannot apply 'REGEXP_REPLACE' to arguments of type .*");
+    expr("^REGEXP_REPLACE('abc def GHI', '[a-z]+', 'X', 1, 'c', 'c')^")
+        .withOperatorTable(opTable)
+        .fails("Cannot apply 'REGEXP_REPLACE' to arguments of type .*");
+  }
+
   @Test void testInvalidFunctionCall() {
     final SqlOperatorTable operatorTable =
         MockSqlOperatorTable.standard().extend();
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index dc80e67ed93c..7814c2a0c50d 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2847,7 +2847,8 @@ In the following:
 | b | REGEXP_EXTRACT_ALL(string, regexp) | Returns an array of all substrings in *string* that matches the *regexp*. Returns an empty array if there is no match
 | b | REGEXP_INSTR(string, regexp [, position [, occurrence [, occurrence_position]]]) | Returns the lowest 1-based position of the substring in *string* that matches the *regexp*, starting search at *position* (default 1), and until locating the nth *occurrence* (default 1). Setting occurrence_position (default 0) to 1 returns the end position of substring + 1. Returns 0 if there is no match
 | m o p r s | REGEXP_LIKE(string, regexp [, flags]) | Equivalent to `string1 RLIKE string2` with an optional parameter for search flags.
Supported flags are:
 | b m o r | REGEXP_REPLACE(string, regexp, rep [, pos [, occurrence [, matchType]]]) | Replaces all substrings of *string* that match *regexp* with *rep* at the starting *pos* in expr (if omitted, the default is 1), *occurrence* specifies which occurrence of a match to search for (if omitted, the default is 1), *matchType* specifies how to perform matching
+| p | REGEXP_REPLACE(string, regexp, rep [, pos [, occurrence]] [, matchType]) | Replaces all substrings of *string* that match *regexp* with *rep* at the starting *pos* in expr (if omitted, the default is 1), *occurrence* specifies which occurrence of a match to search for (if omitted, the default is 1), *matchType* specifies how to perform matching and may directly follow *rep* or *pos*
 | b | REGEXP_SUBSTR(string, regexp [, position [, occurrence]]) | Synonym for REGEXP_EXTRACT
 | b m p r s | REPEAT(string, integer) | Returns a string consisting of *string* repeated of *integer* times; returns an empty string if *integer* is less than 1
 | b m | REVERSE(string) | Returns *string* with the order of the characters reversed