From 01360c1f4b2369d4a13fc977d54d54852538b938 Mon Sep 17 00:00:00 2001 From: xuyu <11161569@vivo.com> Date: Fri, 24 Jan 2025 11:09:27 +0800 Subject: [PATCH] [CALCITE-6241] Enable a few existing functions to Hive library --- .../calcite/sql/fun/SqlLibraryOperators.java | 38 +++++++++---------- site/_docs/reference.md | 34 ++++++++--------- .../apache/calcite/test/SqlOperatorTest.java | 15 ++++---- 3 files changed, 44 insertions(+), 43 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java index 9028e16c7b2b..580eaeac09f6 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java @@ -134,7 +134,7 @@ private SqlLibraryOperators() { /** The "DATE_ADD(date, numDays)" function * (Spark) Returns the date that is num_days after start_date. */ - @LibraryOperator(libraries = {SPARK}) + @LibraryOperator(libraries = {SPARK, HIVE}) public static final SqlFunction DATE_ADD_SPARK = SqlBasicFunction.create(SqlKind.DATE_ADD, ReturnTypes.DATE_NULLABLE, OperandTypes.DATE_ANY) @@ -142,7 +142,7 @@ private SqlLibraryOperators() { /** The "DATE_SUB(date, numDays)" function * (Spark) Returns the date that is num_days before start_date.*/ - @LibraryOperator(libraries = {SPARK}) + @LibraryOperator(libraries = {SPARK, HIVE}) public static final SqlFunction DATE_SUB_SPARK = SqlBasicFunction.create(SqlKind.DATE_SUB, ReturnTypes.DATE_NULLABLE, OperandTypes.DATE_ANY) @@ -162,7 +162,7 @@ private SqlLibraryOperators() { *
MySQL has "DATEDIFF(date, date2)" and "TIMEDIFF(time, time2)" functions * but Calcite does not implement these because they have no "timeUnit" * argument. */ - @LibraryOperator(libraries = {MSSQL, REDSHIFT, SNOWFLAKE}) + @LibraryOperator(libraries = {MSSQL, REDSHIFT, SNOWFLAKE, HIVE}) public static final SqlFunction DATEDIFF = new SqlTimestampDiffFunction("DATEDIFF", OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.DATE, @@ -261,7 +261,7 @@ private static SqlCall transformConvert(SqlValidator validator, SqlCall call) { }; /** The "DECODE(v, v1, result1, [v2, result2, ...], resultN)" function. */ - @LibraryOperator(libraries = {ORACLE, REDSHIFT, SPARK}) + @LibraryOperator(libraries = {ORACLE, REDSHIFT, SPARK, HIVE}) public static final SqlFunction DECODE = SqlBasicFunction.create(SqlKind.DECODE, DECODE_RETURN_TYPE, OperandTypes.VARIADIC); @@ -292,7 +292,7 @@ private static SqlCall transformConvert(SqlValidator validator, SqlCall call) { } /** The "NVL(value, value)" function. */ - @LibraryOperator(libraries = {ORACLE, REDSHIFT, SPARK}) + @LibraryOperator(libraries = {ORACLE, REDSHIFT, SPARK, HIVE}) public static final SqlBasicFunction NVL = SqlBasicFunction.create(SqlKind.NVL, ReturnTypes.LEAST_RESTRICTIVE @@ -327,7 +327,7 @@ private static RelDataType deriveTypePad(SqlOperatorBinding binding, RelDataType } /** The "LPAD(original_value, return_length[, pattern])" function. */ - @LibraryOperator(libraries = {BIG_QUERY, ORACLE, POSTGRESQL, SPARK}) + @LibraryOperator(libraries = {BIG_QUERY, ORACLE, POSTGRESQL, SPARK, HIVE}) public static final SqlFunction LPAD = SqlBasicFunction.create( "LPAD", @@ -336,7 +336,7 @@ private static RelDataType deriveTypePad(SqlOperatorBinding binding, RelDataType SqlFunctionCategory.STRING); /** The "RPAD(original_value, return_length[, pattern])" function. 
*/ - @LibraryOperator(libraries = {BIG_QUERY, ORACLE, POSTGRESQL, SPARK}) + @LibraryOperator(libraries = {BIG_QUERY, ORACLE, POSTGRESQL, SPARK, HIVE}) public static final SqlFunction RPAD = SqlBasicFunction.create( "RPAD", @@ -345,7 +345,7 @@ private static RelDataType deriveTypePad(SqlOperatorBinding binding, RelDataType SqlFunctionCategory.STRING); /** The "LTRIM(string)" function. */ - @LibraryOperator(libraries = {BIG_QUERY, ORACLE, POSTGRESQL, SPARK}) + @LibraryOperator(libraries = {BIG_QUERY, ORACLE, POSTGRESQL, SPARK, HIVE}) public static final SqlFunction LTRIM = SqlBasicFunction.create(SqlKind.LTRIM, ReturnTypes.ARG0.andThen(SqlTypeTransforms.TO_NULLABLE) @@ -354,7 +354,7 @@ private static RelDataType deriveTypePad(SqlOperatorBinding binding, RelDataType .withFunctionType(SqlFunctionCategory.STRING); /** The "RTRIM(string)" function. */ - @LibraryOperator(libraries = {BIG_QUERY, ORACLE, POSTGRESQL, SPARK}) + @LibraryOperator(libraries = {BIG_QUERY, ORACLE, POSTGRESQL, SPARK, HIVE}) public static final SqlFunction RTRIM = SqlBasicFunction.create(SqlKind.RTRIM, ReturnTypes.ARG0.andThen(SqlTypeTransforms.TO_NULLABLE) @@ -401,7 +401,7 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, public static final SqlFunction STRPOS = new SqlPositionFunction("STRPOS"); /** The "INSTR(string, substring [, position [, occurrence]])" function. */ - @LibraryOperator(libraries = {BIG_QUERY, HIVE, MYSQL, ORACLE}) + @LibraryOperator(libraries = {BIG_QUERY, HIVE, MYSQL, ORACLE}) public static final SqlFunction INSTR = new SqlPositionFunction("INSTR"); /** Generic "SUBSTR(string, position [, substringLength ])" function. */ @@ -484,7 +484,7 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, SqlFunctionCategory.STRING); /** The "GREATEST(value, value)" function.
*/ - @LibraryOperator(libraries = {BIG_QUERY, ORACLE}) + @LibraryOperator(libraries = {BIG_QUERY, ORACLE, HIVE}) public static final SqlFunction GREATEST = SqlBasicFunction.create(SqlKind.GREATEST, ReturnTypes.LEAST_RESTRICTIVE.andThen(SqlTypeTransforms.TO_NULLABLE), @@ -499,7 +499,7 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, OperandTypes.SAME_VARIADIC); /** The "LEAST(value, value)" function. */ - @LibraryOperator(libraries = {BIG_QUERY, ORACLE}) + @LibraryOperator(libraries = {BIG_QUERY, ORACLE, HIVE}) public static final SqlFunction LEAST = SqlBasicFunction.create(SqlKind.LEAST, ReturnTypes.LEAST_RESTRICTIVE.andThen(SqlTypeTransforms.TO_NULLABLE), @@ -613,7 +613,7 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, /** The "REGEXP_REPLACE(value, regexp, rep)" * function. Replaces all substrings of value that match regexp with * {@code rep} and returns modified value. */ - @LibraryOperator(libraries = {MYSQL, ORACLE, REDSHIFT}) + @LibraryOperator(libraries = {MYSQL, ORACLE, REDSHIFT, HIVE}) public static final SqlFunction REGEXP_REPLACE_3 = SqlBasicFunction.create("REGEXP_REPLACE", ReturnTypes.VARCHAR_NULLABLE, OperandTypes.STRING_STRING_STRING, SqlFunctionCategory.STRING); @@ -1093,7 +1093,7 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, ReturnTypes.ARG0_NULLABLE_VARYING, OperandTypes.CBSTRING_INTEGER, SqlFunctionCategory.STRING); - @LibraryOperator(libraries = {BIG_QUERY, MYSQL, POSTGRESQL, SPARK}) + @LibraryOperator(libraries = {BIG_QUERY, MYSQL, POSTGRESQL, SPARK, HIVE}) public static final SqlFunction REPEAT = SqlBasicFunction.create("REPEAT", ReturnTypes.VARCHAR_NULLABLE, @@ -1105,7 +1105,7 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, SqlBasicFunction.create("RIGHT", ReturnTypes.ARG0_NULLABLE_VARYING, OperandTypes.CBSTRING_INTEGER, SqlFunctionCategory.STRING); - @LibraryOperator(libraries = {MYSQL, SPARK}) + @LibraryOperator(libraries = {MYSQL, 
SPARK, HIVE}) public static final SqlFunction SPACE = SqlBasicFunction.create("SPACE", ReturnTypes.VARCHAR_NULLABLE, @@ -1119,7 +1119,7 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, OperandTypes.STRING_STRING, SqlFunctionCategory.STRING); - @LibraryOperator(libraries = {BIG_QUERY, MYSQL, POSTGRESQL, ORACLE}) + @LibraryOperator(libraries = {BIG_QUERY, MYSQL, POSTGRESQL, ORACLE, HIVE}) public static final SqlFunction SOUNDEX = SqlBasicFunction.create("SOUNDEX", ReturnTypes.VARCHAR_4_NULLABLE, @@ -1921,7 +1921,7 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding /** The "TO_DATE(string1, string2)" function; casts string1 * to a DATE using the format specified in string2. */ - @LibraryOperator(libraries = {ORACLE, REDSHIFT}) + @LibraryOperator(libraries = {ORACLE, REDSHIFT, HIVE}) public static final SqlFunction TO_DATE = SqlBasicFunction.create("TO_DATE", ReturnTypes.DATE_NULLABLE, @@ -2490,7 +2490,7 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding OperandTypes.STRING.or(OperandTypes.BINARY), SqlFunctionCategory.STRING); - @LibraryOperator(libraries = {BIG_QUERY, MYSQL, POSTGRESQL, SPARK}) + @LibraryOperator(libraries = {BIG_QUERY, MYSQL, POSTGRESQL, SPARK, HIVE}) public static final SqlFunction SHA1 = SqlBasicFunction.create("SHA1", ReturnTypes.VARCHAR_NULLABLE, @@ -2541,7 +2541,7 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding /** The "LOG(numeric1 [, numeric2 ]) " function. 
Returns the logarithm of numeric2 * to base numeric1.*/ - @LibraryOperator(libraries = {MYSQL, SPARK}) + @LibraryOperator(libraries = {MYSQL, SPARK, HIVE}) public static final SqlFunction LOG_MYSQL = SqlBasicFunction.create(SqlKind.LOG, ReturnTypes.DOUBLE_FORCE_NULLABLE, diff --git a/site/_docs/reference.md b/site/_docs/reference.md index 48d2d62a975f..cd3bebc30a24 100644 --- a/site/_docs/reference.md +++ b/site/_docs/reference.md @@ -2852,7 +2852,7 @@ In the following: | b | DATE(string) | Equivalent to `CAST(string AS DATE)` | b | DATE(year, month, day) | Returns a DATE value for *year*, *month*, and *day* (all of type INTEGER) | q r f | DATEADD(timeUnit, integer, datetime) | Equivalent to `TIMESTAMPADD(timeUnit, integer, datetime)` -| q r f | DATEDIFF(timeUnit, datetime, datetime2) | Equivalent to `TIMESTAMPDIFF(timeUnit, datetime, datetime2)` +| q r f h | DATEDIFF(timeUnit, datetime, datetime2) | Equivalent to `TIMESTAMPDIFF(timeUnit, datetime, datetime2)` | q | DATEPART(timeUnit, datetime) | Equivalent to `EXTRACT(timeUnit FROM datetime)` | b | DATETIME(date, time) | Converts *date* and *time* to a TIMESTAMP | b | DATETIME(date) | Converts *date* to a TIMESTAMP value (at midnight) @@ -2865,12 +2865,12 @@ In the following: | b s | DATE_FROM_UNIX_DATE(integer) | Returns the DATE that is *integer* days after 1970-01-01 | p r | DATE_PART(timeUnit, datetime) | Equivalent to `EXTRACT(timeUnit FROM datetime)` | b | DATE_ADD(date, interval) | Returns the DATE value that occurs *interval* after *date* -| s | DATE_ADD(date, numDays) | Returns the DATE that is *numDays* after *date* +| s h | DATE_ADD(date, numDays) | Returns the DATE that is *numDays* after *date* | b | DATE_DIFF(date, date2, timeUnit) | Returns the whole number of *timeUnit* between *date* and *date2* | b | DATE_SUB(date, interval) | Returns the DATE value that occurs *interval* before *date* -| s | DATE_SUB(date, numDays) | Returns the DATE that is *numDays* before *date* +| s h | DATE_SUB(date, 
numDays) | Returns the DATE that is *numDays* before *date* | b | DATE_TRUNC(date, timeUnit) | Truncates *date* to the granularity of *timeUnit*, rounding to the beginning of the unit -| o r s | DECODE(value, value1, result1 [, valueN, resultN ]* [, default ]) | Compares *value* to each *valueN* value one by one; if *value* is equal to a *valueN*, returns the corresponding *resultN*, else returns *default*, or NULL if *default* is not specified +| o r s h | DECODE(value, value1, result1 [, valueN, resultN ]* [, default ]) | Compares *value* to each *valueN* value one by one; if *value* is equal to a *valueN*, returns the corresponding *resultN*, else returns *default*, or NULL if *default* is not specified | p r | DIFFERENCE(string, string) | Returns a measure of the similarity of two strings, namely the number of character positions that their `SOUNDEX` values have in common: 4 if the `SOUNDEX` values are same and 0 if the `SOUNDEX` values are totally different | f s | ENDSWITH(string1, string2) | Returns whether *string2* is a suffix of *string1* | b | ENDS_WITH(string1, string2) | Equivalent to `ENDSWITH(string1, string2)` @@ -2888,7 +2888,7 @@ In the following: | b | FORMAT_TIME(string, time) | Formats *time* according to the specified format *string* | b | FORMAT_TIMESTAMP(string timestamp) | Formats *timestamp* according to the specified format *string* | s | GETBIT(value, position) | Equivalent to `BIT_GET(value, position)` -| b o p r s | GREATEST(expr [, expr ]*) | Returns the greatest of the expressions +| b o p r s h | GREATEST(expr [, expr ]*) | Returns the greatest of the expressions | b h s | IF(condition, value1, value2) | Returns *value1* if *condition* is TRUE, *value2* otherwise | b s | IFNULL(value1, value2) | Equivalent to `NVL(value1, value2)` | p | string1 ILIKE string2 [ ESCAPE string3 ] | Whether *string1* matches pattern *string2*, ignoring case (similar to `LIKE`) @@ -2907,13 +2907,13 @@ In the following: | m | JSON_REPLACE(jsonValue, path, 
val [, path, val ]*) | Returns a JSON document replace a data of *jsonValue*, *path*, *val* | m | JSON_SET(jsonValue, path, val [, path, val ]*) | Returns a JSON document set a data of *jsonValue*, *path*, *val* | m | JSON_STORAGE_SIZE(jsonValue) | Returns the number of bytes used to store the binary representation of *jsonValue* -| b o p r s | LEAST(expr [, expr ]* ) | Returns the least of the expressions +| b o p r s h | LEAST(expr [, expr ]* ) | Returns the least of the expressions | b m p r s | LEFT(string, length) | Returns the leftmost *length* characters from the *string* | f r s | LEN(string) | Equivalent to `CHAR_LENGTH(string)` | b f h p r s | LENGTH(string) | Equivalent to `CHAR_LENGTH(string)` | h s | LEVENSHTEIN(string1, string2) | Returns the Levenshtein distance between *string1* and *string2* | b | LOG(numeric1 [, base ]) | Returns the logarithm of *numeric1* to base *base*, or base e if *base* is not present, or error if *numeric1* is 0 or negative -| m s | LOG([, base ], numeric1) | Returns the logarithm of *numeric1* to base *base*, or base e if *base* is not present, or null if *numeric1* is 0 or negative +| m s h | LOG([, base ], numeric1) | Returns the logarithm of *numeric1* to base *base*, or base e if *base* is not present, or null if *numeric1* is 0 or negative | p | LOG([, base ], numeric1 ) | Returns the logarithm of *numeric1* to base *base*, or base 10 if *numeric1* is not present, or error if *numeric1* is 0 or negative | m s | LOG2(numeric) | Returns the base 2 logarithm of *numeric* | s | LOG1P(numeric) | Returns the natural logarithm of 1 plus *numeric* @@ -2924,7 +2924,7 @@ In the following: | b m | FROM_BASE64(string) | Returns the decoded result of a base-64 *string* as a string. 
If the input argument is an invalid base-64 *string* the function returns `NULL` | b | TO_HEX(binary) | Converts *binary* into a hexadecimal varchar | b | FROM_HEX(varchar) | Converts a hexadecimal-encoded *varchar* into bytes -| b o p r s | LTRIM(string) | Returns *string* with all blanks removed from the start +| b o p r s h | LTRIM(string) | Returns *string* with all blanks removed from the start | s | MAP() | Returns an empty map | s | MAP(key, value [, key, value]*) | Returns a map with the given *key*/*value* pairs | s | MAP_CONCAT(map [, map]*) | Concatenates one or more maps. If any input argument is `NULL` the function returns `NULL`. Note that calcite is using the LAST_WIN strategy @@ -2938,7 +2938,7 @@ In the following: | s | SUBSTRING_INDEX(string, delim, count) | Returns the substring from *string* before *count* occurrences of the delimiter *delim*. If *count* is positive, everything to the left of the final delimiter (counting from the left) is returned. If *count* is negative, everything to the right of the final delimiter (counting from the right) is returned. The function substring_index performs a case-sensitive match when searching for *delim*. 
| b m p r s | MD5(string) | Calculates an MD5 128-bit checksum of *string* and returns it as a hex string | m | MONTHNAME(date) | Returns the name, in the connection's locale, of the month in *datetime*; for example, for a locale of en, it will return 'February' for both DATE '2020-02-10' and TIMESTAMP '2020-02-10 10:10:10', and for a locale of zh, it will return '二月' -| o r s | NVL(value1, value2) | Returns *value1* if *value1* is not null, otherwise *value2* +| o r s h | NVL(value1, value2) | Returns *value1* if *value1* is not null, otherwise *value2* | o r s | NVL2(value1, value2, value3) | Returns *value2* if *value1* is not null, otherwise *value3* | b | OFFSET(index) | When indexing an array, wrapping *index* in `OFFSET` returns the value at the 0-based *index*; throws error if *index* is out of bounds | b | ORDINAL(index) | Similar to `OFFSET` except *index* begins at 1 @@ -2957,17 +2957,17 @@ In the following: | b | REGEXP_INSTR(string, regexp [, position [, occurrence [, occurrence_position]]]) | Returns the lowest 1-based position of the substring in *string* that matches the *regexp*, starting search at *position* (default 1), and until locating the nth *occurrence* (default 1). Setting occurrence_position (default 0) to 1 returns the end position of substring + 1. Returns 0 if there is no match | m o p r s | REGEXP_LIKE(string, regexp [, flags]) | Equivalent to `string1 RLIKE string2` with an optional parameter for search flags. Supported flags are: