From 6469ba154c6f44174f7ec12a278ba77dac16d25e Mon Sep 17 00:00:00 2001 From: Marko Date: Mon, 9 Sep 2024 16:29:05 +0200 Subject: [PATCH] suggestions applied --- .../CollationIdentifier.java | 79 ++++++++++++++++--- .../io/delta/kernel/types/StringType.java | 1 - .../CollationIdentifierSuite.scala | 4 +- 3 files changed, 68 insertions(+), 16 deletions(-) rename kernel/kernel-api/src/main/java/io/delta/kernel/{expressions => types}/CollationIdentifier.java (63%) rename kernel/kernel-api/src/test/scala/io/delta/kernel/{expressions => types}/CollationIdentifierSuite.scala (96%) diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CollationIdentifier.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/CollationIdentifier.java similarity index 63% rename from kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CollationIdentifier.java rename to kernel/kernel-api/src/main/java/io/delta/kernel/types/CollationIdentifier.java index 51eea38d31..b8d3234f24 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CollationIdentifier.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/CollationIdentifier.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.delta.kernel.expressions; +package io.delta.kernel.types; import io.delta.kernel.annotation.Evolving; @@ -22,15 +22,42 @@ import static io.delta.kernel.internal.util.Preconditions.checkArgument; +/** + * Identifies collation for string type. + * + * Collation identifiers + */ @Evolving public class CollationIdentifier { + /** + * Spark collation provider. + * It provides two collations: {@code UTF8_BINARY} and {@code UTF8_LCASE}. + *
+ * {@code UTF8_BINARY} is the default string collation. Comparing strings using this collation + * compares the binary values of their UTF-8 encoded forms. + *
+ * The {@code UTF8_LCASE} collation performs case-insensitive comparisons of UTF-8 encoded strings. + */ public static final String PROVIDER_SPARK = "SPARK"; + /** + * ICU collation provider. + * ICU library collations + */ public static final String PROVIDER_ICU = "ICU"; + /** + * ICU library supported version. + */ public static final String ICU_COLLATOR_VERSION = "75.1"; + /** + * Default collation name. + */ public static final String DEFAULT_COLLATION_NAME = "UTF8_BINARY"; + /** + * Default collation identifier. + */ public static final CollationIdentifier DEFAULT_COLLATION_IDENTIFIER = new CollationIdentifier(PROVIDER_SPARK, DEFAULT_COLLATION_NAME); @@ -39,42 +66,53 @@ public class CollationIdentifier { private final Optional version; public CollationIdentifier(String provider, String collationName) { + Objects.requireNonNull(provider, "Collation provider cannot be null."); + Objects.requireNonNull(collationName, "Collation name cannot be null."); + this.provider = provider.toUpperCase(); this.name = collationName.toUpperCase(); this.version = Optional.empty(); } - public CollationIdentifier(String provider, String collationName, Optional version) { + public CollationIdentifier(String provider, String collationName, String version) { Objects.requireNonNull(provider, "Collation provider cannot be null."); Objects.requireNonNull(collationName, "Collation name cannot be null."); Objects.requireNonNull(version, "Provider version cannot be null."); this.provider = provider.toUpperCase(); this.name = collationName.toUpperCase(); - if (version.isPresent()) { - this.version = Optional.of(version.get().toUpperCase()); - } else { - this.version = Optional.empty(); - } - } - - public String toStringWithoutVersion() { - return String.format("%s.%s", provider, name); + this.version = Optional.of(version); } + /** + * + * @return collation provider. + */ public String getProvider() { return provider; } + /** + * + * @return collation name. 
+ */ public String getName() { return name; } - // Returns Optional.empty() + /** + * + * @return provider version. + */ public Optional getVersion() { return version; } + /** + * + * @param identifier collation identifier in string form of
{@code PROVIDER.COLLATION_NAME[.PROVIDER_VERSION]}. + * @return appropriate collation identifier object + */ public static CollationIdentifier fromString(String identifier) { long numDots = identifier.chars().filter(ch -> ch == '.').count(); checkArgument(numDots > 0, String.format("Invalid collation identifier: %s", identifier)); @@ -83,10 +121,13 @@ public static CollationIdentifier fromString(String identifier) { return new CollationIdentifier(parts[0], parts[1]); } else { String[] parts = identifier.split("\\.", 3); - return new CollationIdentifier(parts[0], parts[1], Optional.of(parts[2])); + return new CollationIdentifier(parts[0], parts[1], parts[2]); } } + /** + * Collation identifiers are identical when the provider, name, and version are the same. + */ @Override public boolean equals(Object o) { if (!(o instanceof CollationIdentifier)) { @@ -99,6 +140,18 @@ public boolean equals(Object o) { && this.version.equals(other.version); } + /** + * + * @return collation identifier in form of {@code PROVIDER.COLLATION_NAME}. + */ + public String toStringWithoutVersion() { + return String.format("%s.%s", provider, name); + } + + /** + * + * @return collation identifier in form of {@code PROVIDER.COLLATION_NAME[.PROVIDER_VERSION]} + */ @Override public String toString() { if (version.isPresent()) { diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java index bad4cf0fb6..75001a2ef3 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java @@ -16,7 +16,6 @@ package io.delta.kernel.types; import io.delta.kernel.annotation.Evolving; -import io.delta.kernel.expressions.CollationIdentifier; /** * The data type representing {@code string} type values. 
diff --git a/kernel/kernel-api/src/test/scala/io/delta/kernel/expressions/CollationIdentifierSuite.scala b/kernel/kernel-api/src/test/scala/io/delta/kernel/types/CollationIdentifierSuite.scala similarity index 96% rename from kernel/kernel-api/src/test/scala/io/delta/kernel/expressions/CollationIdentifierSuite.scala rename to kernel/kernel-api/src/test/scala/io/delta/kernel/types/CollationIdentifierSuite.scala index 69d2197b23..72fad3ed71 100644 --- a/kernel/kernel-api/src/test/scala/io/delta/kernel/expressions/CollationIdentifierSuite.scala +++ b/kernel/kernel-api/src/test/scala/io/delta/kernel/types/CollationIdentifierSuite.scala @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.delta.kernel.expressions +package io.delta.kernel.types +import io.delta.kernel.types.CollationIdentifier._ import org.scalatest.funsuite.AnyFunSuite -import io.delta.kernel.expressions.CollationIdentifier._ import java.util.Optional