Skip to content

Commit

Permalink
suggestions applied
Browse files Browse the repository at this point in the history
  • Loading branch information
ilicmarkodb committed Sep 9, 2024
1 parent 20a1081 commit 6469ba1
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.delta.kernel.expressions;
package io.delta.kernel.types;

import io.delta.kernel.annotation.Evolving;

Expand All @@ -22,15 +22,42 @@

import static io.delta.kernel.internal.util.Preconditions.checkArgument;

/**
* Identifies collation for string type.
* <a href="https://github.com/delta-io/delta/blob/master/protocol_rfcs/collated-string-type.md#collation-identifiers">
* Collation identifiers</a>
*/
@Evolving
public class CollationIdentifier {
/**
* Spark collation provider.
* It provides two collations: {@code UTF8_BINARY} and {@code UTF8_LCASE}.
* <br>
* {@code UTF8_BINARY} is the default string collation. Strings compared under this collation
* are compared by the binary values of their UTF-8 encoded forms.
* <br>
* The {@code UTF8_LCASE} collation performs case-insensitive comparisons of UTF-8 encoded strings.
*/
public static final String PROVIDER_SPARK = "SPARK";
/**
* ICU collation provider.
* <a href="https://unicode-org.github.io/icu/userguide/collation/">ICU library collations</a>
*/
public static final String PROVIDER_ICU = "ICU";

/**
* ICU library supported version.
*/
public static final String ICU_COLLATOR_VERSION = "75.1";

/**
* Default collation name.
*/
public static final String DEFAULT_COLLATION_NAME = "UTF8_BINARY";

/**
* Default collation identifier.
*/
public static final CollationIdentifier DEFAULT_COLLATION_IDENTIFIER =
new CollationIdentifier(PROVIDER_SPARK, DEFAULT_COLLATION_NAME);

Expand All @@ -39,42 +66,53 @@ public class CollationIdentifier {
private final Optional<String> version;

/**
 * Creates a collation identifier that carries no provider version.
 *
 * <p>The provider and the collation name are normalized to upper case, and the version is
 * set to {@code Optional.empty()}.
 *
 * @param provider collation provider, e.g. {@link #PROVIDER_SPARK}; must not be null
 * @param collationName name of the collation; must not be null
 * @throws NullPointerException if {@code provider} or {@code collationName} is null
 */
public CollationIdentifier(String provider, String collationName) {
    Objects.requireNonNull(provider, "Collation provider cannot be null.");
    Objects.requireNonNull(collationName, "Collation name cannot be null.");

    // Normalize with Locale.ROOT: the no-arg toUpperCase() follows the JVM default locale, so
    // under e.g. a Turkish locale "icu" would become "İCU" and no longer match PROVIDER_ICU.
    this.provider = provider.toUpperCase(java.util.Locale.ROOT);
    this.name = collationName.toUpperCase(java.util.Locale.ROOT);
    this.version = Optional.empty();
}

public CollationIdentifier(String provider, String collationName, Optional<String> version) {
public CollationIdentifier(String provider, String collationName, String version) {
Objects.requireNonNull(provider, "Collation provider cannot be null.");
Objects.requireNonNull(collationName, "Collation name cannot be null.");
Objects.requireNonNull(version, "Provider version cannot be null.");

this.provider = provider.toUpperCase();
this.name = collationName.toUpperCase();
if (version.isPresent()) {
this.version = Optional.of(version.get().toUpperCase());
} else {
this.version = Optional.empty();
}
}

public String toStringWithoutVersion() {
return String.format("%s.%s", provider, name);
this.version = Optional.of(version);
}

/**
 * Returns the provider of this collation.
 *
 * @return collation provider.
 */
public String getProvider() {
    return this.provider;
}

/**
 * Returns the name of this collation.
 *
 * @return collation name.
 */
public String getName() {
    return this.name;
}

/**
 * Returns the provider version of this collation, if one was specified.
 *
 * @return provider version, or {@code Optional.empty()} when this identifier was created
 *         without a version.
 */
public Optional<String> getVersion() {
    return this.version;
}

/**
*
* @param identifier collation identifier string in the form
*                   {@code PROVIDER.COLLATION_NAME[.PROVIDER_VERSION]}.
* @return the corresponding collation identifier object.
*/
public static CollationIdentifier fromString(String identifier) {
long numDots = identifier.chars().filter(ch -> ch == '.').count();
checkArgument(numDots > 0, String.format("Invalid collation identifier: %s", identifier));
Expand All @@ -83,10 +121,13 @@ public static CollationIdentifier fromString(String identifier) {
return new CollationIdentifier(parts[0], parts[1]);
} else {
String[] parts = identifier.split("\\.", 3);
return new CollationIdentifier(parts[0], parts[1], Optional.of(parts[2]));
return new CollationIdentifier(parts[0], parts[1], parts[2]);
}
}

/**
* Collation identifiers are identical when the provider, name, and version are the same.
*/
@Override
public boolean equals(Object o) {
if (!(o instanceof CollationIdentifier)) {
Expand All @@ -99,6 +140,18 @@ public boolean equals(Object o) {
&& this.version.equals(other.version);
}

/**
 * Renders this identifier with the version part omitted.
 *
 * @return collation identifier in the form {@code PROVIDER.COLLATION_NAME}.
 */
public String toStringWithoutVersion() {
    return String.format("%s.%s", this.provider, this.name);
}

/**
*
* @return collation identifier in the form {@code PROVIDER.COLLATION_NAME[.PROVIDER_VERSION]}.
*/
@Override
public String toString() {
if (version.isPresent()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package io.delta.kernel.types;

import io.delta.kernel.annotation.Evolving;
import io.delta.kernel.expressions.CollationIdentifier;

/**
* The data type representing {@code string} type values.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.delta.kernel.expressions
package io.delta.kernel.types

import io.delta.kernel.types.CollationIdentifier._
import org.scalatest.funsuite.AnyFunSuite
import io.delta.kernel.expressions.CollationIdentifier._

import java.util.Optional

Expand Down

0 comments on commit 6469ba1

Please sign in to comment.