From c047d70ed59342b1521cd4bf1ba44ad2e2c4985b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pa=C5=82ka?= Date: Thu, 24 Oct 2024 00:41:33 +0200 Subject: [PATCH] Make actual data types disjunct --- src/main/ColumnOp.scala | 307 +++++++++++----------------- src/main/DataFrameBuilders.scala | 2 +- src/main/FrameSchema.scala | 2 +- src/main/JoinOnCondition.scala | 4 +- src/main/UntypedOps.scala | 4 +- src/main/When.scala | 8 +- src/main/Where.scala | 4 +- src/main/api/api.scala | 57 ++++-- src/main/functions/aggregates.scala | 10 +- src/main/types/Coerce.scala | 21 +- src/main/types/DataType.scala | 191 ++++++++++------- src/main/types/Encoder.scala | 70 +++---- src/test/CoerceTest.scala | 26 ++- src/test/JoinTest.scala | 48 ++--- src/test/WhenTest.scala | 6 +- src/test/example/Books.scala | 2 +- 16 files changed, 380 insertions(+), 382 deletions(-) diff --git a/src/main/ColumnOp.scala b/src/main/ColumnOp.scala index 9032d72..5eaa8dc 100644 --- a/src/main/ColumnOp.scala +++ b/src/main/ColumnOp.scala @@ -8,210 +8,131 @@ import org.virtuslab.iskra.UntypedOps.typed import org.virtuslab.iskra.types.* import DataType.* +trait ColumnOp: + type Out <: DataType + object ColumnOp: - trait Plus[T1 <: DataType, T2 <: DataType]: - type Out <: DataType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped + col2.untyped).typed[Out] + trait ResultType[T <: DataType] extends ColumnOp: + override type Out = T + + abstract class BinaryColumnOp[T1 <: DataType, T2 <: DataType](untypedOp: (UntypedColumn, UntypedColumn) => UntypedColumn) extends ColumnOp: + def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = untypedOp(col1.untyped, col2.untyped).typed[Out] + + class Plus[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ + _) object Plus: - given numericNonNullable[T1 <: NumericType, T2 <: NumericType]: Plus[T1, T2] with - type Out = DataType.CommonNumericNonNullableType[T1, T2] - given numericNullable[T1 <: NumericOptType, T2 <: NumericOptType]: 
Plus[T1, T2] with - type Out = DataType.CommonNumericNullableType[T1, T2] - - trait Minus[T1 <: DataType, T2 <: DataType]: - type Out <: DataType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped - col2.untyped).typed[Out] + given numerics[T1 <: DoubleOptLike, T2 <: DoubleOptLike]: (Plus[T1, T2] { type Out = CommonNumericType[T1, T2] }) = + new Plus[T1, T2] with ResultType[CommonNumericType[T1, T2]] + + class Minus[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ - _) object Minus: - given numericNonNullable[T1 <: NumericType, T2 <: NumericType]: Minus[T1, T2] with - type Out = DataType.CommonNumericNonNullableType[T1, T2] - given numericNullable[T1 <: NumericOptType, T2 <: NumericOptType]: Minus[T1, T2] with - type Out = DataType.CommonNumericNullableType[T1, T2] - - trait Mult[T1 <: DataType, T2 <: DataType]: - type Out <: DataType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped * col2.untyped).typed[Out] + given numerics[T1 <: DoubleOptLike, T2 <: DoubleOptLike]: (Minus[T1, T2] { type Out = CommonNumericType[T1, T2] }) = + new Minus[T1, T2] with ResultType[CommonNumericType[T1, T2]] + + class Mult[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ * _) object Mult: - given numericNonNullable[T1 <: NumericType, T2 <: NumericType]: Mult[T1, T2] with - type Out = DataType.CommonNumericNonNullableType[T1, T2] - given numericNullable[T1 <: NumericOptType, T2 <: NumericOptType]: Mult[T1, T2] with - type Out = DataType.CommonNumericNullableType[T1, T2] - - trait Div[T1 <: DataType, T2 <: DataType]: - type Out <: DataType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped / col2.untyped).typed[Out] + given numerics[T1 <: DoubleOptLike, T2 <: DoubleOptLike]: (Mult[T1, T2] { type Out = CommonNumericType[T1, T2] }) = + new Mult[T1, T2] with ResultType[CommonNumericType[T1, T2]] + + class Div[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ / _) object Div: - given 
numericNonNullable[T1 <: NumericType, T2 <: NumericType]: Div[T1, T2] with - type Out = DoubleType - given numericNullable[T1 <: NumericOptType, T2 <: NumericOptType]: Div[T1, T2] with - type Out = DoubleOptType - - trait PlusPlus[T1 <: DataType, T2 <: DataType]: - type Out <: DataType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = concat(col1.untyped, col2.untyped).typed[Out] + given numerics[T1 <: DoubleOptLike, T2 <: DoubleOptLike]: (Div[T1, T2] { type Out = DoubleOfCommonNullability[T1, T2] }) = + new Div[T1, T2] with ResultType[DoubleOfCommonNullability[T1, T2]] + + class PlusPlus[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](concat(_, _)) object PlusPlus: - given stringNonNullable: PlusPlus[StringType, StringType] with - type Out = StringType - given stringNullable[T1 <: StringOptType, T2 <: StringOptType]: PlusPlus[T1, T2] with - type Out = StringOptType - - trait Eq[T1 <: DataType, T2 <: DataType]: - type Out <: BooleanOptType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped === col2.untyped).typed[Out] + given strings[T1 <: StringOptLike, T2 <: StringOptLike]: (PlusPlus[T1, T2] { type Out = StringOfCommonNullability[T1, T2] }) = + new PlusPlus[T1, T2] with ResultType[StringOfCommonNullability[T1, T2]] + + class Eq[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ === _) object Eq: - given booleanNonNullable: Eq[BooleanType, BooleanType] with - type Out = BooleanType - given booleanNullable[T1 <: BooleanOptType, T2 <: BooleanOptType]: Eq[T1, T2] with - type Out = BooleanOptType - - given stringNonNullable: Eq[StringType, StringType] with - type Out = BooleanType - given stringNullable[T1 <: StringOptType, T2 <: StringOptType]: Eq[T1, T2] with - type Out = BooleanOptType - - given numericNonNullable[T1 <: NumericType, T2 <: NumericType]: Eq[T1, T2] with - type Out = BooleanType - given numericNullable[T1 <: NumericOptType, T2 <: NumericOptType]: Eq[T1, T2] with - type Out = BooleanOptType - - given 
structNonNullable[S1 <: Tuple, S2 <: Tuple]: Eq[StructType[S1], StructType[S2]] with - type Out = BooleanType - given structNullable[S1 <: Tuple, T1 <: StructOptType[S1], S2 <: Tuple, T2 <: StructOptType[S2]]: Eq[T1, T2] with - type Out = BooleanOptType - - trait Ne[-T1 <: DataType, -T2 <: DataType]: - type Out <: BooleanOptType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped =!= col2.untyped).typed[Out] + given booleans[T1 <: BooleanOptLike, T2 <: BooleanOptLike]: (Eq[T1, T2] { type Out = CommonBooleanType[T1, T2] }) = + new Eq[T1, T2] with ResultType[CommonBooleanType[T1, T2]] + + given strings[T1 <: StringOptLike, T2 <: StringOptLike]: (Eq[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Eq[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given numerics[T1 <: DoubleOptLike, T2 <: DoubleOptLike]: (Eq[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Eq[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given structs[S1 <: Tuple, S2 <: Tuple, T1 <: StructOptLike[S1], T2 <: StructOptLike[S2]]: (Eq[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Eq[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + class Ne[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ =!= _) object Ne: - given booleanNonNullable: Ne[BooleanType, BooleanType] with - type Out = BooleanType - given booleanNullable[T1 <: BooleanOptType, T2 <: BooleanOptType]: Ne[T1, T2] with - type Out = BooleanOptType - - given stringNonNullable: Ne[StringType, StringType] with - type Out = BooleanType - given stringNullable[T1 <: StringOptType, T2 <: StringOptType]: Ne[T1, T2] with - type Out = BooleanOptType - - given numericNonNullable[T1 <: NumericType, T2 <: NumericType]: Ne[T1, T2] with - type Out = BooleanType - given numericNullable[T1 <: NumericOptType, T2 <: NumericOptType]: Ne[T1, T2] with - type Out = BooleanOptType - - given structNonNullable[S1 <: Tuple, S2 <: Tuple]: 
Ne[StructType[S1], StructType[S2]] with - type Out = BooleanType - given structNullable[S1 <: Tuple, T1 <: StructOptType[S1], S2 <: Tuple, T2 <: StructOptType[S2]]: Ne[T1, T2] with - type Out = BooleanOptType - - trait Lt[-T1 <: DataType, -T2 <: DataType]: - type Out <: BooleanOptType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped < col2.untyped).typed[Out] + given booleans[T1 <: BooleanOptLike, T2 <: BooleanOptLike]: (Ne[T1, T2] { type Out = CommonBooleanType[T1, T2] }) = + new Ne[T1, T2] with ResultType[CommonBooleanType[T1, T2]] + + given strings[T1 <: StringOptLike, T2 <: StringOptLike]: (Ne[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Ne[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given numerics[T1 <: DoubleOptLike, T2 <: DoubleOptLike]: (Ne[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Ne[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given structs[S1 <: Tuple, S2 <: Tuple, T1 <: StructOptLike[S1], T2 <: StructOptLike[S2]]: (Ne[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Ne[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + class Lt[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ < _) object Lt: - given booleanNonNullable: Lt[BooleanType, BooleanType] with - type Out = BooleanType - given booleanNullable[T1 <: BooleanOptType, T2 <: BooleanOptType]: Lt[T1, T2] with - type Out = BooleanOptType - - given stringNonNullable: Lt[StringType, StringType] with - type Out = BooleanType - given stringNullable[T1 <: StringOptType, T2 <: StringOptType]: Lt[T1, T2] with - type Out = BooleanOptType - - given numericNonNullable[T1 <: NumericType, T2 <: NumericType]: Lt[T1, T2] with - type Out = BooleanType - given numericNullable[T1 <: NumericOptType, T2 <: NumericOptType]: Lt[T1, T2] with - type Out = BooleanOptType - - given structNonNullable[S1 <: Tuple, S2 <: Tuple]: Lt[StructType[S1], StructType[S2]] with - type Out = 
BooleanType - given structNullable[S1 <: Tuple, T1 <: StructOptType[S1], S2 <: Tuple, T2 <: StructOptType[S2]]: Lt[T1, T2] with - type Out = BooleanOptType - - trait Le[-T1 <: DataType, -T2 <: DataType]: - type Out <: BooleanOptType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped <= col2.untyped).typed[Out] + given booleans[T1 <: BooleanOptLike, T2 <: BooleanOptLike]: (Lt[T1, T2] { type Out = CommonBooleanType[T1, T2] }) = + new Lt[T1, T2] with ResultType[CommonBooleanType[T1, T2]] + + given strings[T1 <: StringOptLike, T2 <: StringOptLike]: (Lt[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Lt[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given numerics[T1 <: DoubleOptLike, T2 <: DoubleOptLike]: (Lt[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Lt[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given structs[S1 <: Tuple, S2 <: Tuple, T1 <: StructOptLike[S1], T2 <: StructOptLike[S2]]: (Lt[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Lt[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + class Le[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ <= _) object Le: - given booleanNonNullable: Le[BooleanType, BooleanType] with - type Out = BooleanType - given booleanNullable[T1 <: BooleanOptType, T2 <: BooleanOptType]: Le[T1, T2] with - type Out = BooleanOptType - - given stringNonNullable: Le[StringType, StringType] with - type Out = BooleanType - given stringNullable[T1 <: StringOptType, T2 <: StringOptType]: Le[T1, T2] with - type Out = BooleanOptType - - given numericNonNullable[T1 <: NumericType, T2 <: NumericType]: Le[T1, T2] with - type Out = BooleanType - given numericNullable[T1 <: NumericOptType, T2 <: NumericOptType]: Le[T1, T2] with - type Out = BooleanOptType - - given structNonNullable[S1 <: Tuple, S2 <: Tuple]: Le[StructType[S1], StructType[S2]] with - type Out = BooleanType - given structNullable[S1 <: Tuple, T1 
<: StructOptType[S1], S2 <: Tuple, T2 <: StructOptType[S2]]: Le[T1, T2] with - type Out = BooleanOptType - - trait Gt[-T1 <: DataType, -T2 <: DataType]: - type Out <: BooleanOptType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped > col2.untyped).typed[Out] + given booleans[T1 <: BooleanOptLike, T2 <: BooleanOptLike]: (Le[T1, T2] { type Out = CommonBooleanType[T1, T2] }) = + new Le[T1, T2] with ResultType[CommonBooleanType[T1, T2]] + + given strings[T1 <: StringOptLike, T2 <: StringOptLike]: (Le[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Le[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given numerics[T1 <: DoubleOptLike, T2 <: DoubleOptLike]: (Le[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Le[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given structs[S1 <: Tuple, S2 <: Tuple, T1 <: StructOptLike[S1], T2 <: StructOptLike[S2]]: (Le[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Le[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + class Gt[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ > _) object Gt: - given booleanNonNullable: Gt[BooleanType, BooleanType] with - type Out = BooleanType - given booleanNullable[T1 <: BooleanOptType, T2 <: BooleanOptType]: Gt[T1, T2] with - type Out = BooleanOptType - - given stringNonNullable: Gt[StringType, StringType] with - type Out = BooleanType - given stringNullable[T1 <: StringOptType, T2 <: StringOptType]: Gt[T1, T2] with - type Out = BooleanOptType - - given numericNonNullable[T1 <: NumericType, T2 <: NumericType]: Gt[T1, T2] with - type Out = BooleanType - given numericNullable[T1 <: NumericOptType, T2 <: NumericOptType]: Gt[T1, T2] with - type Out = BooleanOptType - - given structNonNullable[S1 <: Tuple, S2 <: Tuple]: Gt[StructType[S1], StructType[S2]] with - type Out = BooleanType - given structNullable[S1 <: Tuple, T1 <: StructOptType[S1], S2 <: Tuple, T2 <: 
StructOptType[S2]]: Gt[T1, T2] with - type Out = BooleanOptType - - trait Ge[-T1 <: DataType, -T2 <: DataType]: - type Out <: BooleanOptType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped >= col2.untyped).typed[Out] + given booleans[T1 <: BooleanOptLike, T2 <: BooleanOptLike]: (Gt[T1, T2] { type Out = CommonBooleanType[T1, T2] }) = + new Gt[T1, T2] with ResultType[CommonBooleanType[T1, T2]] + + given strings[T1 <: StringOptLike, T2 <: StringOptLike]: (Gt[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Gt[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given numerics[T1 <: DoubleOptLike, T2 <: DoubleOptLike]: (Gt[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Gt[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given structs[S1 <: Tuple, S2 <: Tuple, T1 <: StructOptLike[S1], T2 <: StructOptLike[S2]]: (Gt[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Gt[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + class Ge[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ >= _) object Ge: - given booleanNonNullable: Ge[BooleanType, BooleanType] with - type Out = BooleanType - given booleanNullable[T1 <: BooleanOptType, T2 <: BooleanOptType]: Ge[T1, T2] with - type Out = BooleanOptType - - given stringNonNullable: Ge[StringType, StringType] with - type Out = BooleanType - given stringNullable[T1 <: StringOptType, T2 <: StringOptType]: Ge[T1, T2] with - type Out = BooleanOptType - - given numericNonNullable[T1 <: NumericType, T2 <: NumericType]: Ge[T1, T2] with - type Out = BooleanType - given numericNullable[T1 <: NumericOptType, T2 <: NumericOptType]: Ge[T1, T2] with - type Out = BooleanOptType - - given structNonNullable[S1 <: Tuple, S2 <: Tuple]: Ge[StructType[S1], StructType[S2]] with - type Out = BooleanType - given structNullable[S1 <: Tuple, T1 <: StructOptType[S1], S2 <: Tuple, T2 <: StructOptType[S2]]: Ge[T1, T2] with - type Out = 
BooleanOptType - - trait And[-T1 <: DataType, -T2 <: DataType]: - type Out <: BooleanOptType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped && col2.untyped).typed[Out] + given booleans[T1 <: BooleanOptLike, T2 <: BooleanOptLike]: (Ge[T1, T2] { type Out = CommonBooleanType[T1, T2] }) = + new Ge[T1, T2] with ResultType[CommonBooleanType[T1, T2]] + + given strings[T1 <: StringOptLike, T2 <: StringOptLike]: (Ge[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Ge[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given numerics[T1 <: DoubleOptLike, T2 <: DoubleOptLike]: (Ge[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Ge[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + given structs[S1 <: Tuple, S2 <: Tuple, T1 <: StructOptLike[S1], T2 <: StructOptLike[S2]]: (Ge[T1, T2] { type Out = BooleanOfCommonNullability[T1, T2] }) = + new Ge[T1, T2] with ResultType[BooleanOfCommonNullability[T1, T2]] + + class And[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ && _) object And: - given booleanNonNullable: And[BooleanType, BooleanType] with - type Out = BooleanType - given booleanNullable[T1 <: BooleanOptType, T2 <: BooleanOptType]: And[T1, T2] with - type Out = BooleanOptType - - trait Or[-T1 <: DataType, -T2 <: DataType]: - type Out <: BooleanOptType - def apply(col1: Col[T1], col2: Col[T2]): Col[Out] = (col1.untyped || col2.untyped).typed[Out] + given booleans[T1 <: BooleanOptLike, T2 <: BooleanOptLike]: (And[T1, T2] { type Out = CommonBooleanType[T1, T2] }) = + new And[T1, T2] with ResultType[CommonBooleanType[T1, T2]] + + class Or[T1 <: DataType, T2 <: DataType] extends BinaryColumnOp[T1, T2](_ || _) object Or: - given booleanNonNullable: Or[BooleanType, BooleanType] with - type Out = BooleanType - given booleanNullable[T1 <: BooleanOptType, T2 <: BooleanOptType]: Or[T1, T2] with - type Out = BooleanOptType + given booleans[T1 <: BooleanOptLike, T2 <: BooleanOptLike]: 
(Or[T1, T2] { type Out = CommonBooleanType[T1, T2] }) = + new Or[T1, T2] with ResultType[CommonBooleanType[T1, T2]] diff --git a/src/main/DataFrameBuilders.scala b/src/main/DataFrameBuilders.scala index fa13af8..b02329b 100644 --- a/src/main/DataFrameBuilders.scala +++ b/src/main/DataFrameBuilders.scala @@ -4,7 +4,7 @@ import scala.quoted._ import org.apache.spark.sql import org.apache.spark.sql.SparkSession import org.virtuslab.iskra.DataFrame -import org.virtuslab.iskra.types.{DataType, StructType, Encoder, StructEncoder, PrimitiveEncoder} +import org.virtuslab.iskra.types.{DataType, Encoder, StructEncoder, PrimitiveEncoder} object DataFrameBuilders: extension [A](seq: Seq[A])(using encoder: Encoder[A]) diff --git a/src/main/FrameSchema.scala b/src/main/FrameSchema.scala index 706ee81..0ea4fdd 100644 --- a/src/main/FrameSchema.scala +++ b/src/main/FrameSchema.scala @@ -25,7 +25,7 @@ object FrameSchema: (S1, S2) type NullableLabeledDataType[T] = T match - case label := tpe => label := DataType.Nullable[tpe] + case label := tpe => label := DataType.AsNullable[tpe] type NullableSchema[T] = T match case Tuple => Tuple.Map[T, NullableLabeledDataType] diff --git a/src/main/JoinOnCondition.scala b/src/main/JoinOnCondition.scala index fff4a16..89b7913 100644 --- a/src/main/JoinOnCondition.scala +++ b/src/main/JoinOnCondition.scala @@ -3,7 +3,7 @@ package org.virtuslab.iskra import scala.language.implicitConversions import scala.quoted.* -import org.virtuslab.iskra.types.BooleanOptType +import org.virtuslab.iskra.types.BooleanOptLike trait OnConditionJoiner[Join <: JoinType, Left, Right] @@ -73,7 +73,7 @@ object JoinOnCondition: import quotes.reflect.* '{ ${ condition }(using ${ joiningView }) } match - case '{ $cond: Col[BooleanOptType] } => + case '{ $cond: Col[BooleanOptLike] } => '{ val joined = ${ join }.left.join(${ join }.right, ${ cond }.untyped, JoinType.typeName[T]) StructDataFrame[JoinedSchema](joined) diff --git a/src/main/UntypedOps.scala 
b/src/main/UntypedOps.scala index 3876744..d16dc55 100644 --- a/src/main/UntypedOps.scala +++ b/src/main/UntypedOps.scala @@ -1,7 +1,7 @@ package org.virtuslab.iskra import scala.quoted.* -import types.{DataType, Encoder, StructType, StructEncoder} +import types.{DataType, Encoder, struct, StructEncoder, StructNotNull} object UntypedOps: extension (untyped: UntypedColumn) @@ -12,5 +12,5 @@ object UntypedOps: private def typedDataFrameImpl[A : Type](df: Expr[UntypedDataFrame], encoder: Expr[StructEncoder[A]])(using Quotes) = encoder match - case '{ ${e}: Encoder.Aux[tpe, StructType[t]] } => + case '{ ${e}: Encoder.Aux[tpe, StructNotNull[t]] } => '{ ClassDataFrame[A](${ df }) } diff --git a/src/main/When.scala b/src/main/When.scala index d61fc56..c3dc9ec 100644 --- a/src/main/When.scala +++ b/src/main/When.scala @@ -1,14 +1,14 @@ package org.virtuslab.iskra import org.apache.spark.sql.{functions => f, Column => UntypedColumn} -import org.virtuslab.iskra.types.{Coerce, DataType, BooleanOptType} +import org.virtuslab.iskra.types.{Coerce, DataType, BooleanOptLike} object When: - class WhenColumn[T <: DataType](untyped: UntypedColumn) extends Col[DataType.Nullable[T]](untyped): - def when[U <: DataType](condition: Col[BooleanOptType], value: Col[U])(using coerce: Coerce[T, U]): WhenColumn[coerce.Coerced] = + class WhenColumn[T <: DataType](untyped: UntypedColumn) extends Col[DataType.AsNullable[T]](untyped): + def when[U <: DataType](condition: Col[BooleanOptLike], value: Col[U])(using coerce: Coerce[T, U]): WhenColumn[coerce.Coerced] = WhenColumn(this.untyped.when(condition.untyped, value.untyped)) def otherwise[U <: DataType](value: Col[U])(using coerce: Coerce[T, U]): Col[coerce.Coerced] = Col(this.untyped.otherwise(value.untyped)) - def when[T <: DataType](condition: Col[BooleanOptType], value: Col[T]): WhenColumn[T] = + def when[T <: DataType](condition: Col[BooleanOptLike], value: Col[T]): WhenColumn[T] = WhenColumn(f.when(condition.untyped, value.untyped)) diff 
--git a/src/main/Where.scala b/src/main/Where.scala index 87bd579..bdb6a52 100644 --- a/src/main/Where.scala +++ b/src/main/Where.scala @@ -1,7 +1,7 @@ package org.virtuslab.iskra import scala.quoted.* -import org.virtuslab.iskra.types.BooleanOptType +import org.virtuslab.iskra.types.BooleanOptLike trait Where[Schema, View <: SchemaView]: val view: View @@ -36,7 +36,7 @@ object Where: import quotes.reflect.* '{ ${ condition }(using ${ where }.view) } match - case '{ $cond: Col[BooleanOptType] } => + case '{ $cond: Col[BooleanOptLike] } => '{ val filtered = ${ where }.underlying.where(${ cond }.untyped) StructDataFrame[Schema](filtered) diff --git a/src/main/api/api.scala b/src/main/api/api.scala index e2a9c1b..70069f1 100644 --- a/src/main/api/api.scala +++ b/src/main/api/api.scala @@ -3,29 +3,46 @@ package api export DataFrameBuilders.toDF export types.{ - DataType, - BooleanType, - BooleanOptType, - StringType, - StringOptType, - ByteType, - ByteOptType, - ShortType, - ShortOptType, - IntegerType, - IntegerOptType, - LongType, - LongOptType, - FloatType, - FloatOptType, - DoubleType, - DoubleOptType, - StructType, - StructOptType + boolean, + boolean_?, + BooleanNotNull, + BooleanOrNull, + string, + string_?, + StringNotNull, + StringOrNull, + byte, + byte_?, + ByteNotNull, + ByteOrNull, + short, + short_?, + ShortNotNull, + ShortOrNull, + int, + int_?, + IntNotNull, + IntOrNull, + long, + long_?, + LongNotNull, + LongOrNull, + float, + float_?, + FloatNotNull, + FloatOrNull, + double, + double_?, + DoubleNotNull, + DoubleOrNull, + struct, + struct_?, + StructNotNull, + StructOrNull } export UntypedOps.typed export org.virtuslab.iskra.$ -export org.virtuslab.iskra.{Column, Columns, DataFrame, ClassDataFrame, NamedColumns, StructDataFrame, UntypedColumn, UntypedDataFrame, :=, /} +export org.virtuslab.iskra.{Column, Columns, Col, DataFrame, ClassDataFrame, NamedColumns, StructDataFrame, UntypedColumn, UntypedDataFrame, :=, /} object functions: export 
org.virtuslab.iskra.functions.{lit, when} diff --git a/src/main/functions/aggregates.scala b/src/main/functions/aggregates.scala index ff8de96..4267339 100644 --- a/src/main/functions/aggregates.scala +++ b/src/main/functions/aggregates.scala @@ -5,22 +5,22 @@ import org.virtuslab.iskra.Agg import org.virtuslab.iskra.Col import org.virtuslab.iskra.UntypedOps.typed import org.virtuslab.iskra.types.* -import org.virtuslab.iskra.types.DataType.{NumericOptType, Nullable} +import org.virtuslab.iskra.types.DataType.AsNullable class Sum[A <: Agg](val agg: A): - def apply[T <: NumericOptType](column: agg.View ?=> Col[T]): Col[Nullable[T]] = + def apply[T <: DoubleOptLike](column: agg.View ?=> Col[T]): Col[AsNullable[T]] = sql.functions.sum(column(using agg.view).untyped).typed class Max[A <: Agg](val agg: A): - def apply[T <: NumericOptType](column: agg.View ?=> Col[T]): Col[Nullable[T]] = + def apply[T <: DoubleOptLike](column: agg.View ?=> Col[T]): Col[AsNullable[T]] = sql.functions.max(column(using agg.view).untyped).typed class Min[A <: Agg](val agg: A): - def apply[T <: NumericOptType](column: agg.View ?=> Col[T]): Col[Nullable[T]] = + def apply[T <: DoubleOptLike](column: agg.View ?=> Col[T]): Col[AsNullable[T]] = sql.functions.min(column(using agg.view).untyped).typed class Avg[A <: Agg](val agg: A): - def apply(column: agg.View ?=> Col[NumericOptType]): Col[DoubleOptType] = + def apply(column: agg.View ?=> Col[DoubleOptLike]): Col[DoubleOrNull] = sql.functions.avg(column(using agg.view).untyped).typed object Aggregates: diff --git a/src/main/types/Coerce.scala b/src/main/types/Coerce.scala index 5c61468..e332a50 100644 --- a/src/main/types/Coerce.scala +++ b/src/main/types/Coerce.scala @@ -1,17 +1,20 @@ package org.virtuslab.iskra package types -import DataType.{CommonNumericNonNullableType, CommonNumericNullableType, NumericOptType, NumericType} - -trait Coerce[-A <: DataType, -B <: DataType]: +trait Coerce[A <: DataType, B <: DataType]: type Coerced <: DataType 
-object Coerce: - given sameType[A <: DataType]: Coerce[A, A] with +object Coerce extends CoerceLowPrio: + given sameType[A <: FinalDataType]: Coerce[A, A] with + override type Coerced = A + + given nullableFirst[A <: FinalDataType & Nullable, B <: FinalDataType & NonNullable](using A <:< NullableOf[B]): Coerce[A, B] with override type Coerced = A - given nullable[A <: NumericOptType, B <: NumericOptType]: Coerce[A, B] with - override type Coerced = CommonNumericNullableType[A, B] + given nullableSecond[A <: FinalDataType & NonNullable, B <: FinalDataType & Nullable](using A <:< NonNullableOf[B]): Coerce[A, B] with + override type Coerced = B - given nonNullable[A <: NumericType, B <: NumericType]: Coerce[A, B] with - override type Coerced = CommonNumericNonNullableType[A, B] +trait CoerceLowPrio: + given numeric[A <: FinalDataType & DoubleOptLike, B <: FinalDataType & DoubleOptLike]: (Coerce[A, B] { type Coerced = CommonNumericType[A, B] }) = + new Coerce[A, B]: + override type Coerced = CommonNumericType[A, B] diff --git a/src/main/types/DataType.scala b/src/main/types/DataType.scala index 55f0d63..ebf1ef9 100644 --- a/src/main/types/DataType.scala +++ b/src/main/types/DataType.scala @@ -1,79 +1,126 @@ package org.virtuslab.iskra package types -sealed trait DataType +sealed trait Nullability + +sealed trait Nullable extends Nullability +sealed trait NonNullable extends Nullability + +trait NullableOf[T <: DataType & NonNullable] extends Nullable +trait NonNullableOf[T <: DataType & Nullable] extends NonNullable + + +trait DataType + +abstract class FinalDataType extends DataType { + self: Nullability => +} object DataType: type Subtype[T <: DataType] = T - sealed trait NotNull extends DataType - - type NumericType = ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType - type NumericOptType = ByteOptType | ShortOptType | IntegerOptType | LongOptType | FloatOptType | DoubleOptType - - type Nullable[T <: DataType] <: DataType = T match - case 
BooleanOptType => BooleanOptType - case StringOptType => StringOptType - case ByteOptType => ByteOptType - case ShortOptType => ShortOptType - case IntegerOptType => IntegerOptType - case LongOptType => LongOptType - case FloatOptType => FloatOptType - case DoubleOptType => DoubleOptType - case StructOptType[schema] => StructOptType[schema] - - type NonNullable[T <: DataType] <: DataType = T match - case BooleanOptType => BooleanType - case StringOptType => StringType - case ByteOptType => ByteType - case ShortOptType => ShortOptType - case IntegerOptType => IntegerOptType - case LongOptType => LongOptType - case FloatOptType => FloatOptType - case DoubleOptType => DoubleOptType - case StructOptType[schema] => StructOptType[schema] - - type CommonNumericNullableType[T1 <: DataType, T2 <: DataType] <: NumericOptType = (T1, T2) match - case (DoubleOptType, DataType) | (DataType, DoubleOptType) => DoubleOptType - case (FloatOptType, DataType) | (DataType, FloatOptType) => FloatOptType - case (LongOptType, DataType) | (DataType, LongOptType) => LongOptType - case (IntegerOptType, DataType) | (DataType, IntegerOptType) => IntegerOptType - case (ShortOptType, DataType) | (DataType, ShortOptType) => ShortOptType - case (ByteOptType, DataType) | (DataType, ByteOptType) => ByteOptType - - type CommonNumericNonNullableType[T1 <: DataType, T2 <: DataType] <: NumericOptType = (T1, T2) match - case (DoubleOptType, DataType) | (DataType, DoubleOptType) => DoubleType - case (FloatOptType, DataType) | (DataType, FloatOptType) => FloatType - case (LongOptType, DataType) | (DataType, LongOptType) => LongType - case (IntegerOptType, DataType) | (DataType, IntegerOptType) => IntegerType - case (ShortOptType, DataType) | (DataType, ShortOptType) => ShortType - case (ByteOptType, DataType) | (DataType, ByteOptType) => ByteType - -import DataType.NotNull - -sealed class BooleanOptType extends DataType -final class BooleanType extends BooleanOptType, NotNull - -sealed class StringOptType 
extends DataType -final class StringType extends StringOptType, NotNull - -sealed class ByteOptType extends DataType -final class ByteType extends ByteOptType, NotNull - -sealed class ShortOptType extends DataType -final class ShortType extends ShortOptType, NotNull - -sealed class IntegerOptType extends DataType -final class IntegerType extends IntegerOptType, NotNull - -sealed class LongOptType extends DataType -final class LongType extends LongOptType, NotNull - -sealed class FloatOptType extends DataType -final class FloatType extends FloatOptType, NotNull - -class DoubleOptType extends DataType -final class DoubleType extends DoubleOptType, NotNull - -sealed class StructOptType[Schema <: Tuple] extends DataType -final class StructType[Schema <: Tuple] extends StructOptType[Schema], NotNull + type AsNullable[T <: DataType] <: DataType = T match + case NonNullableOf[t] => t + case Nullable => T + + +trait BooleanOptLike extends DataType +trait BooleanLike extends BooleanOptLike +final class boolean_? extends FinalDataType, NullableOf[boolean], BooleanOptLike +final class boolean extends FinalDataType, NonNullableOf[boolean_?], BooleanLike +type BooleanOrNull = boolean_? +type BooleanNotNull = boolean + +trait StringOptLike extends DataType +trait StringLike extends StringOptLike +final class string_? extends FinalDataType, NullableOf[string], StringOptLike +final class string extends FinalDataType, NonNullableOf[string_?], StringLike +type StringOrNull = string_? +type StringNotNull = string + +trait DoubleOptLike extends DataType +trait DoubleLike extends DoubleOptLike +final class double_? extends FinalDataType, NullableOf[double], DoubleOptLike +final class double extends FinalDataType, NonNullableOf[double_?], DoubleLike +type DoubleOrNull = double_? +type DoubleNotNull = double + +trait FloatOptLike extends DoubleOptLike +trait FloatLike extends FloatOptLike, DoubleLike +final class float_? 
extends FinalDataType, NullableOf[float], FloatOptLike +final class float extends FinalDataType, NonNullableOf[float_?], FloatLike +type FloatOrNull = float_? +type FloatNotNull = float + +trait LongOptLike extends FloatOptLike +trait LongLike extends LongOptLike, FloatLike +final class long_? extends FinalDataType, NullableOf[long], LongOptLike +final class long extends FinalDataType, NonNullableOf[long_?], LongLike +type LongOrNull = long_? +type LongNotNull = long + +trait IntOptLike extends LongOptLike +trait IntLike extends IntOptLike, LongLike +final class int_? extends FinalDataType, NullableOf[int], IntOptLike +final class int extends FinalDataType, NonNullableOf[int_?], IntLike +type IntOrNull = int_? +type IntNotNull = int + +trait ShortOptLike extends IntOptLike +trait ShortLike extends ShortOptLike, IntLike +final class short_? extends FinalDataType, NullableOf[short], ShortOptLike +final class short extends FinalDataType, NonNullableOf[short_?], ShortLike +type ShortOrNull = short_? +type ShortNotNull = short + +trait ByteOptLike extends ShortOptLike +trait ByteLike extends ByteOptLike, ShortLike +final class byte_? extends FinalDataType, NullableOf[byte], ByteOptLike +final class byte extends FinalDataType, NonNullableOf[byte_?], ByteLike +type ByteOrNull = byte_? +type ByteNotNull = byte + +trait StructOptLike[Schema <: Tuple] extends DataType +trait StructLike[Schema <: Tuple] extends StructOptLike[Schema] +final class struct_?[Schema <: Tuple] extends FinalDataType, NullableOf[struct[Schema]], StructOptLike[Schema] +final class struct[Schema <: Tuple] extends FinalDataType, NonNullableOf[struct_?[Schema]], StructLike[Schema] +type StructOrNull[Schema <: Tuple] = struct_?[Schema] +type StructNotNull[Schema <: Tuple] = struct[Schema] + +type CommonNumericType[T1 <: DataType, T2 <: DataType] <: DataType = (T1, T2) match + case (ByteLike, ByteLike) => byte + case (ByteOptLike, ByteOptLike) => byte_? 
+ case (ShortLike, ShortLike) => short + case (ShortOptLike, ShortOptLike) => short_? + case (IntLike, IntLike) => int + case (IntOptLike, IntOptLike) => int_? + case (LongLike, LongLike) => long + case (LongOptLike, LongOptLike) => long_? + case (FloatLike, FloatLike) => float + case (FloatOptLike, FloatOptLike) => float_? + case (DoubleLike, DoubleLike) => double + case (DoubleOptLike, DoubleOptLike) => double_? + +type CommonBooleanType[T1 <: DataType, T2 <: DataType] <: DataType = (T1, T2) match + case (BooleanLike, BooleanLike) => BooleanNotNull + case (BooleanOptLike, BooleanOptLike) => BooleanOrNull + +type CommonNullability[T1 <: Nullability, T2 <: Nullability] <: Nullability = (T1, T2) match + case (NonNullable, NonNullable) => NonNullable + case _ => Nullable + +type BooleanOfNullability[N <: Nullability] <: DataType = N match + case NonNullable => BooleanNotNull + case Nullable => BooleanOrNull + +type BooleanOfCommonNullability[T1, T2] <: DataType = (T1, T2) match + case (NonNullable, NonNullable) => BooleanNotNull + case (Nullability, Nullability) => BooleanOrNull + +type DoubleOfCommonNullability[T1 <: DoubleOptLike, T2 <: DoubleOptLike] <: DataType = (T1, T2) match + case (DoubleLike, DoubleLike) => DoubleNotNull + case (DoubleOptLike, DoubleOptLike) => DoubleOrNull + +type StringOfCommonNullability[T1 <: StringOptLike, T2 <: StringOptLike] <: DataType = (T1, T2) match + case (StringLike, StringLike) => StringNotNull + case (StringOptLike, StringOptLike) => StringOrNull diff --git a/src/main/types/Encoder.scala b/src/main/types/Encoder.scala index 48f188d..88b48fd 100644 --- a/src/main/types/Encoder.scala +++ b/src/main/types/Encoder.scala @@ -30,67 +30,67 @@ trait PrimitiveNonNullableEncoder[-A] extends PrimitiveEncoder[A]: object Encoder: type Aux[-A, E <: DataType] = Encoder[A] { type ColumnType = E } - inline given boolean: PrimitiveNonNullableEncoder[Boolean] with - type ColumnType = BooleanType + inline given booleanEncoder: 
PrimitiveNonNullableEncoder[Boolean] with + type ColumnType = BooleanNotNull def catalystType = sql.types.BooleanType - inline given booleanOpt: PrimitiveNullableEncoder[Boolean] with - type ColumnType = BooleanOptType + inline given booleanOptEncoder: PrimitiveNullableEncoder[Boolean] with + type ColumnType = BooleanOrNull def catalystType = sql.types.BooleanType - inline given string: PrimitiveNonNullableEncoder[String] with - type ColumnType = StringType + inline given stringEncoder: PrimitiveNonNullableEncoder[String] with + type ColumnType = StringNotNull def catalystType = sql.types.StringType - inline given stringOpt: PrimitiveNullableEncoder[String] with - type ColumnType = StringOptType + inline given stringOptEncoder: PrimitiveNullableEncoder[String] with + type ColumnType = StringOrNull def catalystType = sql.types.StringType - inline given byte: PrimitiveNonNullableEncoder[Byte] with - type ColumnType = ByteType + inline given byteEncoder: PrimitiveNonNullableEncoder[Byte] with + type ColumnType = ByteNotNull def catalystType = sql.types.ByteType - inline given byteOpt: PrimitiveNullableEncoder[Byte] with - type ColumnType = ByteOptType + inline given byteOptEncoder: PrimitiveNullableEncoder[Byte] with + type ColumnType = ByteOrNull def catalystType = sql.types.ByteType - inline given short: PrimitiveNonNullableEncoder[Short] with - type ColumnType = ShortType + inline given shortEncoder: PrimitiveNonNullableEncoder[Short] with + type ColumnType = ShortNotNull def catalystType = sql.types.ShortType - inline given shortOpt: PrimitiveNullableEncoder[Short] with - type ColumnType = ShortOptType + inline given shortOptEncoder: PrimitiveNullableEncoder[Short] with + type ColumnType = ShortOrNull def catalystType = sql.types.ShortType - inline given int: PrimitiveNonNullableEncoder[Int] with - type ColumnType = IntegerType + inline given intEncoder: PrimitiveNonNullableEncoder[Int] with + type ColumnType = IntNotNull def catalystType = sql.types.IntegerType - 
inline given intOpt: PrimitiveNullableEncoder[Int] with - type ColumnType = IntegerOptType + inline given intOptEncoder: PrimitiveNullableEncoder[Int] with + type ColumnType = IntOrNull def catalystType = sql.types.IntegerType - inline given long: PrimitiveNonNullableEncoder[Long] with - type ColumnType = LongType + inline given longEncoder: PrimitiveNonNullableEncoder[Long] with + type ColumnType = LongNotNull def catalystType = sql.types.LongType - inline given longOpt: PrimitiveNullableEncoder[Long] with - type ColumnType = LongOptType + inline given longOptEncoder: PrimitiveNullableEncoder[Long] with + type ColumnType = LongOrNull def catalystType = sql.types.LongType - inline given float: PrimitiveNonNullableEncoder[Float] with - type ColumnType = FloatType + inline given floatEncoder: PrimitiveNonNullableEncoder[Float] with + type ColumnType = FloatNotNull def catalystType = sql.types.FloatType - inline given floatOpt: PrimitiveNullableEncoder[Float] with - type ColumnType = FloatOptType + inline given floatOptEncoder: PrimitiveNullableEncoder[Float] with + type ColumnType = FloatOrNull def catalystType = sql.types.FloatType - inline given double: PrimitiveNonNullableEncoder[Double] with - type ColumnType = DoubleType + inline given doubleEncoder: PrimitiveNonNullableEncoder[Double] with + type ColumnType = DoubleNotNull def catalystType = sql.types.DoubleType - inline given doubleOpt: PrimitiveNullableEncoder[Double] with - type ColumnType = DoubleOptType + inline given doubleOptEncoder: PrimitiveNullableEncoder[Double] with + type ColumnType = DoubleOrNull def catalystType = sql.types.DoubleType export StructEncoder.{fromMirror, optFromMirror} trait StructEncoder[-A] extends Encoder[A]: type StructSchema <: Tuple - type ColumnType = StructType[StructSchema] + type ColumnType = StructNotNull[StructSchema] override def catalystType: sql.types.StructType override def encode(a: A): sql.Row @@ -172,9 +172,9 @@ object StructEncoder: } end fromMirrorImpl - given 
optFromMirror[A](using encoder: StructEncoder[A]): (Encoder[Option[A]] { type ColumnType = StructOptType[encoder.StructSchema] }) = + given optFromMirror[A](using encoder: StructEncoder[A]): (Encoder[Option[A]] { type ColumnType = StructOrNull[encoder.StructSchema] }) = new Encoder[Option[A]]: - override type ColumnType = StructOptType[encoder.StructSchema] + override type ColumnType = StructOrNull[encoder.StructSchema] override def encode(value: Option[A]): Any = value.map(encoder.encode).orNull override def decode(value: Any): Any = Option(encoder.decode) override def catalystType = encoder.catalystType diff --git a/src/test/CoerceTest.scala b/src/test/CoerceTest.scala index cf3ca04..18da5e6 100644 --- a/src/test/CoerceTest.scala +++ b/src/test/CoerceTest.scala @@ -6,21 +6,31 @@ import types.* class CoerceTest extends AnyFunSuite: test("coerce-int-double") { - val c = summon[Coerce[IntegerType, DoubleType]] - summon[c.Coerced =:= DoubleType] + val c = summon[Coerce[IntNotNull, DoubleNotNull]] + summon[c.Coerced =:= DoubleNotNull] } test("coerce-short-short-opt") { - val c = summon[Coerce[ShortType, ShortOptType]] - summon[c.Coerced =:= ShortOptType] + val c = summon[Coerce[ShortNotNull, ShortOrNull]] + summon[c.Coerced =:= ShortOrNull] } test("coerce-long-byte-opt") { - val c = summon[Coerce[LongType, ByteOptType]] - summon[c.Coerced =:= LongOptType] + val c = summon[Coerce[LongNotNull, ByteOrNull]] + summon[c.Coerced =:= LongOrNull] } test("coerce-string-string-opt") { - val c = summon[Coerce[StringType, StringOptType]] - summon[c.Coerced =:= StringOptType] + val c = summon[Coerce[StringNotNull, StringOrNull]] + summon[c.Coerced =:= StringOrNull] + } + + test("coerce-string-opt-string") { + val c = summon[Coerce[StringOrNull, StringNotNull]] + summon[c.Coerced =:= StringOrNull] + } + + test("coerce-string-opt-string-opt") { + val c = summon[Coerce[StringOrNull, StringOrNull]] + summon[c.Coerced =:= StringOrNull] } diff --git a/src/test/JoinTest.scala 
b/src/test/JoinTest.scala index 40f1e8c..ef20665 100644 --- a/src/test/JoinTest.scala +++ b/src/test/JoinTest.scala @@ -23,10 +23,10 @@ class JoinTest extends SparkUnitTest: val joined = foos.join(bars).on($.foos.int === $.bars.int) val typedJoined: StructDataFrame[( - "foos" / "int" := IntegerType, - "foos" / "long" := LongType, - "bars" / "int" := IntegerType, - "bars" / "string" := StringType + "foos" / "int" := IntNotNull, + "foos" / "long" := LongNotNull, + "bars" / "int" := IntNotNull, + "bars" / "string" := StringNotNull )] = joined val result = joined.select( @@ -45,10 +45,10 @@ class JoinTest extends SparkUnitTest: val joined = foos.leftJoin(bars).on($.foos.int === $.bars.int) val typedJoined: StructDataFrame[( - "foos" / "int" := IntegerType, - "foos" / "long" := LongType, - "bars" / "int" := IntegerOptType, - "bars" / "string" := StringOptType + "foos" / "int" := IntNotNull, + "foos" / "long" := LongNotNull, + "bars" / "int" := IntOrNull, + "bars" / "string" := StringOrNull )] = joined val result = joined.select( @@ -68,10 +68,10 @@ class JoinTest extends SparkUnitTest: val joined = foos.rightJoin(bars).on($.foos.int === $.bars.int) val typedJoined: StructDataFrame[( - "foos" / "int" := IntegerOptType, - "foos" / "long" := LongOptType, - "bars" / "int" := IntegerType, - "bars" / "string" := StringType + "foos" / "int" := IntOrNull, + "foos" / "long" := LongOrNull, + "bars" / "int" := IntNotNull, + "bars" / "string" := StringNotNull )] = joined val result = joined.select( @@ -91,10 +91,10 @@ class JoinTest extends SparkUnitTest: val joined = foos.fullJoin(bars).on($.foos.int === $.bars.int) val typedJoined: StructDataFrame[( - "foos" / "int" := IntegerOptType, - "foos" / "long" := LongOptType, - "bars" / "int" := IntegerOptType, - "bars" / "string" := StringOptType + "foos" / "int" := IntOrNull, + "foos" / "long" := LongOrNull, + "bars" / "int" := IntOrNull, + "bars" / "string" := StringOrNull )] = joined val result = joined.select( @@ -115,8 +115,8 @@ 
class JoinTest extends SparkUnitTest: val joined = foos.semiJoin(bars).on($.foos.int === $.bars.int) val typedJoined: StructDataFrame[( - "foos" / "int" := IntegerType, - "foos" / "long" := LongType + "foos" / "int" := IntNotNull, + "foos" / "long" := LongNotNull )] = joined val result = joined.select( @@ -133,8 +133,8 @@ class JoinTest extends SparkUnitTest: val joined = foos.antiJoin(bars).on($.foos.int === $.bars.int) val typedJoined: StructDataFrame[( - "foos" / "int" := IntegerType, - "foos" / "long" := LongType + "foos" / "int" := IntNotNull, + "foos" / "long" := LongNotNull )] = joined val result = joined.select( @@ -151,10 +151,10 @@ class JoinTest extends SparkUnitTest: val joined = foos.crossJoin(bars) val typedJoined: StructDataFrame[( - "foos" / "int" := IntegerType, - "foos" / "long" := LongType, - "bars" / "int" := IntegerType, - "bars" / "string" := StringType + "foos" / "int" := IntNotNull, + "foos" / "long" := LongNotNull, + "bars" / "int" := IntNotNull, + "bars" / "string" := StringNotNull )] = joined val result = joined.select( diff --git a/src/test/WhenTest.scala b/src/test/WhenTest.scala index 9b14833..50fcf48 100644 --- a/src/test/WhenTest.scala +++ b/src/test/WhenTest.scala @@ -51,9 +51,9 @@ class WhenTest extends SparkUnitTest: when($.int === lit(1), lit(10)) .when($.int === lit(2), lit(100L)) .otherwise(lit(1000d)) - .as("str") + .as("double") } - .asClass[Option[Double]].collect().toList + .asClass[Double].collect().toList - result shouldEqual Seq(Some(10d), Some(100d), Some(1000d)) + result shouldEqual Seq(10d, 100d, 1000d) } diff --git a/src/test/example/Books.scala b/src/test/example/Books.scala index 108a8e9..95039ad 100644 --- a/src/test/example/Books.scala +++ b/src/test/example/Books.scala @@ -22,7 +22,7 @@ import org.virtuslab.iskra.api.* import org.apache.spark.sql.functions.lower val authorlessBooks = books.select( - lower($.title.untyped).typed[StringType].as("title"), + lower($.title.untyped).typed[StringNotNull].as("title"), 
$.publicationYear ) authorlessBooks.show()