diff --git a/docs/layouts/shortcodes/generated/execution_config_configuration.html b/docs/layouts/shortcodes/generated/execution_config_configuration.html index 0eccee0eb4003..128a05351bf71 100644 --- a/docs/layouts/shortcodes/generated/execution_config_configuration.html +++ b/docs/layouts/shortcodes/generated/execution_config_configuration.html @@ -244,6 +244,12 @@

Enum

In order to remap state to operators during a restore, it is required that the pipeline's streaming transformations get a UID assigned.
The planner can generate and assign explicit UIDs. If no UIDs have been set by the planner, the UIDs will be auto-generated by lower layers that can take the complete topology into account for uniqueness of the IDs. See the DataStream API for more information.
This configuration option is for experts only and the default should be sufficient for most use cases. By default, only pipelines created from a persisted compiled plan will get UIDs assigned explicitly. Thus, these pipelines can be arbitrarily moved around within the same topology without affecting the stable UIDs.

Possible values: + +
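For illustration, a minimal sketch of overriding this behavior, assuming the UidGeneration enum constants PLAN_ONLY, ALWAYS, and DISABLED (the rendered "Possible values" list was lost in extraction; the constant names are an assumption from the option's enum type):

```java
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class UidGenerationExample {
    public static void main(String[] args) {
        TableEnvironment tEnv =
                TableEnvironment.create(EnvironmentSettings.inStreamingMode());
        // Always assign explicit UIDs instead of the PLAN_ONLY default, so state
        // can be remapped even for pipelines not created from a compiled plan.
        tEnv.getConfig().set("table.exec.uid.generation", "ALWAYS");
    }
}
```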
table.exec.unbounded-over.version

Streaming + 2 + Integer + Which version of the unbounded over aggregation to use: 1 - legacy version, 2 - version with improved performance +
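A usage sketch for this new option: pinning the legacy implementation, e.g. for a job whose operator state was written by version 1 (the option constant ExecutionConfigOptions.UNBOUNDED_OVER_VERSION is introduced later in this diff):

```java
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.config.ExecutionConfigOptions;

public class UnboundedOverVersionExample {
    public static void main(String[] args) {
        TableEnvironment tEnv =
                TableEnvironment.create(EnvironmentSettings.inStreamingMode());
        // Keep the legacy (version 1) unbounded-over operator; as noted in
        // StreamExecOverAggregate below, there is no state migration path
        // between versions 1 and 2.
        tEnv.getConfig().set(ExecutionConfigOptions.UNBOUNDED_OVER_VERSION, 1);
    }
}
```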
table.exec.window-agg.buffer-size-limit

Batch 100000 diff --git a/docs/layouts/shortcodes/generated/optimizer_config_configuration.html b/docs/layouts/shortcodes/generated/optimizer_config_configuration.html index 652242a17713d..2127bb6992e5f 100644 --- a/docs/layouts/shortcodes/generated/optimizer_config_configuration.html +++ b/docs/layouts/shortcodes/generated/optimizer_config_configuration.html @@ -8,24 +8,6 @@ - -
table.optimizer.skewed-join-optimization.strategy

Batch - auto -

Enum

- Flink will handle skew in shuffled joins (sort-merge and hash) at runtime by splitting data according to the skewed join key. The value of this configuration determines how Flink performs this optimization. AUTO means Flink will automatically apply this optimization, FORCED means Flink will enforce this optimization even if it introduces extra hash shuffle, and NONE means this optimization will not be executed.

Possible values: - - -
table.optimizer.skewed-join-optimization.skewed-factor

Batch - 4.0 -

Double

- When a join operator instance encounters input data that exceeds N times the median size of other concurrent join operator instances, it is considered skewed (where N represents this skewed-factor). In such cases, Flink may automatically split the skewed data into multiple parts to ensure a more balanced data distribution, unless the data volume is below the skewed threshold(defined using table.optimizer.skewed-join-optimization.skewed-threshold). - - -
table.optimizer.skewed-join-optimization.skewed-threshold

Batch - 256 mb -

MemorySize

- When a join operator instance encounters input data that exceeds N times the median size of other concurrent join operator instances, it is considered skewed (where N represents the table.optimizer.skewed-join-optimization.skewed-factor). In such cases, Flink may automatically split the skewed data into multiple parts to ensure a more balanced data distribution, unless the data volume is below this skewed threshold. -
table.optimizer.adaptive-broadcast-join.strategy

Batch auto @@ -137,6 +119,24 @@ MemorySize Min data volume threshold of the runtime filter probe side. Estimated data volume needs to be over this value to try to inject runtime filter. This value should be larger than table.optimizer.runtime-filter.max-build-data-size. + +
table.optimizer.skewed-join-optimization.skewed-factor

Batch + 4.0 + Double + When a join operator instance encounters input data that exceeds N times the median size of other concurrent join operator instances, it is considered skewed (where N represents this skewed-factor). In such cases, Flink may automatically split the skewed data into multiple parts to ensure a more balanced data distribution, unless the data volume is below the skewed threshold (defined using table.optimizer.skewed-join-optimization.skewed-threshold). + +
table.optimizer.skewed-join-optimization.skewed-threshold

Batch + 256 mb + MemorySize + When a join operator instance encounters input data that exceeds N times the median size of other concurrent join operator instances, it is considered skewed (where N represents the table.optimizer.skewed-join-optimization.skewed-factor). In such cases, Flink may automatically split the skewed data into multiple parts to ensure a more balanced data distribution, unless the data volume is below this skewed threshold. + + +
table.optimizer.skewed-join-optimization.strategy

Batch + auto +

Enum

+ Flink will handle skew in shuffled joins (sort-merge and hash) at runtime by splitting data according to the skewed join key. The value of this configuration determines how Flink performs this optimization. AUTO means Flink will automatically apply this optimization, FORCED means Flink will enforce this optimization even if it introduces extra hash shuffle, and NONE means this optimization will not be executed.

Possible values: +
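To make the interplay of the three skewed-join options above concrete, a sketch of tuning them together on a batch job (the values are illustrative, not recommendations):

```java
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class SkewedJoinTuningExample {
    public static void main(String[] args) {
        TableEnvironment tEnv =
                TableEnvironment.create(EnvironmentSettings.inBatchMode());
        // Enforce skew handling even if it introduces an extra hash shuffle.
        tEnv.getConfig().set("table.optimizer.skewed-join-optimization.strategy", "forced");
        // Consider an instance skewed once its input exceeds 4x the median of
        // its siblings ...
        tEnv.getConfig().set("table.optimizer.skewed-join-optimization.skewed-factor", "4.0");
        // ... but only if that input is also larger than 512 MB.
        tEnv.getConfig().set("table.optimizer.skewed-join-optimization.skewed-threshold", "512 mb");
    }
}
```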
table.optimizer.source.report-statistics-enabled

Batch Streaming true diff --git a/docs/layouts/shortcodes/generated/python_configuration.html b/docs/layouts/shortcodes/generated/python_configuration.html index 257b3365b054e..e5c188d8d5fd6 100644 --- a/docs/layouts/shortcodes/generated/python_configuration.html +++ b/docs/layouts/shortcodes/generated/python_configuration.html @@ -24,7 +24,7 @@
python.executable
"python" String - Specify the path of the python interpreter used to execute the python UDF worker. The python UDF worker depends on Python 3.8+, Apache Beam (version >= 2.54.0, <= 2.61.0), Pip (version >= 20.3) and SetupTools (version >= 37.0.0). Please ensure that the specified environment meets the above requirements. The option is equivalent to the command line option "-pyexec". + Specify the path of the python interpreter used to execute the python UDF worker. The python UDF worker depends on Python 3.8+, Apache Beam (version >= 2.54.0, <= 2.61.0), Pip (version >= 20.3) and SetupTools (version >= 37.0.0). Please ensure that the specified environment meets the above requirements. The option is equivalent to the command line option "-pyexec".
python.execution-mode
diff --git a/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/config/ExecutionConfigOptions.java b/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/config/ExecutionConfigOptions.java index 97fe37b8d5c45..99bb67af5d2b6 100644 --- a/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/config/ExecutionConfigOptions.java +++ b/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/config/ExecutionConfigOptions.java @@ -554,6 +554,16 @@ public class ExecutionConfigOptions { + "all changes to downstream just like when the mini-batch is " + "not enabled."); + @Documentation.TableOption(execMode = Documentation.ExecMode.STREAMING) + public static final ConfigOption UNBOUNDED_OVER_VERSION = + ConfigOptions.key("table.exec.unbounded-over.version") + .intType() + .defaultValue(2) + .withDescription( + "Which version of the unbounded over aggregation to use: " + + " 1 - legacy version" + + " 2 - version with improved performance"); + @Documentation.TableOption(execMode = Documentation.ExecMode.STREAMING) public static final ConfigOption TABLE_EXEC_UID_GENERATION = key("table.exec.uid.generation") diff --git a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/nodes/exec/stream/StreamExecOverAggregate.java b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/nodes/exec/stream/StreamExecOverAggregate.java index 1ec65c852e370..e672e7e1ac65e 100644 --- a/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/nodes/exec/stream/StreamExecOverAggregate.java +++ b/flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/nodes/exec/stream/StreamExecOverAggregate.java @@ -25,6 +25,7 @@ import org.apache.flink.streaming.api.operators.KeyedProcessOperator; import org.apache.flink.streaming.api.transformations.OneInputTransformation; import org.apache.flink.table.api.TableException; +import org.apache.flink.table.api.config.ExecutionConfigOptions; import org.apache.flink.table.data.RowData; import org.apache.flink.table.planner.calcite.FlinkTypeFactory; import org.apache.flink.table.planner.codegen.CodeGeneratorContext; @@ -48,6 +49,7 @@ import org.apache.flink.table.planner.utils.TableConfigUtils; import org.apache.flink.table.runtime.generated.GeneratedAggsHandleFunction; import org.apache.flink.table.runtime.keyselector.RowDataKeySelector; +import org.apache.flink.table.runtime.operators.over.AbstractRowTimeUnboundedPrecedingOver; import org.apache.flink.table.runtime.operators.over.ProcTimeRangeBoundedPrecedingFunction; import org.apache.flink.table.runtime.operators.over.ProcTimeRowsBoundedPrecedingFunction; import org.apache.flink.table.runtime.operators.over.ProcTimeUnboundedPrecedingFunction; @@ -55,6 +57,7 @@ import org.apache.flink.table.runtime.operators.over.RowTimeRangeUnboundedPrecedingFunction; import org.apache.flink.table.runtime.operators.over.RowTimeRowsBoundedPrecedingFunction; import org.apache.flink.table.runtime.operators.over.RowTimeRowsUnboundedPrecedingFunction; +import org.apache.flink.table.runtime.operators.over.RowTimeUnboundedPrecedingOverFunctionV2; import org.apache.flink.table.runtime.types.LogicalTypeDataTypeConverter; import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; import org.apache.flink.table.runtime.util.StateConfigUtil; @@ -70,6 +73,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; + import 
java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; @@ -98,6 +103,11 @@ public class StreamExecOverAggregate extends ExecNodeBase public static final String FIELD_NAME_OVER_SPEC = "overSpec"; + public static final String FIELD_NAME_UNBOUNDED_OVER_VERSION = "unboundedOverVersion"; + + @JsonProperty(FIELD_NAME_UNBOUNDED_OVER_VERSION) + private final int unboundedOverVersion; + @JsonProperty(FIELD_NAME_OVER_SPEC) private final OverSpec overSpec; @@ -114,7 +124,8 @@ public StreamExecOverAggregate( overSpec, Collections.singletonList(inputProperty), outputType, - description); + description, + tableConfig.get(ExecutionConfigOptions.UNBOUNDED_OVER_VERSION)); } @JsonCreator @@ -125,10 +136,17 @@ public StreamExecOverAggregate( @JsonProperty(FIELD_NAME_OVER_SPEC) OverSpec overSpec, @JsonProperty(FIELD_NAME_INPUT_PROPERTIES) List inputProperties, @JsonProperty(FIELD_NAME_OUTPUT_TYPE) RowType outputType, - @JsonProperty(FIELD_NAME_DESCRIPTION) String description) { + @JsonProperty(FIELD_NAME_DESCRIPTION) String description, + @Nullable @JsonProperty(FIELD_NAME_UNBOUNDED_OVER_VERSION) + Integer unboundedOverVersion) { super(id, context, persistedConfig, inputProperties, outputType, description); checkArgument(inputProperties.size() == 1); this.overSpec = checkNotNull(overSpec); + + if (unboundedOverVersion == null) { + unboundedOverVersion = 1; + } + this.unboundedOverVersion = unboundedOverVersion; } @SuppressWarnings("unchecked") @@ -316,24 +334,42 @@ private KeyedProcessFunction createUnboundedOverProce .toArray(LogicalType[]::new); if (rowTimeIdx >= 0) { - if (isRowsClause) { - // ROWS unbounded over process function - return new RowTimeRowsUnboundedPrecedingFunction<>( - config.getStateRetentionTime(), - TableConfigUtils.getMaxIdleStateRetentionTime(config), - genAggsHandler, - flattenAccTypes, - fieldTypes, - rowTimeIdx); - } else { - // RANGE unbounded over process function - return new RowTimeRangeUnboundedPrecedingFunction<>( - config.getStateRetentionTime(), - TableConfigUtils.getMaxIdleStateRetentionTime(config), - genAggsHandler, - flattenAccTypes, - fieldTypes, - rowTimeIdx); + switch (unboundedOverVersion) { + // Currently there is no migration path between first and second versions. + case AbstractRowTimeUnboundedPrecedingOver.FIRST_OVER_VERSION: + if (isRowsClause) { + // ROWS unbounded over process function + return new RowTimeRowsUnboundedPrecedingFunction<>( + config.getStateRetentionTime(), + TableConfigUtils.getMaxIdleStateRetentionTime(config), + genAggsHandler, + flattenAccTypes, + fieldTypes, + rowTimeIdx); + } else { + // RANGE unbounded over process function + return new RowTimeRangeUnboundedPrecedingFunction<>( + config.getStateRetentionTime(), + TableConfigUtils.getMaxIdleStateRetentionTime(config), + genAggsHandler, + flattenAccTypes, + fieldTypes, + rowTimeIdx); + } + case RowTimeUnboundedPrecedingOverFunctionV2.SECOND_OVER_VERSION: + return new RowTimeUnboundedPrecedingOverFunctionV2<>( + isRowsClause, + config.getStateRetentionTime(), + TableConfigUtils.getMaxIdleStateRetentionTime(config), + genAggsHandler, + flattenAccTypes, + fieldTypes, + rowTimeIdx); + default: + throw new UnsupportedOperationException( + "Unsupported unbounded over version: " + + unboundedOverVersion + + ". 
Valid versions are 1 and 2."); } } else { return new ProcTimeUnboundedPrecedingFunction<>( diff --git a/flink-table/flink-table-planner/src/test/resources/restore-tests/batch-exec-rank_1/rank-n-test/plan/rank-n-test.json b/flink-table/flink-table-planner/src/test/resources/restore-tests/batch-exec-rank_1/rank-n-test/plan/rank-n-test.json index 4b17b869aacfb..9669601713f30 100644 --- a/flink-table/flink-table-planner/src/test/resources/restore-tests/batch-exec-rank_1/rank-n-test/plan/rank-n-test.json +++ b/flink-table/flink-table-planner/src/test/resources/restore-tests/batch-exec-rank_1/rank-n-test/plan/rank-n-test.json @@ -302,7 +302,8 @@ "fieldType" : "BIGINT NOT NULL" } ] }, - "description" : "OverAggregate(partitionBy=[a], orderBy=[t ASC], window#0=[ROW_NUMBER(*) AS w0$o0 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], select=[a, b, t, w0$o0])" + "description" : "OverAggregate(partitionBy=[a], orderBy=[t ASC], window#0=[ROW_NUMBER(*) AS w0$o0 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], select=[a, b, t, w0$o0])", + "unboundedOverVersion" : 1 }, { "id" : 6, "type" : "batch-exec-calc_1", @@ -438,4 +439,4 @@ }, "shuffleMode" : "PIPELINED" } ] -} \ No newline at end of file +} diff --git a/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-preceding-rows-with-out-of-order-records/plan/over-aggregate-bounded-partitioned-preceding-rows-with-out-of-order-records.json b/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-preceding-rows-with-out-of-order-records/plan/over-aggregate-bounded-partitioned-preceding-rows-with-out-of-order-records.json index a45cccffde105..fae889bd9a2b9 100644 --- a/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-preceding-rows-with-out-of-order-records/plan/over-aggregate-bounded-partitioned-preceding-rows-with-out-of-order-records.json +++ b/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-preceding-rows-with-out-of-order-records/plan/over-aggregate-bounded-partitioned-preceding-rows-with-out-of-order-records.json @@ -357,7 +357,8 @@ "fieldType" : "BIGINT NOT NULL" } ] }, - "description" : "OverAggregate(partitionBy=[c], orderBy=[rowtime ASC], window=[ ROWS BETWEEN 5 PRECEDING AND CURRENT ROW], select=[ts, a, b, c, rowtime, $5, LTCNT(a, $5) AS w0$o0, COUNT(a) AS w0$o1, $SUM0(a) AS w0$o2])" + "description" : "OverAggregate(partitionBy=[c], orderBy=[rowtime ASC], window=[ ROWS BETWEEN 5 PRECEDING AND CURRENT ROW], select=[ts, a, b, c, rowtime, $5, LTCNT(a, $5) AS w0$o0, COUNT(a) AS w0$o1, $SUM0(a) AS w0$o2])", + "unboundedOverVersion" : 1 }, { "id" : 31, "type" : "stream-exec-calc_1", @@ -588,4 +589,4 @@ }, "shuffleMode" : "PIPELINED" } ] -} \ No newline at end of file +} diff --git a/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-preceding-rows/plan/over-aggregate-bounded-partitioned-preceding-rows.json b/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-preceding-rows/plan/over-aggregate-bounded-partitioned-preceding-rows.json index 3bb2a9273e3ad..8dc7e8578489b 100644 --- 
a/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-preceding-rows/plan/over-aggregate-bounded-partitioned-preceding-rows.json +++ b/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-preceding-rows/plan/over-aggregate-bounded-partitioned-preceding-rows.json @@ -357,7 +357,8 @@ "fieldType" : "BIGINT NOT NULL" } ] }, - "description" : "OverAggregate(partitionBy=[c], orderBy=[rowtime ASC], window=[ ROWS BETWEEN 5 PRECEDING AND CURRENT ROW], select=[ts, a, b, c, rowtime, $5, LTCNT(a, $5) AS w0$o0, COUNT(a) AS w0$o1, $SUM0(a) AS w0$o2])" + "description" : "OverAggregate(partitionBy=[c], orderBy=[rowtime ASC], window=[ ROWS BETWEEN 5 PRECEDING AND CURRENT ROW], select=[ts, a, b, c, rowtime, $5, LTCNT(a, $5) AS w0$o0, COUNT(a) AS w0$o1, $SUM0(a) AS w0$o2])", + "unboundedOverVersion" : 1 }, { "id" : 7, "type" : "stream-exec-calc_1", @@ -588,4 +589,4 @@ }, "shuffleMode" : "PIPELINED" } ] -} \ No newline at end of file +} diff --git a/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-rows-with-out-of-order-records/plan/over-aggregate-bounded-partitioned-rows-with-out-of-order-records.json b/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-rows-with-out-of-order-records/plan/over-aggregate-bounded-partitioned-rows-with-out-of-order-records.json index 3092a157ad776..8aad761921315 100644 --- a/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-rows-with-out-of-order-records/plan/over-aggregate-bounded-partitioned-rows-with-out-of-order-records.json +++ b/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-rows-with-out-of-order-records/plan/over-aggregate-bounded-partitioned-rows-with-out-of-order-records.json @@ -357,7 +357,8 @@ "fieldType" : "BIGINT NOT NULL" } ] }, - "description" : "OverAggregate(partitionBy=[c], orderBy=[rowtime ASC], window=[ RANG BETWEEN 10000 PRECEDING AND CURRENT ROW], select=[ts, a, b, c, rowtime, $5, LTCNT(a, $5) AS w0$o0, COUNT(a) AS w0$o1, $SUM0(a) AS w0$o2])" + "description" : "OverAggregate(partitionBy=[c], orderBy=[rowtime ASC], window=[ RANG BETWEEN 10000 PRECEDING AND CURRENT ROW], select=[ts, a, b, c, rowtime, $5, LTCNT(a, $5) AS w0$o0, COUNT(a) AS w0$o1, $SUM0(a) AS w0$o2])", + "unboundedOverVersion" : 1 }, { "id" : 7, "type" : "stream-exec-calc_1", @@ -588,4 +589,4 @@ }, "shuffleMode" : "PIPELINED" } ] -} \ No newline at end of file +} diff --git a/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-rows/plan/over-aggregate-bounded-partitioned-rows.json b/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-rows/plan/over-aggregate-bounded-partitioned-rows.json index 0e325c4b52b39..fa7b5b7697075 100644 --- a/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-rows/plan/over-aggregate-bounded-partitioned-rows.json +++ 
b/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-bounded-partitioned-rows/plan/over-aggregate-bounded-partitioned-rows.json @@ -357,7 +357,8 @@ "fieldType" : "BIGINT NOT NULL" } ] }, - "description" : "OverAggregate(partitionBy=[c], orderBy=[rowtime ASC], window=[ RANG BETWEEN 10000 PRECEDING AND CURRENT ROW], select=[ts, a, b, c, rowtime, $5, LTCNT(a, $5) AS w0$o0, COUNT(a) AS w0$o1, $SUM0(a) AS w0$o2])" + "description" : "OverAggregate(partitionBy=[c], orderBy=[rowtime ASC], window=[ RANG BETWEEN 10000 PRECEDING AND CURRENT ROW], select=[ts, a, b, c, rowtime, $5, LTCNT(a, $5) AS w0$o0, COUNT(a) AS w0$o1, $SUM0(a) AS w0$o2])", + "unboundedOverVersion" : 1 }, { "id" : 7, "type" : "stream-exec-calc_1", @@ -588,4 +589,4 @@ }, "shuffleMode" : "PIPELINED" } ] -} \ No newline at end of file +} diff --git a/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-unbounded-partitioned-rows/plan/over-aggregate-unbounded-partitioned-rows.json b/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-unbounded-partitioned-rows/plan/over-aggregate-unbounded-partitioned-rows.json index cd93435394734..6efa3a92257cf 100644 --- a/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-unbounded-partitioned-rows/plan/over-aggregate-unbounded-partitioned-rows.json +++ b/flink-table/flink-table-planner/src/test/resources/restore-tests/stream-exec-over-aggregate_1/over-aggregate-unbounded-partitioned-rows/plan/over-aggregate-unbounded-partitioned-rows.json @@ -347,7 +347,8 @@ "fieldType" : "BIGINT NOT NULL" } ] }, - "description" : "OverAggregate(partitionBy=[c], orderBy=[rowtime ASC], window=[ RANG BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], select=[ts, a, b, c, rowtime, $5, LTCNT(a, $5) AS w0$o0, COUNT(a) AS w0$o1, $SUM0(a) AS w0$o2])" + "description" : "OverAggregate(partitionBy=[c], orderBy=[rowtime ASC], window=[ RANG BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], select=[ts, a, b, c, rowtime, $5, LTCNT(a, $5) AS w0$o0, COUNT(a) AS w0$o1, $SUM0(a) AS w0$o2])", + "unboundedOverVersion" : 1 }, { "id" : 7, "type" : "stream-exec-calc_1", @@ -578,4 +579,4 @@ }, "shuffleMode" : "PIPELINED" } ] -} \ No newline at end of file +} diff --git a/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/stream/sql/OverAggregateITCase.scala b/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/stream/sql/OverAggregateITCase.scala index eef53e61c8579..82c58da9a8795 100644 --- a/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/stream/sql/OverAggregateITCase.scala +++ b/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/stream/sql/OverAggregateITCase.scala @@ -21,14 +21,15 @@ import org.apache.flink.api.common.typeinfo.TypeInformation import org.apache.flink.api.java.typeutils.RowTypeInfo import org.apache.flink.table.api._ import org.apache.flink.table.api.bridge.scala._ +import org.apache.flink.table.api.config.ExecutionConfigOptions import org.apache.flink.table.planner.factories.TestValuesTableFactory import org.apache.flink.table.planner.runtime.utils.{StreamingEnvUtil, StreamingWithStateTestBase, TestData, TestingAppendSink} import org.apache.flink.table.planner.runtime.utils.BatchTestBase.row -import 
org.apache.flink.table.planner.runtime.utils.StreamingWithStateTestBase.StateBackendMode +import org.apache.flink.table.planner.runtime.utils.StreamingWithStateTestBase.{HEAP_BACKEND, ROCKSDB_BACKEND, StateBackendMode} import org.apache.flink.table.planner.runtime.utils.TimeTestUtil.EventTimeProcessOperator import org.apache.flink.table.planner.runtime.utils.UserDefinedFunctionTestUtils.{CountNullNonNull, CountPairs, LargerThanCount} import org.apache.flink.table.runtime.typeutils.BigDecimalTypeInfo -import org.apache.flink.testutils.junit.extensions.parameterized.ParameterizedTestExtension +import org.apache.flink.testutils.junit.extensions.parameterized.{ParameterizedTestExtension, Parameters} import org.apache.flink.types.Row import org.assertj.core.api.Assertions.{assertThat, assertThatThrownBy} @@ -37,11 +38,13 @@ import org.junit.jupiter.api.{BeforeEach, TestTemplate} import org.junit.jupiter.api.extension.ExtendWith import java.time.{Instant, LocalDateTime} +import java.util import scala.collection.{mutable, Seq} @ExtendWith(Array(classOf[ParameterizedTestExtension])) -class OverAggregateITCase(mode: StateBackendMode) extends StreamingWithStateTestBase(mode) { +class OverAggregateITCase(mode: StateBackendMode, unboundedOverVersion: Int) + extends StreamingWithStateTestBase(mode) { val data = List( (1L, 1, "Hello"), @@ -60,6 +63,7 @@ class OverAggregateITCase(mode: StateBackendMode) extends StreamingWithStateTest // unaligned checkpoints are regenerating watermarks after recovery of in-flight data // https://issues.apache.org/jira/browse/FLINK-18405 env.getCheckpointConfig.enableUnalignedCheckpoints(false) + tEnv.getConfig.set[Integer](ExecutionConfigOptions.UNBOUNDED_OVER_VERSION, unboundedOverVersion) } @TestTemplate @@ -1496,3 +1500,16 @@ class OverAggregateITCase(mode: StateBackendMode) extends StreamingWithStateTest } } } + +object OverAggregateITCase { + @Parameters(name = "StateBackend={0}, unboundedOverVersion={1}") + def parameters(): util.Collection[Array[Any]] = { + scala.collection.JavaConverters.seqAsJavaList( + Seq[Array[Any]]( + Array(HEAP_BACKEND, 1), + Array(HEAP_BACKEND, 2), + Array(ROCKSDB_BACKEND, 1), + Array(ROCKSDB_BACKEND, 2) + )) + } +} diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/AbstractRowTimeUnboundedPrecedingOver.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/AbstractRowTimeUnboundedPrecedingOver.java index 7f91ca2b7b327..7a9d1156ba5eb 100644 --- a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/AbstractRowTimeUnboundedPrecedingOver.java +++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/AbstractRowTimeUnboundedPrecedingOver.java @@ -50,6 +50,12 @@ /** A basic implementation to support unbounded event-time over-window. 
*/ public abstract class AbstractRowTimeUnboundedPrecedingOver extends KeyedProcessFunctionWithCleanupState { + public static final int FIRST_OVER_VERSION = 1; + public static final String LATE_ELEMENTS_DROPPED_METRIC_NAME = "numLateRecordsDropped"; + public static final String ACCUMULATOR_STATE_NAME = "accState"; + public static final String INPUT_STATE_NAME = "inputState"; + public static final String CLEANUP_STATE_NAME = "RowTimeUnboundedOverCleanupTime"; + private static final long serialVersionUID = 1L; private static final Logger LOG = @@ -70,10 +76,6 @@ public abstract class AbstractRowTimeUnboundedPrecedingOver protected transient AggsHandleFunction function; - // ------------------------------------------------------------------------ - // Metrics - // ------------------------------------------------------------------------ - private static final String LATE_ELEMENTS_DROPPED_METRIC_NAME = "numLateRecordsDropped"; private transient Counter numLateRecordsDropped; @VisibleForTesting @@ -107,7 +109,7 @@ public void open(OpenContext openContext) throws Exception { // initialize accumulator state InternalTypeInfo accTypeInfo = InternalTypeInfo.ofFields(accTypes); ValueStateDescriptor accStateDesc = - new ValueStateDescriptor("accState", accTypeInfo); + new ValueStateDescriptor(ACCUMULATOR_STATE_NAME, accTypeInfo); accState = getRuntimeContext().getState(accStateDesc); // input element are all binary row as they are came from network @@ -115,10 +117,10 @@ public void open(OpenContext openContext) throws Exception { ListTypeInfo rowListTypeInfo = new ListTypeInfo(inputType); MapStateDescriptor> inputStateDesc = new MapStateDescriptor>( - "inputState", Types.LONG, rowListTypeInfo); + INPUT_STATE_NAME, Types.LONG, rowListTypeInfo); inputState = getRuntimeContext().getMapState(inputStateDesc); - initCleanupTimeState("RowTimeUnboundedOverCleanupTime"); + initCleanupTimeState(CLEANUP_STATE_NAME); // metrics this.numLateRecordsDropped = diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/RowTimeRangeUnboundedPrecedingFunction.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/RowTimeRangeUnboundedPrecedingFunction.java index 59f39bf600c4c..32ae67c15a944 100644 --- a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/RowTimeRangeUnboundedPrecedingFunction.java +++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/RowTimeRangeUnboundedPrecedingFunction.java @@ -19,6 +19,8 @@ package org.apache.flink.table.runtime.operators.over; import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.utils.JoinedRowData; +import org.apache.flink.table.runtime.generated.AggsHandleFunction; import org.apache.flink.table.runtime.generated.GeneratedAggsHandleFunction; import org.apache.flink.table.types.logical.LogicalType; import org.apache.flink.util.Collector; @@ -56,23 +58,26 @@ public RowTimeRangeUnboundedPrecedingFunction( @Override public void processElementsWithSameTimestamp(List curRowList, Collector out) throws Exception { - int i = 0; - // all same timestamp data should have same aggregation value. - while (i < curRowList.size()) { - RowData curRow = curRowList.get(i); + processElementsWithSameTimestampRange(function, output, curRowList, out); + } + + /** + * First aggregate all the records with the same timestamp, only then in second step emit them. 
+ * All emitted records with same timestamp should have the same aggregated value. + */ + static void processElementsWithSameTimestampRange( + AggsHandleFunction function, + JoinedRowData outputRecord, + List curRowList, + Collector out) + throws Exception { + for (RowData curRow : curRowList) { function.accumulate(curRow); - i += 1; } - - // emit output row - i = 0; RowData aggValue = function.getValue(); - while (i < curRowList.size()) { - RowData curRow = curRowList.get(i); - // prepare output row - output.replace(curRow, aggValue); - out.collect(output); - i += 1; + for (RowData curRow : curRowList) { + outputRecord.replace(curRow, aggValue); + out.collect(outputRecord); } } } diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/RowTimeRowsUnboundedPrecedingFunction.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/RowTimeRowsUnboundedPrecedingFunction.java index c145209d8d705..dd535fd066e56 100644 --- a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/RowTimeRowsUnboundedPrecedingFunction.java +++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/RowTimeRowsUnboundedPrecedingFunction.java @@ -19,6 +19,8 @@ package org.apache.flink.table.runtime.operators.over; import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.utils.JoinedRowData; +import org.apache.flink.table.runtime.generated.AggsHandleFunction; import org.apache.flink.table.runtime.generated.GeneratedAggsHandleFunction; import org.apache.flink.table.types.logical.LogicalType; import org.apache.flink.util.Collector; @@ -56,16 +58,20 @@ public RowTimeRowsUnboundedPrecedingFunction( @Override public void processElementsWithSameTimestamp(List curRowList, Collector out) throws Exception { - int i = 0; - while (i < curRowList.size()) { - RowData curRow = curRowList.get(i); - // accumulate current row + processElementsWithSameTimestampRows(function, output, curRowList, out); + } + + /** Aggregate AND emit rows one by one. */ + static void processElementsWithSameTimestampRows( + AggsHandleFunction function, + JoinedRowData outputRecord, + List curRowList, + Collector out) + throws Exception { + for (RowData curRow : curRowList) { function.accumulate(curRow); - // prepare output row - output.replace(curRow, function.getValue()); - // emit output row - out.collect(output); - i += 1; + outputRecord.replace(curRow, function.getValue()); + out.collect(outputRecord); } } } diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/RowTimeUnboundedPrecedingOverFunctionV2.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/RowTimeUnboundedPrecedingOverFunctionV2.java new file mode 100644 index 0000000000000..fda03a2bcd00c --- /dev/null +++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/over/RowTimeUnboundedPrecedingOverFunctionV2.java @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.runtime.operators.over; + +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.api.common.functions.OpenContext; +import org.apache.flink.api.common.state.MapState; +import org.apache.flink.api.common.state.MapStateDescriptor; +import org.apache.flink.api.common.state.ValueState; +import org.apache.flink.api.common.state.ValueStateDescriptor; +import org.apache.flink.api.common.typeinfo.Types; +import org.apache.flink.api.java.typeutils.ListTypeInfo; +import org.apache.flink.metrics.Counter; +import org.apache.flink.streaming.api.functions.KeyedProcessFunction; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.utils.JoinedRowData; +import org.apache.flink.table.runtime.dataview.PerKeyStateDataViewStore; +import org.apache.flink.table.runtime.functions.KeyedProcessFunctionWithCleanupState; +import org.apache.flink.table.runtime.generated.AggsHandleFunction; +import org.apache.flink.table.runtime.generated.GeneratedAggsHandleFunction; +import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.util.Collector; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +import static org.apache.flink.table.runtime.operators.over.AbstractRowTimeUnboundedPrecedingOver.ACCUMULATOR_STATE_NAME; +import static org.apache.flink.table.runtime.operators.over.AbstractRowTimeUnboundedPrecedingOver.CLEANUP_STATE_NAME; +import static org.apache.flink.table.runtime.operators.over.AbstractRowTimeUnboundedPrecedingOver.INPUT_STATE_NAME; +import static org.apache.flink.table.runtime.operators.over.AbstractRowTimeUnboundedPrecedingOver.LATE_ELEMENTS_DROPPED_METRIC_NAME; +import static org.apache.flink.table.runtime.operators.over.RowTimeRangeUnboundedPrecedingFunction.processElementsWithSameTimestampRange; +import static org.apache.flink.table.runtime.operators.over.RowTimeRowsUnboundedPrecedingFunction.processElementsWithSameTimestampRows; + +/** + * A ProcessFunction to support unbounded ROWS and RANGE windows. + * + *

ROWS E.g.: SELECT rowtime, b, c, min(c) OVER (PARTITION BY b ORDER BY rowtime ROWS BETWEEN + * UNBOUNDED preceding AND CURRENT ROW), max(c) OVER (PARTITION BY b ORDER BY rowtime ROWS BETWEEN + * UNBOUNDED preceding AND CURRENT ROW) FROM T. + * + *

RANGE E.g.: SELECT rowtime, b, c, min(c) OVER (PARTITION BY b ORDER BY rowtime RANGE BETWEEN + * UNBOUNDED preceding AND CURRENT ROW), max(c) OVER (PARTITION BY b ORDER BY rowtime RANGE BETWEEN + * UNBOUNDED preceding AND CURRENT ROW) FROM T. + */ +public class RowTimeUnboundedPrecedingOverFunctionV2 + extends KeyedProcessFunctionWithCleanupState { + public static final int SECOND_OVER_VERSION = 2; + + private static final long serialVersionUID = 1L; + + private static final Logger LOG = + LoggerFactory.getLogger(RowTimeUnboundedPrecedingOverFunctionV2.class); + + // whether this is a ROWS or RANGE operation + private final boolean isRowsWindow; + private final GeneratedAggsHandleFunction genAggsHandler; + private final LogicalType[] accTypes; + private final LogicalType[] inputFieldTypes; + private final int rowTimeIdx; + + protected transient JoinedRowData output; + // state to hold the accumulators of the aggregations + private transient ValueState accState; + // state to hold rows until the next watermark arrives + private transient MapState> inputState; + + protected transient AggsHandleFunction function; + + private transient Counter numLateRecordsDropped; + + @VisibleForTesting + protected Counter getCounter() { + return numLateRecordsDropped; + } + + public RowTimeUnboundedPrecedingOverFunctionV2( + boolean isRowsWindow, + long minRetentionTime, + long maxRetentionTime, + GeneratedAggsHandleFunction genAggsHandler, + LogicalType[] accTypes, + LogicalType[] inputFieldTypes, + int rowTimeIdx) { + super(minRetentionTime, maxRetentionTime); + this.isRowsWindow = isRowsWindow; + this.genAggsHandler = genAggsHandler; + this.accTypes = accTypes; + this.inputFieldTypes = inputFieldTypes; + this.rowTimeIdx = rowTimeIdx; + } + + @Override + public void open(OpenContext openContext) throws Exception { + function = genAggsHandler.newInstance(getRuntimeContext().getUserCodeClassLoader()); + function.open(new PerKeyStateDataViewStore(getRuntimeContext())); + + output = new JoinedRowData(); + + // initialize accumulator state + InternalTypeInfo accTypeInfo = InternalTypeInfo.ofFields(accTypes); + ValueStateDescriptor accStateDesc = + new ValueStateDescriptor<>(ACCUMULATOR_STATE_NAME, accTypeInfo); + accState = getRuntimeContext().getState(accStateDesc); + + // input element are all binary row as they are came from network + InternalTypeInfo inputType = InternalTypeInfo.ofFields(inputFieldTypes); + ListTypeInfo rowListTypeInfo = new ListTypeInfo<>(inputType); + MapStateDescriptor> inputStateDesc = + new MapStateDescriptor<>(INPUT_STATE_NAME, Types.LONG, rowListTypeInfo); + inputState = getRuntimeContext().getMapState(inputStateDesc); + + initCleanupTimeState(CLEANUP_STATE_NAME); + + // metrics + this.numLateRecordsDropped = + getRuntimeContext().getMetricGroup().counter(LATE_ELEMENTS_DROPPED_METRIC_NAME); + } + + /** + * Puts an element from the input stream into state if it is not late. Registers a timer for the + * next watermark. + * + * @param input The input value. + * @param ctx A {@link Context} that allows querying the timestamp of the element and getting + * TimerService for registering timers and querying the time. The context is only valid + * during the invocation of this method, do not store it. + * @param out The collector for returning result values. 
+ * @throws Exception + */ + @Override + public void processElement( + RowData input, + KeyedProcessFunction.Context ctx, + Collector out) + throws Exception { + // register state-cleanup timer + registerProcessingCleanupTimer(ctx, ctx.timerService().currentProcessingTime()); + + long timestamp = input.getLong(rowTimeIdx); + long curWatermark = ctx.timerService().currentWatermark(); + + if (timestamp <= curWatermark) { + // discard late record + numLateRecordsDropped.inc(); + return; + } + // put row into state + List rowList = inputState.get(timestamp); + if (rowList == null) { + rowList = new ArrayList<>(); + // if that's the first timestamp for the given key, register the timer to process + // those records. + ctx.timerService().registerEventTimeTimer(timestamp); + } + rowList.add(input); + inputState.put(timestamp, rowList); + } + + @Override + public void onTimer( + long timestamp, + KeyedProcessFunction.OnTimerContext ctx, + Collector out) + throws Exception { + if (isProcessingTimeTimer(ctx)) { + cleanupState(ctx); + return; + } + + RowData lastAccumulator = accState.value(); + if (lastAccumulator == null) { + lastAccumulator = function.createAccumulators(); + } + function.setAccumulators(lastAccumulator); + + processElementsWithSameTimestamp(timestamp, out); + + lastAccumulator = function.getAccumulators(); + accState.update(lastAccumulator); + + registerProcessingCleanupTimer(ctx, ctx.timerService().currentProcessingTime()); + } + + /** + * Process records with the same timestamp; the mechanism differs between ROWS and RANGE windows. + */ + private void processElementsWithSameTimestamp(long timestamp, Collector out) + throws Exception { + List curRowList = inputState.get(timestamp); + if (curRowList == null) { + // Ignore records with this timestamp if the state has already been cleared. + LOG.warn( + "The state is cleared because of state ttl. " + + "This will result in incorrect results. " + + "You can increase the state ttl to avoid this."); + } else { + if (isRowsWindow) { + processElementsWithSameTimestampRows(function, output, curRowList, out); + } else { + processElementsWithSameTimestampRange(function, output, curRowList, out); + } + } + inputState.remove(timestamp); + } + + private void cleanupState(OnTimerContext ctx) throws Exception { + if (stateCleaningEnabled) { + // we check whether there are still records which have not been processed yet + if (inputState.isEmpty()) { + // we clean the state + cleanupState(inputState, accState); + function.cleanup(); + } else { + // There are records left to process because a watermark has not been received + // yet. + // This would only happen if the input stream has stopped. So we don't need to + // clean up. 
+ // We leave the state as it is and schedule a new cleanup timer + registerProcessingCleanupTimer(ctx, ctx.timerService().currentProcessingTime()); + } + } + } + + @Override + public void close() throws Exception { + if (null != function) { + function.close(); + } + } +} diff --git a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/streaming/util/TestStreamEnvironment.java b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/streaming/util/TestStreamEnvironment.java index c3d14e1c03dbb..badde2c002d2c 100644 --- a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/streaming/util/TestStreamEnvironment.java +++ b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/streaming/util/TestStreamEnvironment.java @@ -201,6 +201,11 @@ private static void randomizeConfiguration(MiniCluster miniCluster, Configuratio } miniCluster.overrideRestoreModeForChangelogStateBackend(); } + randomize( + conf, + ConfigOptions.key("table.exec.unbounded-over.version").intType().noDefaultValue(), + 1, + 2); } /**
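Taken together, the planner change above encodes a quiet compatibility rule: a compiled plan persisted before this change carries no "unboundedOverVersion" field, so StreamExecOverAggregate falls back to version 1 on restore, while newly compiled pipelines pick up the new default of 2 from table.exec.unbounded-over.version. A minimal sketch of the restore side (the plan path is hypothetical):

```java
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.PlanReference;
import org.apache.flink.table.api.TableEnvironment;

public class RestoreCompiledPlanExample {
    public static void main(String[] args) throws Exception {
        TableEnvironment tEnv =
                TableEnvironment.create(EnvironmentSettings.inStreamingMode());
        // A plan persisted before this change has no "unboundedOverVersion" entry;
        // the deserialized StreamExecOverAggregate then defaults the field to 1,
        // so restored operator state stays compatible without any user action.
        tEnv.executePlan(PlanReference.fromFile("/path/to/over-aggregate-plan.json"))
                .await();
    }
}
```

The randomized setting added to TestStreamEnvironment above makes CI exercise both code paths by picking version 1 or 2 per run.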