Skip to content

Adjust default wal_throttle_threshold_in_byte and region_migration_speed_limit_bytes_per_second #15273

Adjust default wal_throttle_threshold_in_byte and region_migration_speed_limit_bytes_per_second

Adjust default wal_throttle_threshold_in_byte and region_migration_speed_limit_bytes_per_second #15273

name: Multi-Cluster IT

# Triggers: pushes and pull requests targeting the listed branches, except
# when only the ignored paths are touched; manual runs are also allowed.
on:
  push:
    branches: [master, 'rel/1.*', 'rc/1.*', 'force_ci/**']
    paths-ignore:
      - 'docs/**'
      - 'site/**'
      - 'iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/**'  # queryengine
  pull_request:
    branches: [master, 'rel/1.*', 'rc/1.*', 'force_ci/**']
    paths-ignore:
      - 'docs/**'
      - 'site/**'
      - 'iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/**'  # queryengine
  # allow manually run the action:
  workflow_dispatch:

# One run per workflow + ref; a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Environment shared by every job below.
env:
  MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
  MAVEN_ARGS: --batch-mode --no-transfer-progress
  DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}

jobs:
auto-create-schema:
  # Runs the MultiClusterIT2AutoCreateSchema Maven profile once per cluster
  # mode in the matrix, passing the same mode for both cluster configurations.
  strategy:
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
      cluster: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode]
      os: [ubuntu-latest]
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    - name: Cache Maven packages
      uses: actions/cache@v4
      with:
        path: ~/.m2
        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
        restore-keys: ${{ runner.os }}-m2-
    # NOTE: the shell arithmetic below is integer-only, so the actual delay is
    # a whole number of seconds between 0 and 10 (not millisecond-granular).
    - name: Sleep for a random duration between 0 and 10000 milliseconds
      run: |
        sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
    - name: IT Test
      shell: bash
      # we do not compile client-cpp for saving time, it is tested in client.yml
      # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
      run: |
        # Runs the integration tests, retrying up to max_attempts times when the
        # failure log contains "Could not transfer artifact".
        # Returns 0 when mvn succeeds, or when every attempt failed with that
        # artifact-transfer message; returns 1 on any other failure.
        retry() {
          local -i max_attempts=3
          local -i attempt=1
          local -i retry_sleep=5
          local log_dir=integration-test/target/cluster-logs
          local log_file
          local test_output
          while [ $attempt -le $max_attempts ]; do
            log_file="$HOME/run-tests-$attempt.log"
            mvn clean verify \
              -P with-integration-tests \
              -DskipUTs \
              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
              -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
              -pl integration-test \
              -am -PMultiClusterIT2AutoCreateSchema \
              -ntp >> "$log_file" && return 0
            # Only reached when mvn failed.
            test_output=$(cat "$log_file")
            # The log directory can be missing (e.g. the build failed before any
            # cluster log was written); without mkdir the mv would fail and,
            # because the step shell exits on error, abort before any retry.
            mkdir -p "$log_dir"
            mv "$log_file" "$log_dir/"
            echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
            echo "$test_output"
            echo "==================== END: ~/run-tests-$attempt.log ======================"
            # grep the file directly: piping a large log into `grep -q` can make
            # the writer hit a closed pipe, which pipefail reports as a failure
            # even though the pattern matched.
            if grep -q "Could not transfer artifact" "$log_dir/run-tests-$attempt.log"; then
              if [ $attempt -lt $max_attempts ]; then
                echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
                sleep $retry_sleep
                attempt=$((attempt + 1))
              else
                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
                echo "Treating this as a success because the issue is likely transient."
                return 0
              fi
            else
              # mvn already failed, so any non-transfer failure is a real error.
              echo "Test failed with a different error."
              return 1
            fi
          done
        }
        retry
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-auto-create-schema-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
        path: integration-test/target/cluster-logs
        retention-days: 30
manual-create-schema:
  # Runs the MultiClusterIT2ManualCreateSchema Maven profile for each
  # (cluster1, cluster2) pair left in the matrix after the exclusions.
  strategy:
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
      cluster1: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode]
      cluster2: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode]
      os: [ubuntu-latest]
      # After these exclusions five pairs remain:
      #   LightWeightStandaloneMode -> HighPerformanceMode
      #   ScalableSingleNodeMode    -> ScalableSingleNodeMode
      #   HighPerformanceMode       -> ScalableSingleNodeMode
      #   PipeConsensusBatchMode    -> ScalableSingleNodeMode
      #   PipeConsensusStreamMode   -> ScalableSingleNodeMode
      exclude:
        - cluster1: LightWeightStandaloneMode
          cluster2: LightWeightStandaloneMode
        - cluster1: LightWeightStandaloneMode
          cluster2: ScalableSingleNodeMode
        - cluster1: ScalableSingleNodeMode
          cluster2: LightWeightStandaloneMode
        - cluster1: ScalableSingleNodeMode
          cluster2: HighPerformanceMode
        - cluster1: HighPerformanceMode
          cluster2: LightWeightStandaloneMode
        - cluster1: HighPerformanceMode
          cluster2: HighPerformanceMode
        - cluster1: PipeConsensusBatchMode
          cluster2: LightWeightStandaloneMode
        - cluster1: PipeConsensusBatchMode
          cluster2: HighPerformanceMode
        - cluster1: PipeConsensusStreamMode
          cluster2: LightWeightStandaloneMode
        - cluster1: PipeConsensusStreamMode
          cluster2: HighPerformanceMode
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    - name: Cache Maven packages
      uses: actions/cache@v4
      with:
        path: ~/.m2
        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
        restore-keys: ${{ runner.os }}-m2-
    # NOTE: the shell arithmetic below is integer-only, so the actual delay is
    # a whole number of seconds between 0 and 10 (not millisecond-granular).
    - name: Sleep for a random duration between 0 and 10000 milliseconds
      run: |
        sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
    - name: IT Test
      shell: bash
      # we do not compile client-cpp for saving time, it is tested in client.yml
      # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
      run: |
        # Runs the integration tests, retrying up to max_attempts times when the
        # failure log contains "Could not transfer artifact".
        # Returns 0 when mvn succeeds, or when every attempt failed with that
        # artifact-transfer message; returns 1 on any other failure.
        retry() {
          local -i max_attempts=3
          local -i attempt=1
          local -i retry_sleep=5
          local log_dir=integration-test/target/cluster-logs
          local log_file
          local test_output
          while [ $attempt -le $max_attempts ]; do
            log_file="$HOME/run-tests-$attempt.log"
            mvn clean verify \
              -P with-integration-tests \
              -DskipUTs \
              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
              -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
              -pl integration-test \
              -am -PMultiClusterIT2ManualCreateSchema \
              -ntp >> "$log_file" && return 0
            # Only reached when mvn failed.
            test_output=$(cat "$log_file")
            # The log directory can be missing (e.g. the build failed before any
            # cluster log was written); without mkdir the mv would fail and,
            # because the step shell exits on error, abort before any retry.
            mkdir -p "$log_dir"
            mv "$log_file" "$log_dir/"
            echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
            echo "$test_output"
            echo "==================== END: ~/run-tests-$attempt.log ======================"
            # grep the file directly: piping a large log into `grep -q` can make
            # the writer hit a closed pipe, which pipefail reports as a failure
            # even though the pattern matched.
            if grep -q "Could not transfer artifact" "$log_dir/run-tests-$attempt.log"; then
              if [ $attempt -lt $max_attempts ]; then
                echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
                sleep $retry_sleep
                attempt=$((attempt + 1))
              else
                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
                echo "Treating this as a success because the issue is likely transient."
                return 0
              fi
            else
              # mvn already failed, so any non-transfer failure is a real error.
              echo "Test failed with a different error."
              return 1
            fi
          done
        }
        retry
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-manual-create-schema-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
        path: integration-test/target/cluster-logs
        retention-days: 30
subscription-arch-verification:
  # Runs the MultiClusterIT2SubscriptionArchVerification Maven profile with
  # ScalableSingleNodeMode for both cluster configurations.
  strategy:
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
      cluster1: [ScalableSingleNodeMode]
      cluster2: [ScalableSingleNodeMode]
      os: [ubuntu-latest]
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    - name: Cache Maven packages
      uses: actions/cache@v4
      with:
        path: ~/.m2
        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
        restore-keys: ${{ runner.os }}-m2-
    # NOTE: the shell arithmetic below is integer-only, so the actual delay is
    # a whole number of seconds between 0 and 10 (not millisecond-granular).
    - name: Sleep for a random duration between 0 and 10000 milliseconds
      run: |
        sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
    - name: IT Test
      shell: bash
      # we do not compile client-cpp for saving time, it is tested in client.yml
      # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
      run: |
        # Runs the integration tests, retrying up to max_attempts times when the
        # failure log contains "Could not transfer artifact".
        # Returns 0 when mvn succeeds, or when every attempt failed with that
        # artifact-transfer message; returns 1 on any other failure.
        retry() {
          local -i max_attempts=3
          local -i attempt=1
          local -i retry_sleep=5
          local log_dir=integration-test/target/cluster-logs
          local log_file
          local test_output
          while [ $attempt -le $max_attempts ]; do
            log_file="$HOME/run-tests-$attempt.log"
            mvn clean verify \
              -P with-integration-tests \
              -DskipUTs \
              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
              -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
              -pl integration-test \
              -am -PMultiClusterIT2SubscriptionArchVerification \
              -ntp >> "$log_file" && return 0
            # Only reached when mvn failed.
            test_output=$(cat "$log_file")
            # The log directory can be missing (e.g. the build failed before any
            # cluster log was written); without mkdir the mv would fail and,
            # because the step shell exits on error, abort before any retry.
            mkdir -p "$log_dir"
            mv "$log_file" "$log_dir/"
            echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
            echo "$test_output"
            echo "==================== END: ~/run-tests-$attempt.log ======================"
            # grep the file directly: piping a large log into `grep -q` can make
            # the writer hit a closed pipe, which pipefail reports as a failure
            # even though the pattern matched.
            if grep -q "Could not transfer artifact" "$log_dir/run-tests-$attempt.log"; then
              if [ $attempt -lt $max_attempts ]; then
                echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
                sleep $retry_sleep
                attempt=$((attempt + 1))
              else
                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
                echo "Treating this as a success because the issue is likely transient."
                return 0
              fi
            else
              # mvn already failed, so any non-transfer failure is a real error.
              echo "Test failed with a different error."
              return 1
            fi
          done
        }
        retry
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-subscription-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
        path: integration-test/target/cluster-logs
        retention-days: 30
subscription-regression-consumer:
  # Runs the MultiClusterIT2SubscriptionRegressionConsumer Maven profile with
  # ScalableSingleNodeMode for both cluster configurations.
  strategy:
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # do not use HighPerformanceMode here, otherwise some tests will cause the GH runner to receive a shutdown signal
      cluster1: [ScalableSingleNodeMode]
      cluster2: [ScalableSingleNodeMode]
      os: [ubuntu-latest]
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    - name: Cache Maven packages
      uses: actions/cache@v4
      with:
        path: ~/.m2
        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
        restore-keys: ${{ runner.os }}-m2-
    # NOTE: the shell arithmetic below is integer-only, so the actual delay is
    # a whole number of seconds between 0 and 10 (not millisecond-granular).
    - name: Sleep for a random duration between 0 and 10000 milliseconds
      run: |
        sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
    - name: IT Test
      shell: bash
      # we do not compile client-cpp for saving time, it is tested in client.yml
      # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
      run: |
        # Runs the integration tests, retrying up to max_attempts times when the
        # failure log contains "Could not transfer artifact".
        # Returns 0 when mvn succeeds, or when every attempt failed with that
        # artifact-transfer message; returns 1 on any other failure.
        retry() {
          local -i max_attempts=3
          local -i attempt=1
          local -i retry_sleep=5
          local log_dir=integration-test/target/cluster-logs
          local log_file
          local test_output
          while [ $attempt -le $max_attempts ]; do
            log_file="$HOME/run-tests-$attempt.log"
            mvn clean verify \
              -P with-integration-tests \
              -DskipUTs \
              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
              -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
              -pl integration-test \
              -am -PMultiClusterIT2SubscriptionRegressionConsumer \
              -ntp >> "$log_file" && return 0
            # Only reached when mvn failed.
            test_output=$(cat "$log_file")
            # The log directory can be missing (e.g. the build failed before any
            # cluster log was written); without mkdir the mv would fail and,
            # because the step shell exits on error, abort before any retry.
            mkdir -p "$log_dir"
            mv "$log_file" "$log_dir/"
            echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
            echo "$test_output"
            echo "==================== END: ~/run-tests-$attempt.log ======================"
            # grep the file directly: piping a large log into `grep -q` can make
            # the writer hit a closed pipe, which pipefail reports as a failure
            # even though the pattern matched.
            if grep -q "Could not transfer artifact" "$log_dir/run-tests-$attempt.log"; then
              if [ $attempt -lt $max_attempts ]; then
                echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
                sleep $retry_sleep
                attempt=$((attempt + 1))
              else
                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
                echo "Treating this as a success because the issue is likely transient."
                return 0
              fi
            else
              # mvn already failed, so any non-transfer failure is a real error.
              echo "Test failed with a different error."
              return 1
            fi
          done
        }
        retry
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-subscription-regression-consumer-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
        path: integration-test/target/cluster-logs
        retention-days: 30
subscription-regression-misc:
  # Runs the MultiClusterIT2SubscriptionRegressionMisc Maven profile with
  # ScalableSingleNodeMode for both cluster configurations.
  strategy:
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # do not use HighPerformanceMode here, otherwise some tests will cause the GH runner to receive a shutdown signal
      cluster1: [ScalableSingleNodeMode]
      cluster2: [ScalableSingleNodeMode]
      os: [ubuntu-latest]
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    - name: Cache Maven packages
      uses: actions/cache@v4
      with:
        path: ~/.m2
        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
        restore-keys: ${{ runner.os }}-m2-
    # NOTE: the shell arithmetic below is integer-only, so the actual delay is
    # a whole number of seconds between 0 and 10 (not millisecond-granular).
    - name: Sleep for a random duration between 0 and 10000 milliseconds
      run: |
        sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
    - name: IT Test
      shell: bash
      # we do not compile client-cpp for saving time, it is tested in client.yml
      # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
      run: |
        # Runs the integration tests, retrying up to max_attempts times when the
        # failure log contains "Could not transfer artifact".
        # Returns 0 when mvn succeeds, or when every attempt failed with that
        # artifact-transfer message; returns 1 on any other failure.
        retry() {
          local -i max_attempts=3
          local -i attempt=1
          local -i retry_sleep=5
          local log_dir=integration-test/target/cluster-logs
          local log_file
          local test_output
          while [ $attempt -le $max_attempts ]; do
            log_file="$HOME/run-tests-$attempt.log"
            mvn clean verify \
              -P with-integration-tests \
              -DskipUTs \
              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
              -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
              -pl integration-test \
              -am -PMultiClusterIT2SubscriptionRegressionMisc \
              -ntp >> "$log_file" && return 0
            # Only reached when mvn failed.
            test_output=$(cat "$log_file")
            # The log directory can be missing (e.g. the build failed before any
            # cluster log was written); without mkdir the mv would fail and,
            # because the step shell exits on error, abort before any retry.
            mkdir -p "$log_dir"
            mv "$log_file" "$log_dir/"
            echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
            echo "$test_output"
            echo "==================== END: ~/run-tests-$attempt.log ======================"
            # grep the file directly: piping a large log into `grep -q` can make
            # the writer hit a closed pipe, which pipefail reports as a failure
            # even though the pattern matched.
            if grep -q "Could not transfer artifact" "$log_dir/run-tests-$attempt.log"; then
              if [ $attempt -lt $max_attempts ]; then
                echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
                sleep $retry_sleep
                attempt=$((attempt + 1))
              else
                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
                echo "Treating this as a success because the issue is likely transient."
                return 0
              fi
            else
              # mvn already failed, so any non-transfer failure is a real error.
              echo "Test failed with a different error."
              return 1
            fi
          done
        }
        retry
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-subscription-regression-misc-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
        path: integration-test/target/cluster-logs
        retention-days: 30
table-model:
  # Runs the MultiClusterIT2TableModel Maven profile once per cluster mode in
  # the matrix, passing the same mode for both cluster configurations.
  strategy:
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
      cluster: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode]
      os: [ubuntu-latest]
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    - name: Cache Maven packages
      uses: actions/cache@v4
      with:
        path: ~/.m2
        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
        restore-keys: ${{ runner.os }}-m2-
    # NOTE: the shell arithmetic below is integer-only, so the actual delay is
    # a whole number of seconds between 0 and 10 (not millisecond-granular).
    - name: Sleep for a random duration between 0 and 10000 milliseconds
      run: |
        sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
    - name: IT Test
      shell: bash
      # we do not compile client-cpp for saving time, it is tested in client.yml
      # we can skip influxdb-protocol because it has been tested separately in influxdb-protocol.yml
      run: |
        # Runs the integration tests, retrying up to max_attempts times when the
        # failure log contains "Could not transfer artifact".
        # Returns 0 when mvn succeeds, or when every attempt failed with that
        # artifact-transfer message; returns 1 on any other failure.
        retry() {
          local -i max_attempts=3
          local -i attempt=1
          local -i retry_sleep=5
          local log_dir=integration-test/target/cluster-logs
          local log_file
          local test_output
          while [ $attempt -le $max_attempts ]; do
            log_file="$HOME/run-tests-$attempt.log"
            mvn clean verify \
              -P with-integration-tests \
              -DskipUTs \
              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
              -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
              -pl integration-test \
              -am -PMultiClusterIT2TableModel \
              -ntp >> "$log_file" && return 0
            # Only reached when mvn failed.
            test_output=$(cat "$log_file")
            # The log directory can be missing (e.g. the build failed before any
            # cluster log was written); without mkdir the mv would fail and,
            # because the step shell exits on error, abort before any retry.
            mkdir -p "$log_dir"
            mv "$log_file" "$log_dir/"
            echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
            echo "$test_output"
            echo "==================== END: ~/run-tests-$attempt.log ======================"
            # grep the file directly: piping a large log into `grep -q` can make
            # the writer hit a closed pipe, which pipefail reports as a failure
            # even though the pattern matched.
            if grep -q "Could not transfer artifact" "$log_dir/run-tests-$attempt.log"; then
              if [ $attempt -lt $max_attempts ]; then
                echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
                sleep $retry_sleep
                attempt=$((attempt + 1))
              else
                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
                echo "Treating this as a success because the issue is likely transient."
                return 0
              fi
            else
              # mvn already failed, so any non-transfer failure is a real error.
              echo "Test failed with a different error."
              return 1
            fi
          done
        }
        retry
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-log-table-model-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
        path: integration-test/target/cluster-logs
        retention-days: 30