Skip to content

Commit

Permalink
This is an automated cherry-pick of pingcap#45014
Browse files Browse the repository at this point in the history
Signed-off-by: ti-chi-bot <[email protected]>
  • Loading branch information
3pointer authored and ti-chi-bot committed Jan 8, 2025
1 parent 8ce140d commit b074975
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 1 deletion.
21 changes: 21 additions & 0 deletions br/cmd/br/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,27 @@ func runRestoreCommand(command *cobra.Command, cmdName string) error {
return nil
}

<<<<<<< HEAD
=======
// print workaround when we met not fresh or incompatible cluster error on full cluster restore
// printWorkaroundOnFullRestoreError prints a boxed workaround hint on stdout when a
// full cluster restore failed because the target cluster is either not fresh
// (ErrRestoreNotFreshCluster) or has incompatible system tables
// (ErrRestoreIncompatibleSys). Any other error is silently ignored.
func printWorkaroundOnFullRestoreError(command *cobra.Command, err error) {
	notFresh := errors.ErrorEqual(err, berrors.ErrRestoreNotFreshCluster)
	incompatibleSys := errors.ErrorEqual(err, berrors.ErrRestoreIncompatibleSys)
	// Only the two known full-restore precheck failures get a workaround message.
	if !notFresh && !incompatibleSys {
		return
	}
	fmt.Println("#######################################################################")
	if notFresh {
		fmt.Println("# the target cluster is not fresh, cannot restore.")
		fmt.Println("# you can drop existing databases and tables and start restore again")
	} else {
		fmt.Println("# the target cluster is not compatible with the backup data,")
		fmt.Println("# you can remove 'with-sys-table' flag to skip restoring system tables")
	}
	fmt.Println("#######################################################################")
}

>>>>>>> 8c5ca7b2008 (restore: precheck cluster is empty when first time full restore (#45014))
func runRestoreRawCommand(command *cobra.Command, cmdName string) error {
cfg := task.RestoreRawConfig{
RawKvConfig: task.RawKvConfig{Config: task.Config{LogProgress: HasLogFile()}},
Expand Down
10 changes: 10 additions & 0 deletions br/pkg/backup/push.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,16 @@ func (push *pushDown) pushBackup(
// Finished.
return res, nil
}
<<<<<<< HEAD
=======
failpoint.Inject("backup-timeout-error", func(val failpoint.Value) {
msg := val.(string)
logutil.CL(ctx).Info("failpoint backup-timeout-error injected.", zap.String("msg", msg))
resp.Error = &backuppb.Error{
Msg: msg,
}
})
>>>>>>> 8c5ca7b2008 (restore: precheck cluster is empty when first time full restore (#45014))
failpoint.Inject("backup-storage-error", func(val failpoint.Value) {
msg := val.(string)
logutil.CL(ctx).Debug("failpoint backup-storage-error injected.", zap.String("msg", msg))
Expand Down
70 changes: 70 additions & 0 deletions br/pkg/task/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,76 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf
return errors.Trace(err)
}

<<<<<<< HEAD
=======
if client.IsIncremental() {
// don't support checkpoint for the ddl restore
log.Info("the incremental snapshot restore doesn't support checkpoint mode, so unuse checkpoint.")
cfg.UseCheckpoint = false
}

restoreSchedulers, schedulersConfig, err := restorePreWork(ctx, client, mgr, true)
if err != nil {
return errors.Trace(err)
}

schedulersRemovable := false
defer func() {
// don't reset pd scheduler if checkpoint mode is used and restored is not finished
if cfg.UseCheckpoint && !schedulersRemovable {
log.Info("skip removing pd schehduler for next retry")
return
}
log.Info("start to remove the pd scheduler")
// run the post-work to avoid being stuck in the import
// mode or emptied schedulers.
restorePostWork(ctx, client, restoreSchedulers)
log.Info("finish removing pd scheduler")
}()

var checkpointSetWithTableID map[int64]map[string]struct{}
if cfg.UseCheckpoint {
taskName := cfg.generateSnapshotRestoreTaskName(client.GetClusterID(ctx))
sets, restoreSchedulersConfigFromCheckpoint, err := client.InitCheckpoint(ctx, s, taskName, schedulersConfig, cfg.UseCheckpoint)
if err != nil {
return errors.Trace(err)
}
if restoreSchedulersConfigFromCheckpoint != nil {
restoreSchedulers = mgr.MakeUndoFunctionByConfig(*restoreSchedulersConfigFromCheckpoint)
}
checkpointSetWithTableID = sets

defer func() {
// need to flush the whole checkpoint data so that br can quickly jump to
// the log kv restore step when the next retry.
log.Info("wait for flush checkpoint...")
client.WaitForFinishCheckpoint(ctx, len(cfg.FullBackupStorage) > 0 || !schedulersRemovable)
}()
}

if isFullRestore(cmdName) {
// we need check cluster is fresh every time. except restore from a checkpoint.
if client.IsFull() && len(checkpointSetWithTableID) == 0 {
if err = client.CheckTargetClusterFresh(ctx); err != nil {
return errors.Trace(err)
}
}
// todo: move this check into InitFullClusterRestore, we should move restore config into a separate package
// to avoid import cycle problem which we won't do it in this pr, then refactor this
//
// if it's point restore and reached here, then cmdName=FullRestoreCmd and len(cfg.FullBackupStorage) > 0
if cfg.WithSysTable {
client.InitFullClusterRestore(cfg.ExplicitFilter)
}
}

if client.IsFullClusterRestore() && client.HasBackedUpSysDB() {
if err = client.CheckSysTableCompatibility(mgr.GetDomain(), tables); err != nil {
return errors.Trace(err)
}
}

>>>>>>> 8c5ca7b2008 (restore: precheck cluster is empty when first time full restore (#45014))
sp := utils.BRServiceSafePoint{
BackupTS: restoreTS,
TTL: utils.DefaultBRGCSafePointTTL,
Expand Down
5 changes: 5 additions & 0 deletions br/tests/_utils/run_br
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,13 @@

set -eux

<<<<<<< HEAD
bin/br.test -test.coverprofile="$TEST_DIR/cov.$TEST_NAME.$$.out.log" DEVEL "$@" \
-L "debug" \
=======
br.test -test.coverprofile="$TEST_DIR/cov.$TEST_NAME.$$.out.log" DEVEL "$@" \
-L "info" \
>>>>>>> 8c5ca7b2008 (restore: precheck cluster is empty when first time full restore (#45014))
--ca "$TEST_DIR/certs/ca.pem" \
--cert "$TEST_DIR/certs/br.pem" \
--key "$TEST_DIR/certs/br.key"
3 changes: 3 additions & 0 deletions br/tests/br_backup_empty/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ if [ $? -ne 0 ]; then
exit 1
fi

i=1
while [ $i -le $DB_COUNT ]; do
run_sql "DROP DATABASE $DB$i;"
i=$(($i+1))
Expand All @@ -70,6 +71,7 @@ run_sql "CREATE TABLE ${DB}1.usertable1 ( \
echo "backup empty table start..."
run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/empty_table"

i=1
while [ $i -le $DB_COUNT ]; do
run_sql "DROP DATABASE $DB$i;"
i=$(($i+1))
Expand All @@ -81,6 +83,7 @@ run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/empty_table"
# insert one row to make sure table is restored.
run_sql "INSERT INTO ${DB}1.usertable1 VALUES (\"a\", \"b\");"

i=1
while [ $i -le $DB_COUNT ]; do
run_sql "DROP DATABASE $DB$i;"
i=$(($i+1))
Expand Down
1 change: 1 addition & 0 deletions br/tests/br_full_ddl/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ fi

# clear restore environment
run_sql "DROP DATABASE $DB;"
run_sql "DROP DATABASE __tidb_br_temporary_mysql;"
# restore full
echo "restore start..."
export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/pdutil/PDEnabledPauseConfig=return(true)"
Expand Down
2 changes: 1 addition & 1 deletion br/tests/br_systables/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ add_test_data() {
}

delete_test_data() {
run_sql "DROP TABLE usertest.test;"
run_sql "DROP DATABASE usertest;"
}

rollback_modify() {
Expand Down

0 comments on commit b074975

Please sign in to comment.