Skip to content

Commit

Permalink
Merge pull request #568 from github/exponential-backoff-merge-master
Browse files Browse the repository at this point in the history
Exponential backoff merge master
  • Loading branch information
Shlomi Noach authored Apr 15, 2018
2 parents 85bbde2 + 50b8f2c commit 3b7f1a7
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 1 deletion.
10 changes: 10 additions & 0 deletions go/base/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ type MigrationContext struct {
CriticalLoadHibernateSeconds int64
PostponeCutOverFlagFile string
CutOverLockTimeoutSeconds int64
CutOverExponentialBackoff bool
ExponentialBackoffMaxInterval int64
ForceNamedCutOverCommand bool
PanicFlagFile string
HooksPath string
Expand Down Expand Up @@ -341,6 +343,14 @@ func (this *MigrationContext) SetCutOverLockTimeoutSeconds(timeoutSeconds int64)
return nil
}

func (this *MigrationContext) SetExponentialBackoffMaxInterval(intervalSeconds int64) error {
if intervalSeconds < 2 {
return fmt.Errorf("Minimal maximum interval is 2sec. Timeout remains at %d", this.ExponentialBackoffMaxInterval)
}
this.ExponentialBackoffMaxInterval = intervalSeconds
return nil
}

func (this *MigrationContext) SetDefaultNumRetries(retries int64) {
this.throttleMutex.Lock()
defer this.throttleMutex.Unlock()
Expand Down
5 changes: 5 additions & 0 deletions go/cmd/gh-ost/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ func main() {

flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running")
flag.BoolVar(&migrationContext.AssumeRBR, "assume-rbr", false, "set to 'true' when you know for certain your server uses 'ROW' binlog_format. gh-ost is unable to tell, event after reading binlog_format, whether the replication process does indeed use 'ROW', and restarts replication to be certain RBR setting is applied. Such operation requires SUPER privileges which you might not have. Setting this flag avoids restarting replication and you can proceed to use gh-ost without SUPER privileges")
flag.BoolVar(&migrationContext.CutOverExponentialBackoff, "cut-over-exponential-backoff", false, "Wait exponentially longer intervals between failed cut-over attempts. Wait intervals obey a maximum configurable with 'exponential-backoff-max-interval').")
exponentialBackoffMaxInterval := flag.Int64("exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between attempts when performing various operations with exponential backoff.")
chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)")
dmlBatchSize := flag.Int64("dml-batch-size", 10, "batch size for DML events to apply in a single transaction (range 1-100)")
defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking")
Expand Down Expand Up @@ -237,6 +239,9 @@ func main() {
if err := migrationContext.SetCutOverLockTimeoutSeconds(*cutOverLockTimeoutSeconds); err != nil {
log.Errore(err)
}
if err := migrationContext.SetExponentialBackoffMaxInterval(*exponentialBackoffMaxInterval); err != nil {
log.Errore(err)
}

log.Infof("starting gh-ost %+v", AppVersion)
acceptSignals(migrationContext)
Expand Down
36 changes: 35 additions & 1 deletion go/logic/migrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,34 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo
return err
}

// `retryOperationWithExponentialBackoff` attempts running given function, waiting 2^(n-1)
// seconds between each attempt, where `n` is the running number of attempts. Exits
// as soon as the function returns with non-error, or as soon as `MaxRetries`
// attempts are reached. Wait intervals between attempts obey a maximum of
// `ExponentialBackoffMaxInterval`.
func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error, notFatalHint ...bool) (err error) {
var interval int64
maxRetries := int(this.migrationContext.MaxRetries())
maxInterval := this.migrationContext.ExponentialBackoffMaxInterval
for i := 0; i < maxRetries; i++ {
newInterval := int64(math.Exp2(float64(i - 1)))
if newInterval <= maxInterval {
interval = newInterval
}
if i != 0 {
time.Sleep(time.Duration(interval) * time.Second)
}
err = operation()
if err == nil {
return nil
}
}
if len(notFatalHint) == 0 {
this.migrationContext.PanicAbort <- err
}
return err
}

// executeAndThrottleOnError executes a given function. If it errors, it
// throttles.
func (this *Migrator) executeAndThrottleOnError(operation func() error) (err error) {
Expand Down Expand Up @@ -372,7 +400,13 @@ func (this *Migrator) Migrate() (err error) {
if err := this.hooksExecutor.onBeforeCutOver(); err != nil {
return err
}
if err := this.retryOperation(this.cutOver); err != nil {
var retrier func(func() error, ...bool) error
if this.migrationContext.CutOverExponentialBackoff {
retrier = this.retryOperationWithExponentialBackoff
} else {
retrier = this.retryOperation
}
if err := retrier(this.cutOver); err != nil {
return err
}
atomic.StoreInt64(&this.migrationContext.CutOverCompleteFlag, 1)
Expand Down

0 comments on commit 3b7f1a7

Please sign in to comment.