-
Notifications
You must be signed in to change notification settings - Fork 75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Backoff based on HTTP statuses #537
Changes from 10 commits
5060750
39e51be
30c9dff
374c44d
1e598c6
204e19b
799a51c
a6df198
0549a2e
56d5908
404d610
75c83e9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -38,12 +38,12 @@ interface MetricsData { | |||||
} | ||||||
|
||||||
interface RegistrationData { | ||||||
appName: string; | ||||||
appName: string; | ||||||
instanceId: string; | ||||||
sdkVersion: string; | ||||||
strategies: string[]; | ||||||
started: Date; | ||||||
interval: number | ||||||
started: Date; | ||||||
interval: number; | ||||||
} | ||||||
|
||||||
export default class Metrics extends EventEmitter { | ||||||
|
@@ -61,6 +61,8 @@ export default class Metrics extends EventEmitter { | |||||
|
||||||
private metricsJitter: number; | ||||||
|
||||||
private failures: number = 0; | ||||||
|
||||||
private disabled: boolean; | ||||||
|
||||||
private url: string; | ||||||
|
@@ -111,21 +113,39 @@ export default class Metrics extends EventEmitter { | |||||
return getAppliedJitter(this.metricsJitter); | ||||||
} | ||||||
|
||||||
getFailures(): number { | ||||||
return this.failures; | ||||||
} | ||||||
|
||||||
getInterval(): number { | ||||||
if(this.metricsInterval === 0 || this.failures === -1) { | ||||||
return 0; | ||||||
} else { | ||||||
return this.metricsInterval + | ||||||
(this.failures * this.metricsInterval) + | ||||||
this.getAppliedJitter(); | ||||||
} | ||||||
|
||||||
} | ||||||
|
||||||
private startTimer(): void { | ||||||
if (this.disabled) { | ||||||
if (this.disabled || this.getInterval() === 0) { | ||||||
return; | ||||||
} | ||||||
this.timer = setTimeout(() => { | ||||||
this.sendMetrics(); | ||||||
}, this.metricsInterval + this.getAppliedJitter()); | ||||||
this.timer = setTimeout( | ||||||
() => { | ||||||
this.sendMetrics(); | ||||||
}, | ||||||
this.getInterval(), | ||||||
); | ||||||
|
||||||
if (process.env.NODE_ENV !== 'test' && typeof this.timer.unref === 'function') { | ||||||
this.timer.unref(); | ||||||
} | ||||||
} | ||||||
|
||||||
start(): void { | ||||||
if (typeof this.metricsInterval === 'number' && this.metricsInterval > 0) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typescript compiler stated that this was no longer necessary |
||||||
if (this.metricsInterval > 0) { | ||||||
this.startTimer(); | ||||||
this.registerInstance(); | ||||||
} | ||||||
|
@@ -170,6 +190,19 @@ export default class Metrics extends EventEmitter { | |||||
return true; | ||||||
} | ||||||
|
||||||
unrecoverableError(url: string, statusCode: number) { | ||||||
this.emit(UnleashEvents.Warn, `${url} returning ${statusCode}, stopping metrics`); | ||||||
this.failures = -1; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
wouldn't this be enough? Just to avoid dealing with failures = -1 which is a bit confusing IMO There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can see the benefit in having a value indicate unrecoverable error, but I think it should in that case be a separate variable? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, we're calling an explicit |
||||||
this.stop(); | ||||||
} | ||||||
|
||||||
backoff(url: string, statusCode: number): void { | ||||||
this.failures = Math.min(10, this.failures + 1); | ||||||
// eslint-disable-next-line max-len | ||||||
this.emit(UnleashEvents.Warn, `${url} returning ${statusCode}. Backing off to ${this.getInterval()}`); | ||||||
this.startTimer(); | ||||||
gastonfournier marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
|
||||||
async sendMetrics(): Promise<void> { | ||||||
if (this.disabled) { | ||||||
return; | ||||||
|
@@ -194,16 +227,22 @@ export default class Metrics extends EventEmitter { | |||||
timeout: this.timeout, | ||||||
httpOptions: this.httpOptions, | ||||||
}); | ||||||
this.startTimer(); | ||||||
if (res.status === 404) { | ||||||
this.emit(UnleashEvents.Warn, `${url} returning 404, stopping metrics`); | ||||||
this.stop(); | ||||||
} | ||||||
if (!res.ok) { | ||||||
if (res.status === 404 || res.status === 403 || res.status == 401) { | ||||||
this.unrecoverableError(url, res.status); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it really unrecoverable? If our API is down for a small period of time you may get a 404 response and it may recover later. Or you may get a 403 because your token does not have access to a resource, and then some admin might grant that token permissions. If we make this unrecoverable, they will need a restart. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I remember from the quick discovery/discussion session we had that we thought maybe not stop polling just treat it as a backoff-response, for the reasons @gastonfournier mentions here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For 404:
For 401/403: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But doing what the Java SDK did instead, immediately log at error level that the API most likely is incorrectly configured and then back off to our maximum backoff (10 * interval), could also be an answer There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, that makes sense! I'm still a bit worried we stop posting and we will not notice what's wrong. If we backoff might mean we get a request every now and then but not hammering our servers There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, for metrics is ok to be more aggressive and stop posting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. renamed the method to |
||||||
} else if ( | ||||||
res.status === 429 || | ||||||
res.status === 500 || | ||||||
res.status === 502 || | ||||||
res.status === 503 || | ||||||
res.status === 504 | ||||||
) { | ||||||
this.backoff(url, res.status); | ||||||
} | ||||||
this.restoreBucket(payload.bucket); | ||||||
this.emit(UnleashEvents.Warn, `${url} returning ${res.status}`, await res.text()); | ||||||
} else { | ||||||
this.emit(UnleashEvents.Sent, payload); | ||||||
this.reduceBackoff(); | ||||||
} | ||||||
} catch (err) { | ||||||
this.restoreBucket(payload.bucket); | ||||||
|
@@ -212,6 +251,11 @@ export default class Metrics extends EventEmitter { | |||||
} | ||||||
} | ||||||
|
||||||
reduceBackoff(): void { | ||||||
this.failures = Math.max(0, this.failures - 1); | ||||||
this.startTimer(); | ||||||
} | ||||||
|
||||||
assertBucket(name: string): void { | ||||||
if (this.disabled) { | ||||||
return; | ||||||
|
@@ -243,7 +287,7 @@ export default class Metrics extends EventEmitter { | |||||
} | ||||||
|
||||||
private increaseCounter(name: string, enabled: boolean, inc = 1): void { | ||||||
if(inc === 0) { | ||||||
if (inc === 0) { | ||||||
return; | ||||||
} | ||||||
this.assertBucket(name); | ||||||
|
@@ -252,8 +296,8 @@ export default class Metrics extends EventEmitter { | |||||
|
||||||
private increaseVariantCounter(name: string, variantName: string, inc = 1): void { | ||||||
this.assertBucket(name); | ||||||
if(this.bucket.toggles[name].variants[variantName]) { | ||||||
this.bucket.toggles[name].variants[variantName]+=inc | ||||||
if (this.bucket.toggles[name].variants[variantName]) { | ||||||
this.bucket.toggles[name].variants[variantName] += inc; | ||||||
} else { | ||||||
this.bucket.toggles[name].variants[variantName] = inc; | ||||||
} | ||||||
|
@@ -276,7 +320,7 @@ export default class Metrics extends EventEmitter { | |||||
} | ||||||
|
||||||
createMetricsData(): MetricsData { | ||||||
const bucket = {...this.bucket, stop: new Date()}; | ||||||
const bucket = { ...this.bucket, stop: new Date() }; | ||||||
this.resetBucket(); | ||||||
return { | ||||||
appName: this.appName, | ||||||
|
@@ -286,20 +330,20 @@ export default class Metrics extends EventEmitter { | |||||
} | ||||||
|
||||||
private restoreBucket(bucket: Bucket): void { | ||||||
if(this.disabled) { | ||||||
if (this.disabled) { | ||||||
return; | ||||||
} | ||||||
this.bucket.start = bucket.start; | ||||||
|
||||||
const { toggles } = bucket; | ||||||
Object.keys(toggles).forEach(toggleName => { | ||||||
const toggle = toggles[toggleName]; | ||||||
Object.keys(toggles).forEach((toggleName) => { | ||||||
const toggle = toggles[toggleName]; | ||||||
this.increaseCounter(toggleName, true, toggle.yes); | ||||||
this.increaseCounter(toggleName, false, toggle.no); | ||||||
|
||||||
Object.keys(toggle.variants).forEach(variant => { | ||||||
Object.keys(toggle.variants).forEach((variant) => { | ||||||
this.increaseVariantCounter(toggleName, variant, toggle.variants[variant]); | ||||||
}) | ||||||
}); | ||||||
}); | ||||||
} | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,6 +55,8 @@ export default class Repository extends EventEmitter implements EventEmitter { | |
|
||
private headers?: CustomHeaders; | ||
|
||
private failures: number = 0; | ||
|
||
private customHeadersFunction?: CustomHeadersFunction; | ||
|
||
private timeout?: number; | ||
|
@@ -241,11 +243,92 @@ Message: ${err.message}`, | |
return obj; | ||
} | ||
|
||
getFailures(): number { | ||
return this.failures; | ||
} | ||
|
||
nextFetch(): number { | ||
return this.refreshInterval + this.failures * this.refreshInterval; | ||
} | ||
|
||
private backoff(): number { | ||
this.failures = Math.min(this.failures + 1, 10); | ||
return this.nextFetch(); | ||
} | ||
|
||
private countSuccess(): number { | ||
this.failures = Math.max(this.failures - 1, 0); | ||
return this.nextFetch(); | ||
} | ||
|
||
// Emits correct error message based on what failed, | ||
// and returns 0 as the next fetch interval (stop polling) | ||
private unrecoverableError(url: string, statusCode: number): number { | ||
this.failures += 1; | ||
if (statusCode === 404) { | ||
this.emit( | ||
UnleashEvents.Error, | ||
new Error( | ||
// eslint-disable-next-line max-len | ||
`${url} responded NOT_FOUND (404) which means your API url most likely needs correction. Stopping refresh of toggles`, | ||
), | ||
); | ||
} else if (statusCode === 401 || statusCode === 403) { | ||
this.emit( | ||
UnleashEvents.Error, | ||
new Error( | ||
// eslint-disable-next-line max-len | ||
`${url} responded ${statusCode} which means your API key is not allowed to connect. Stopping refresh of toggles`, | ||
), | ||
); | ||
} | ||
return 0; | ||
} | ||
|
||
// We got a status code we know what to do with, so will log correct message | ||
// and return the new interval. | ||
private recoverableError(url: string, statusCode: number): number { | ||
let nextFetch = this.backoff(); | ||
if (statusCode === 429) { | ||
this.emit( | ||
UnleashEvents.Warn, | ||
// eslint-disable-next-line max-len | ||
`${url} responded TOO_MANY_CONNECTIONS (429). Backing off`, | ||
); | ||
} else if (statusCode === 500 || | ||
statusCode === 502 || | ||
statusCode === 503 || | ||
statusCode === 504) { | ||
this.emit( | ||
UnleashEvents.Warn, | ||
`${url} responded ${statusCode}. Backing off`, | ||
); | ||
} | ||
return nextFetch; | ||
} | ||
|
||
private handleErrorCases(url: string, statusCode: number): number { | ||
if (statusCode === 401 || statusCode === 403 || statusCode === 404) { | ||
return this.unrecoverableError(url, statusCode); | ||
} else if ( | ||
statusCode === 429 || | ||
statusCode === 500 || | ||
statusCode === 502 || | ||
statusCode === 503 || | ||
statusCode === 504 | ||
) { | ||
return this.recoverableError(url, statusCode); | ||
} else { | ||
const error = new Error(`Response was not statusCode 2XX, but was ${statusCode}`); | ||
this.emit(UnleashEvents.Error, error); | ||
return this.refreshInterval; | ||
} | ||
} | ||
|
||
async fetch(): Promise<void> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agreed. Something like what we did for metrics. I'll have a look. |
||
if (this.stopped || !(this.refreshInterval > 0)) { | ||
return; | ||
} | ||
|
||
let nextFetch = this.refreshInterval; | ||
try { | ||
let mergedTags; | ||
|
@@ -257,7 +340,6 @@ Message: ${err.message}`, | |
const headers = this.customHeadersFunction | ||
? await this.customHeadersFunction() | ||
: this.headers; | ||
|
||
const res = await get({ | ||
url, | ||
etag: this.etag, | ||
|
@@ -268,14 +350,11 @@ Message: ${err.message}`, | |
httpOptions: this.httpOptions, | ||
supportedSpecVersion: SUPPORTED_SPEC_VERSION, | ||
}); | ||
|
||
if (res.status === 304) { | ||
// No new data | ||
this.emit(UnleashEvents.Unchanged); | ||
} else if (!res.ok) { | ||
const error = new Error(`Response was not statusCode 2XX, but was ${res.status}`); | ||
this.emit(UnleashEvents.Error, error); | ||
} else { | ||
} else if (res.ok) { | ||
nextFetch = this.countSuccess(); | ||
try { | ||
const data: ClientFeaturesResponse = await res.json(); | ||
if (res.headers.get('etag') !== null) { | ||
|
@@ -287,6 +366,8 @@ Message: ${err.message}`, | |
} catch (err) { | ||
this.emit(UnleashEvents.Error, err); | ||
} | ||
} else { | ||
nextFetch = this.handleErrorCases(url, res.status); | ||
} | ||
} catch (err) { | ||
const e = err as { code: string }; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should
failures
be initialized to -1? I can see this working but it makes it hard to reason about when we're checking against -1 hereThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
failures start at 0, if we initialize it to -1 we'd never actually try to post metrics.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But since you're asking, I guess it isn't as clear as I thought it'd be. extract to a method called
stopPosting()
instead?