Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

DEVPROD-13567: improve retry and traceability for GitHub app token creation errors #8693

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions model/githubapp/github_app_auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ import (
"github.com/evergreen-ci/evergreen"
"github.com/google/go-github/v52/github"
"github.com/pkg/errors"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
)

// Attribute keys attached to the tracing spans created in this package for
// GitHub app requests.
const (
// githubAppEndpointAttribute is set to the name of the GitHub app operation
// being traced (createInstallationTokenForID sets it to its caller name).
githubAppEndpointAttribute = "evergreen.githubapp.endpoint"
// githubAppAttemptAttribute is set to the attempt index passed to the retry
// predicate.
githubAppAttemptAttribute = "evergreen.githubapp.attempt"
// githubAppURLAttribute is set to the request URL string.
githubAppURLAttribute = "evergreen.githubapp.url"
// githubAppErrorAttribute is set to the error's message when an operation
// fails.
githubAppErrorAttribute = "evergreen.githubapp.error"
// githubAppMethodAttribute is set to the request's HTTP method.
githubAppMethodAttribute = "evergreen.githubapp.method"
// githubAppStatusCodeAttribute is set to the HTTP status code of the
// response, when a response is available.
githubAppStatusCodeAttribute = "evergreen.githubapp.status_code"
)

// GithubAppAuth holds the appId and privateKey for the github app associated with the project.
Expand Down Expand Up @@ -141,6 +152,12 @@ func (g *GithubAppAuth) CreateInstallationToken(ctx context.Context, owner, repo
// createInstallationTokenForID returns an installation token from GitHub given an installation ID.
// This function cannot be moved to thirdparty because it is needed to set up the environment.
func (g *GithubAppAuth) createInstallationTokenForID(ctx context.Context, installationID int64, opts *github.InstallationTokenOptions) (string, *github.InstallationPermissions, error) {
const caller = "CreateInstallationToken"
ctx, span := tracer.Start(ctx, caller, trace.WithAttributes(
attribute.String(githubAppEndpointAttribute, caller),
))
defer span.End()

client, err := getGitHubClientForAuth(g)
if err != nil {
return "", nil, errors.Wrap(err, "getting GitHub client for token creation")
Expand All @@ -150,12 +167,16 @@ func (g *GithubAppAuth) createInstallationTokenForID(ctx context.Context, instal
token, resp, err := client.Apps.CreateInstallationToken(ctx, installationID, opts)
if resp != nil {
defer resp.Body.Close()
span.SetAttributes(attribute.Int(githubAppStatusCodeAttribute, resp.StatusCode))
}
if err != nil {
return "", nil, errors.Wrapf(err, "creating installation token for installation id: '%d'", installationID)
span.SetAttributes(attribute.String(githubAppErrorAttribute, err.Error()))
return "", nil, errors.Wrapf(err, "creating installation token for installation id: %d", installationID)
}
if token == nil {
return "", nil, errors.Errorf("Installation token for installation 'id': %d not found", installationID)
err := errors.Errorf("Installation token for installation 'id': %d not found", installationID)
span.SetAttributes(attribute.String(githubAppErrorAttribute, err.Error()))
return "", nil, err
}

return token.GetToken(), token.GetPermissions(), nil
Expand Down
88 changes: 80 additions & 8 deletions model/githubapp/github_app_installation.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package githubapp

import (
"context"
"io"
"net/http"
"sync"
"time"
Expand All @@ -11,9 +12,12 @@ import (
"github.com/evergreen-ci/utility"
"github.com/golang-jwt/jwt"
"github.com/google/go-github/v52/github"
"github.com/mongodb/grip"
"github.com/mongodb/grip/message"
"github.com/pkg/errors"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/mongo/options"
"go.opentelemetry.io/otel/attribute"
)

const (
Expand Down Expand Up @@ -105,24 +109,92 @@ func (g *GitHubClient) Close() {
// This function cannot be moved to thirdparty because it is needed to set up the environment.
// Couple this with a deferred call to Close() to clean up the client.
// NOTE(review): this span appears to interleave the pre-change lines (the
// retryConf setup and utility.GetHTTPRetryableClient) with their replacement
// (utility.GetCustomHTTPRetryableClientWithTransport); `itr` and `httpClient`
// are each declared twice with := below. Confirm which version is current
// before relying on these lines.
func getGitHubClientForAuth(authFields *GithubAppAuth) (*GitHubClient, error) {
// Start from the default HTTP retry configuration and override the delays
// and retry count with the GitHub-specific values.
retryConf := utility.NewDefaultHTTPRetryConf()
retryConf.MaxDelay = GitHubRetryMaxDelay
retryConf.BaseDelay = GitHubRetryMinDelay
retryConf.MaxRetries = GitHubMaxRetries

// The app's private key is parsed as a PEM-encoded RSA key; a parse failure
// is returned to the caller wrapped with context.
key, err := jwt.ParseRSAPrivateKeyFromPEM(authFields.PrivateKey)
if err != nil {
return nil, errors.Wrap(err, "parsing private key")
}

// Wrap the retryable client's transport in a GitHub Apps transport built from
// the app ID and private key, then install it back on the client.
httpClient := utility.GetHTTPRetryableClient(retryConf)
itr := ghinstallation.NewAppsTransportFromPrivateKey(httpClient.Transport, authFields.AppID, key)
httpClient.Transport = itr
// Build the Apps transport on top of utility.DefaultTransport() and hand it
// to a retryable client that uses githubClientShouldRetry as its retry
// predicate and the GitHub-specific delay bounds.
itr := ghinstallation.NewAppsTransportFromPrivateKey(utility.DefaultTransport(), authFields.AppID, key)
httpClient := utility.GetCustomHTTPRetryableClientWithTransport(itr, githubClientShouldRetry(), utility.RetryHTTPDelay(utility.RetryOptions{
MinDelay: GitHubRetryMinDelay,
MaxDelay: GitHubRetryMaxDelay,
// Presumably the initial attempt plus GitHubMaxRetries retries — TODO confirm
// against utility.RetryOptions.
MaxAttempts: GitHubMaxRetries + 1,
}))

// Wrap the go-github client in GitHubClient before returning it.
client := github.NewClient(httpClient)
wrappedClient := GitHubClient{Client: client}
return &wrappedClient, nil
}

// githubClientShouldRetry returns the retry predicate used by the GitHub app
// HTTP client. For each attempt the predicate starts a span carrying the
// attempt index, request URL, and request method, and then decides whether the
// request should be retried:
//
//   - a request error is retried only if it is io.EOF, io.ErrUnexpectedEOF, or
//     reported as temporary by utility.IsTemporaryError; any other error is
//     logged and not retried;
//   - a nil response with no error is logged and retried;
//   - a response whose status code is in the default retryable status list is
//     retried;
//   - any other response is not retried, and is logged when its status code is
//     400 or greater.
func githubClientShouldRetry() utility.HTTPRetryFunction {
	defaultRetryableStatuses := utility.NewDefaultHTTPRetryConf().Statuses

	return func(index int, req *http.Request, resp *http.Response, err error) bool {
		const op = "githubClientShouldRetry"
		_, span := tracer.Start(req.Context(), op)
		defer span.End()

		// Computed once; used for both the span attribute and log messages.
		reqURL := req.URL.String()

		span.SetAttributes(
			attribute.Int(githubAppAttemptAttribute, index),
			attribute.String(githubAppURLAttribute, reqURL),
			attribute.String(githubAppMethodAttribute, req.Method),
		)

		// makeLogMsg builds the common log fields for this attempt and merges
		// in any caller-supplied extras (extras win on key collisions).
		makeLogMsg := func(extraFields map[string]any) message.Fields {
			msg := message.Fields{
				"url":     reqURL,
				"method":  req.Method,
				"attempt": index,
				"op":      op,
			}
			for k, v := range extraFields {
				msg[k] = v
			}
			return msg
		}

		if err != nil {
			if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
				return true
			}
			if utility.IsTemporaryError(err) {
				return true
			}

			// TODO (DEVPROD-13567): retry in situations where there's no
			// response but the error is still retryable (e.g. connection reset
			// by peer).

			grip.Error(message.WrapError(err, makeLogMsg(map[string]any{
				"message": "GitHub endpoint encountered unretryable error",
			})))

			return false
		}

		if resp == nil {
			// err is always nil here, so the fields are logged directly:
			// wrapping them with message.WrapError and a nil error would
			// produce a message that is not loggable and is silently dropped.
			grip.Error(makeLogMsg(map[string]any{
				"message": "GitHub app endpoint returned nil response",
			}))
			return true
		}

		for _, statusCode := range defaultRetryableStatuses {
			if resp.StatusCode == statusCode {
				return true
			}
		}

		// TODO (DEVPROD-13567): retry when response from GitHub is non-OK due
		// to a transient problem that is still retryable (e.g. secondary rate
		// limit exceeded).
		grip.ErrorWhen(resp.StatusCode >= http.StatusBadRequest, makeLogMsg(map[string]any{
			"message":     "GitHub app endpoint returned response but is not retryable",
			"status_code": resp.StatusCode,
		}))

		return false
	}
}

// getInstallationIDFromGitHub returns an installation ID from GitHub given an owner and a repo.
// This function cannot be moved to thirdparty because it is needed to set up the environment.
func getInstallationIDFromGitHub(ctx context.Context, authFields *GithubAppAuth, owner, repo string) (int64, error) {
Expand Down
12 changes: 12 additions & 0 deletions model/githubapp/otel.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package githubapp

import (
"fmt"

"github.com/evergreen-ci/evergreen"
"go.opentelemetry.io/otel"
)

// packageName is this package's import path, formed by appending
// "/model/githubapp" to evergreen.PackageName. It is used as the tracer name.
var packageName = fmt.Sprintf("%s%s", evergreen.PackageName, "/model/githubapp")

// tracer is the package-level tracer obtained from the global OpenTelemetry
// tracer provider; spans in this package are started from it.
var tracer = otel.GetTracerProvider().Tracer(packageName)
Loading