Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
137993: server: fix errors returned from TriggerMetadataUpdateJob r=xinhaoz a=xinhaoz

Previously POST on `/api/v2/table_metadata/updatejob` would return error if the table metadata update job was unclaimed, which is an expected failure. In those cases we should return 200 with `JobTriggered: false` and a message describing the failure.

The gPRC handler to trigger the job was already returning `status.Unavailable` if the job was unclaimed. The http api handler has now been adjusted to treat this error code as an expected failure case and will return a 200 response with the message `Job is unclaimed` as the reason for failing to trigger the job.

Fixes: cockroachdb#137777

Release note (bug fix): On the v2 databases page, users should no longer see console errors when visitingn the db page directly after node / sql pod startup.

Co-authored-by: Xin Hao Zhang <[email protected]>
  • Loading branch information
craig[bot] and xinhaoz committed Jan 3, 2025
2 parents f911df9 + 21c3bde commit 393c4f9
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 4 deletions.
7 changes: 6 additions & 1 deletion pkg/server/api_v2_databases_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const (
MetadataNotStale JobStatusMessage = "Not enough time has elapsed since last job run"
JobRunning JobStatusMessage = "Job is already running"
JobTriggered JobStatusMessage = "Job triggered successfully"
JobUnclaimed JobStatusMessage = "Job is unclaimed"
)

const (
Expand Down Expand Up @@ -1023,8 +1024,12 @@ func (a *apiV2Server) triggerTableMetadataUpdateJob(
if err != nil {
st, ok := status.FromError(err)
if ok {
if st.Code() == codes.Aborted {
switch st.Code() {
case codes.Aborted:
return tmJobTriggeredResponse{JobTriggered: false, Message: JobRunning}, nil
case codes.Unavailable:
return tmJobTriggeredResponse{JobTriggered: false, Message: JobUnclaimed}, nil
default:
}
}
return tmJobTriggeredResponse{}, err
Expand Down
49 changes: 49 additions & 0 deletions pkg/server/api_v2_databases_metadata_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,55 @@ func TestTriggerMetadataUpdateJob(t *testing.T) {
})
}

// TestTriggerMetadataUpdateJobTriggerFailed tests that when we
// fail to trigger the job for expected reasons, the api returns without
// an error and with the correct message describing the failure.
// The expected reasons are:
// 1. The job is unclaimed - this can happen on startup.
// 2. The job signal is unavailable - this can happen directly
// after the job starting or when a run is already in progress.
func TestTriggerMetadataUpdateJobTriggerFailed(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
ctx := context.Background()
// Set the adoption time to 1 hour in the future so the job
// won't be adopted in this test run.
adoptDuration := time.Hour
ts := serverutils.StartServerOnly(t, base.TestServerArgs{
Knobs: base.TestingKnobs{
JobsTestingKnobs: &jobs.TestingKnobs{
IntervalOverrides: jobs.TestingIntervalOverrides{
Adopt: &adoptDuration,
},
},
},
})
defer ts.Stopper().Stop(ctx)
client, err := ts.GetAdminHTTPClient()
require.NoError(t, err)

t.Run("job is unclaimed", func(t *testing.T) {
resp := makeApiRequest[tmJobTriggeredResponse](t, client, ts.AdminURL().WithPath("/api/v2/table_metadata/updatejob/").String(), http.MethodPost)
require.False(t, resp.JobTriggered)
require.Contains(t, resp.Message, JobUnclaimed)
})

t.Run("job signal is unavailable", func(t *testing.T) {
// The job signal can be unavailable because the job is running an
// update or the job was just claimed and has yet to get to the point
// where it can be signalled. These are expected and the api should
// return without error.
conn := sqlutils.MakeSQLRunner(ts.SQLConn(t))
// Mimic the job being claimed by the node by setting the claim_instance_id to 1.
// The job won't be running since we delayed adoptions, so the signal is still
// unavailable.
conn.Exec(t, `UPDATE system.jobs SET claim_instance_id = 1 WHERE id = $1`, jobs.UpdateTableMetadataCacheJobID)
resp := makeApiRequest[tmJobTriggeredResponse](t, client, ts.AdminURL().WithPath("/api/v2/table_metadata/updatejob/").String(), http.MethodPost)
require.False(t, resp.JobTriggered)
require.Contains(t, resp.Message, JobRunning)
})
}

func makeApiRequest[T any](
t *testing.T, client http.Client, uri string, httpMethod string,
) (mdResp T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,13 @@ export const TableMetadataJobControl: React.FC<

const triggerUpdateTableMetaJob = useCallback(
async (onlyIfStale = true) => {
const resp = await triggerUpdateTableMetaJobApi({ onlyIfStale });
if (resp.jobTriggered) {
return refreshJobStatus();
try {
const resp = await triggerUpdateTableMetaJobApi({ onlyIfStale });
if (resp.jobTriggered) {
return refreshJobStatus();
}
} catch {
// We don't need to do anything with additional errors right now.
}
},
[refreshJobStatus],
Expand Down

0 comments on commit 393c4f9

Please sign in to comment.