Skip to content

Commit

Permalink
Various run execution fixes (incl massive memory bloat issue)
Browse files Browse the repository at this point in the history
This commit fixes various issues with run executions, including a severe memory bloat issue when resuming a run that had more than a few completed tasks. Other issues fixed:

- Run executions are no longer bound to a queue, which allows multiple runs of a single job to execute in parallel (instead of only 1).
- Serverless function timeout (504) errors are now handled better and are no longer retried using the graphile worker failure/retry mechanism (which was causing massive delays).
- Fixed the job_key design of the performRunExecutionV2 task, which ensures that resumed runs are executed.
- Added a mechanism to measure the amount of execution time a given run has accrued, and added a maximum execution duration on the org to limit the total execution time of a single run.
  • Loading branch information
ericallam committed Sep 5, 2023
1 parent bf6a2a0 commit 30ba73c
Show file tree
Hide file tree
Showing 10 changed files with 258 additions and 106 deletions.
2 changes: 1 addition & 1 deletion apps/webapp/app/components/run/RunCompletedDetail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ export function RunCompletedDetail({ run }: { run: MatchedRun }) {
<RunPanelDivider />
{run.error && <RunPanelError text={run.error.message} stackTrace={run.error.stack} />}
{run.output ? (
<CodeBlock language="json" code={run.output} maxLines={8} />
<CodeBlock language="json" code={run.output} maxLines={36} />
) : (
run.output === null && <Paragraph variant="small">This run returned nothing</Paragraph>
)}
Expand Down
6 changes: 4 additions & 2 deletions apps/webapp/app/models/jobRunExecution.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export type EnqueueRunExecutionV2Options = {
resumeTaskId?: string;
isRetry?: boolean;
skipRetrying?: boolean;
executionCount?: number;
};

export async function enqueueRunExecutionV2(
Expand All @@ -44,10 +45,11 @@ export async function enqueueRunExecutionV2(
isRetry: typeof options.isRetry === "boolean" ? options.isRetry : false,
},
{
queueName: `job:${run.jobId}:env:${run.environmentId}`,
tx,
runAt: options.runAt,
jobKey: `job_run:${run.id}`,
jobKey: `job_run:${run.id}:${options.executionCount ?? 0}${
options.resumeTaskId ? `:task:${options.resumeTaskId}` : ""
}`,
maxAttempts: options.skipRetrying ? 1 : undefined,
}
);
Expand Down
11 changes: 4 additions & 7 deletions apps/webapp/app/services/endpointApi.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
} from "@trigger.dev/core";
import { safeBodyFromResponse, safeParseBodyFromResponse } from "~/utils/json";
import { logger } from "./logger.server";
import { performance } from "node:perf_hooks";

export class EndpointApiError extends Error {
constructor(message: string, stack?: string) {
Expand All @@ -28,10 +29,7 @@ export class EndpointApiError extends Error {
}

export class EndpointApi {
constructor(
private apiKey: string,
private url: string
) {}
constructor(private apiKey: string, private url: string) {}

async ping(endpointId: string): Promise<PongResponse> {
const response = await safeFetch(this.url, {
Expand Down Expand Up @@ -165,9 +163,7 @@ export class EndpointApi {
}

async executeJobRequest(options: RunJobBody) {
logger.debug("executeJobRequest()", {
options,
});
const startTimeInMs = performance.now();

const response = await safeFetch(this.url, {
method: "POST",
Expand All @@ -183,6 +179,7 @@ export class EndpointApi {
response,
parser: RunJobResponseSchema,
errorParser: ErrorWithStackSchema,
durationInMs: Math.floor(performance.now() - startTimeInMs),
};
}

Expand Down
Loading

0 comments on commit 30ba73c

Please sign in to comment.