Skip to content

Commit

Permalink
Infer SSG worker thread count
Browse files Browse the repository at this point in the history
  • Loading branch information
slorber committed Jan 17, 2025
1 parent a472e20 commit 3fe6044
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 15 deletions.
5 changes: 4 additions & 1 deletion packages/docusaurus/src/ssg/ssg.ts
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,9 @@ It might also require to wrap your client code in ${logger.code(
/**
 * Default export: generates the static files for one batch of pathnames.
 *
 * The call is wrapped in a PerfLogger entry named after the worker number so
 * that the duration of each batch can be told apart in the perf logs.
 *
 * @param arg.pathnames Pathnames to render in this batch.
 * @param arg.params SSG parameters, forwarded as-is to generateStaticFiles().
 * @param arg.worker Number identifying this worker; only used for the log label.
 * @returns Whatever generateStaticFiles() resolves to for this batch.
 */
export default async function worker(arg: {
  pathnames: string[];
  params: SSGParams;
  worker: number;
}): Promise<{collectedData: SiteCollectedData}> {
  // Single return: a bare `return generateStaticFiles(arg)` placed before this
  // statement would make the perf-logged call unreachable.
  return PerfLogger.async(`SSG Worker ${arg.worker}`, () =>
    generateStaticFiles(arg),
  );
}
79 changes: 66 additions & 13 deletions packages/docusaurus/src/ssg/ssgExecutor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import {PerfLogger} from '@docusaurus/logger';
import {generateStaticFiles} from './ssg';
import {createSSGParams} from './ssgParams';
import {renderHashRouterTemplate} from './ssgTemplate';
import {generateHashRouterEntrypoint} from './ssgUtils';
import {generateHashRouterEntrypoint, SSGWorkerThreads} from './ssgUtils';
import type {Props, RouterType} from '@docusaurus/types';
import type {SiteCollectedData} from '../common';
import type {GenerateStaticFilesResult} from './ssg';
Expand Down Expand Up @@ -42,11 +42,13 @@ const createSimpleSSGExecutor: CreateSSGExecutor = async ({
}) => {
return {
run: () => {
return PerfLogger.async('Generate static files', () =>
generateStaticFiles({
pathnames,
params,
}),
return PerfLogger.async(
'Generate static files - Using current worker thread',
() =>
generateStaticFiles({
pathnames,
params,
}),
);
},

Expand All @@ -56,14 +58,65 @@ const createSimpleSSGExecutor: CreateSSGExecutor = async ({
};
};

// Heuristic giving a reasonable thread count for most hosts.
// There is no perfect formula: every thread has a creation cost, so using
// one thread per CPU does not necessarily speed up small sites.
// The result is bounded by three things:
// - the workload: one thread per `minPagesPerCpu` pages (rounded up)
// - the hardware: never more than `cpuCount`
// - a hard cap: never more than `maxThreads`
function inferNumberOfThreads(options: {
  pageCount: number;
  cpuCount: number;
  minPagesPerCpu: number;
  maxThreads: number;
}) {
  const {pageCount, cpuCount, minPagesPerCpu, maxThreads} = options;

  // How many threads the amount of pages to render would justify
  const workloadBound = Math.ceil(pageCount / minPagesPerCpu);

  // Do not exceed the number of CPUs available
  const hardwareBound = Math.min(workloadBound, cpuCount);

  // Zero pages (or zero reported CPUs) must still yield one thread
  const atLeastOneThread = Math.max(1, hardwareBound);

  // Finally apply the upper cap
  return Math.min(maxThreads, atLeastOneThread);
}

/**
 * Resolves how many SSG worker threads to use for the given pathnames.
 *
 * An explicit SSGWorkerThreads value wins when it is a usable thread count;
 * otherwise the count is inferred from the number of pages and CPUs.
 *
 * @param pathnames Pathnames that will be rendered; only their count is used.
 * @returns A thread count (>= 1).
 */
function getNumberOfThreads(pathnames: string[]) {
  // SSGWorkerThreads is produced by parseInt() on an env variable, so it can
  // be NaN (non-numeric input), 0 or negative. Such values cannot be used to
  // split the pathnames into chunks, so they are ignored in favor of inference.
  if (
    SSGWorkerThreads !== undefined &&
    Number.isInteger(SSGWorkerThreads) &&
    SSGWorkerThreads >= 1
  ) {
    return SSGWorkerThreads;
  }
  return inferNumberOfThreads({
    pageCount: pathnames.length,
    cpuCount: os.cpus().length,

    // These are "magic values" that we should refine based on user feedback
    // Local tests show that it's not worth spawning new workers for few pages
    minPagesPerCpu: 100,
    // Local tests show that even if there are many CPUs and pages,
    // using too many threads decreases performance, probably because of IOs
    maxThreads: 8,
  });
}

const createPooledSSGExecutor: CreateSSGExecutor = async ({
params,
pathnames,
}) => {
// TODO make this configurable
// Sensible default that gives the best improvement so far:
const numberOfThreads = os.cpus().length / 2;
const numberOfThreads = getNumberOfThreads(pathnames);

// When the inferred or provided number of threads is just 1
// It's not worth it to use a thread pool
// This also allows users to disable the thread pool with the env variable
// DOCUSAURUS_SSG_WORKER_THREADS=1
if (numberOfThreads === 1) {
return createSimpleSSGExecutor({params, pathnames});
}

// TODO this is not ideal for performance
// Some chunks may contain more expensive pages
// and we have to wait for the slowest chunk to finish to complete SSG
// There can be a significant time lapse between the fastest/slowest worker
const pathnamesChunks = _.chunk(
pathnames,
Math.ceil(pathnames.length / numberOfThreads),
Expand All @@ -86,7 +139,7 @@ const createPooledSSGExecutor: CreateSSGExecutor = async ({
return {
run: async () => {
const results = await PerfLogger.async(
'Generate static files - pooled',
'Generate static files - Using worker threads pool',
async () => {
return Promise.all(
pathnamesChunks.map((chunk, chunkIndex) => {
Expand All @@ -96,6 +149,7 @@ const createPooledSSGExecutor: CreateSSGExecutor = async ({
return pool.run({
pathnames: chunk,
params,
worker: chunkIndex + 1,
}) as Promise<GenerateStaticFilesResult>;
},
);
Expand Down Expand Up @@ -138,10 +192,9 @@ export async function executeSSG({
return {collectedData: {}};
}

const createExecutor =
process.env.DOCUSAURUS_DISABLE_SSG_POOL === 'true'
? createSimpleSSGExecutor
: createPooledSSGExecutor;
const createExecutor = props.siteConfig.future.v4.enableSSGWorkerThreads
? createPooledSSGExecutor
: createSimpleSSGExecutor;

const executor = await createExecutor({params, pathnames: props.routesPaths});

Expand Down
9 changes: 8 additions & 1 deletion packages/docusaurus/src/ssg/ssgUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import fs from 'fs-extra';
import path from 'path';
import type {SSGParams} from './ssgParams';

// Secret way to set SSR plugin concurrency option
// Secret way to set SSR plugin async concurrency option
// Waiting for feedback before documenting this officially?
export const SSGConcurrency = process.env.DOCUSAURUS_SSR_CONCURRENCY
? parseInt(process.env.DOCUSAURUS_SSR_CONCURRENCY, 10)
Expand All @@ -18,6 +18,13 @@ export const SSGConcurrency = process.env.DOCUSAURUS_SSR_CONCURRENCY
// See also https://github.com/sindresorhus/p-map/issues/24
32;

// Parses the raw env value into a usable worker thread count.
// Returns undefined when the variable is unset/empty, or when the value does
// not parse to an integer >= 1 (parseInt() returns NaN for non-numeric input).
function parseSSGWorkerThreads(value: string | undefined): number | undefined {
  if (!value) {
    return undefined;
  }
  const parsed = parseInt(value, 10);
  return Number.isInteger(parsed) && parsed >= 1 ? parsed : undefined;
}

// Secret way to set the number of SSG worker threads
// Waiting for feedback before documenting this officially?
// undefined means "not provided (or not usable)"
export const SSGWorkerThreads: number | undefined = parseSSGWorkerThreads(
  process.env.DOCUSAURUS_SSG_WORKER_THREADS,
);

function pathnameToFilename({
pathname,
trailingSlash,
Expand Down

0 comments on commit 3fe6044

Please sign in to comment.