From 1693bda778a21adb8fcd4232e74e0ddf8fdf84b2 Mon Sep 17 00:00:00 2001 From: Kevin Bulteel Date: Fri, 22 Mar 2024 14:47:28 +0100 Subject: [PATCH] api: better handle static file streams --- package-lock.json | 12 ++++ packages/@hec.js/api/lib/src/api.js | 6 +- packages/@hec.js/api/lib/src/files/local.js | 69 +++++++++++++------ packages/@hec.js/api/lib/src/pages/local.js | 26 ++++--- packages/@hec.js/api/lib/src/routing/serve.js | 5 ++ .../api/lib/types/src/pages/local.d.ts | 9 +-- packages/@hec.js/api/package.json | 5 +- packages/@hec.js/api/test/files.test.js | 3 +- 8 files changed, 98 insertions(+), 37 deletions(-) diff --git a/package-lock.json b/package-lock.json index 2da347e..3c72fee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3801,6 +3801,17 @@ "through2": "^2.0.0" } }, + "node_modules/md5-file": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/md5-file/-/md5-file-5.0.0.tgz", + "integrity": "sha512-xbEFXCYVWrSx/gEKS1VPlg84h/4L20znVIulKw6kMfmBUAZNAnF00eczz9ICMl+/hjQGo5KSXRxbL/47X3rmMw==", + "bin": { + "md5-file": "cli.js" + }, + "engines": { + "node": ">=10.13.0" + } + }, "node_modules/mdn-data": { "version": "2.0.30", "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.30.tgz", @@ -6802,6 +6813,7 @@ "license": "MIT", "dependencies": { "@hec.js/api": "file:./", + "md5-file": "^5.0.0", "mime-types": "^2.1.35", "urlpattern-polyfill": "^9.0.0" } diff --git a/packages/@hec.js/api/lib/src/api.js b/packages/@hec.js/api/lib/src/api.js index 7098f55..743b4e7 100644 --- a/packages/@hec.js/api/lib/src/api.js +++ b/packages/@hec.js/api/lib/src/api.js @@ -62,6 +62,10 @@ export class API { apiRequest = request; } + if (!apiRequest.path.startsWith('/.well-known') && apiRequest.path.match(/\/[_.]/)) { + return new Response(null, { status: 403 }); + } + const context = { status: 404, url: null, @@ -125,7 +129,7 @@ export class API { } } } - } + } findRoutes(apiRequest, this.#routes); diff --git a/packages/@hec.js/api/lib/src/files/local.js b/packages/@hec.js/api/lib/src/files/local.js index 07a16ff..17ddd1a 100644 --- a/packages/@hec.js/api/lib/src/files/local.js +++ b/packages/@hec.js/api/lib/src/files/local.js @@ -1,6 +1,8 @@ -import { stat, readFile } from 'fs/promises'; -import { lookup } from 'mime-types'; -import path from 'path'; +import { stat, open } from 'fs/promises'; +import { lookup } from 'mime-types'; +import path from 'path'; +import md5 from 'md5-file'; +import { ReadStream } from 'fs'; /** @typedef {{ etag?: string, 'last-modified'?: string } & { [key: string]: string }} FileInfo */ @@ -51,37 +53,60 @@ export function files(options = {}) { return new Response(null, { status: 404 }); } - const data = await readFile(filePath), - etag = `"${Buffer.from(await crypto.subtle.digest('SHA-1', data)).toString('hex')}"`, + const etag = `W/"${ await md5(filePath) }"`, size = fileStats.size, lastModified = new Date(fileStats.mtime).toUTCString(), range = parseRangeHeader(request, size); - if (headers.has('if-range') && ifRange != lastModified && !ifRange?.includes(etag)) { + if (headers.has('if-range') && ifRange !== lastModified && ifRange !== etag) { headers.delete('range'); } const responseHeaders = { - 'content-type' : lookup(filePath) || 'application/octect-stream', - 'content-size' : (range.end - range.start).toString(), - 'cache-control' : options.cacheControl, - 'last-modified' : lastModified, - 'etag' : etag, - 'accept-ranges' : 'bytes', - 'content-range' : `bytes ${range.start}-${range.end - 1}/${size}` + 'content-type' : lookup(filePath) || 'application/octect-stream', + 'content-length' : (range.end - range.start + 1).toString(), + 'cache-control' : options.cacheControl, + 'last-modified' : lastModified, + 'etag' : etag, + 'accept-ranges' : 'bytes', }; + if (request.headers.has('range')) { + responseHeaders['content-range'] = `bytes ${range.start}-${range.end}/${size}`; + } + if (ifNoneMatch === etag || ifModifiedSince === lastModified) { return new Response(null, { status: 304, headers: responseHeaders }); } - cache.set(filePath, responseHeaders); + if (options.cacheDuration) { + cache.set(filePath, responseHeaders); + setTimeout(() => cache.delete(filePath), options.cacheDuration); + } + + const file = await open(filePath), + fileStream = file.createReadStream(range); - setTimeout(() => cache.delete(filePath), options.cacheDuration); + return new Response(new ReadableStream({ + start(stream) { + let isClosed = false; - return new Response( - headers.has('range') ? data.subarray(range.start, range.end) : data, - { + const end = async () => { + if (!isClosed) { + isClosed = true; + stream.close(); + fileStream.close(); + await file.close(); + } + } + + request.signal.addEventListener('abort', () => end()); + + fileStream.on('data', (data) => stream.enqueue(data)); + fileStream.on('close', () => end()); + fileStream.on('error', () => end()); + } + }), { status : headers.has('range') ? 206 : 200, headers : responseHeaders } @@ -100,7 +125,11 @@ export function files(options = {}) { */ function parseRangeHeader(request, size) { const r = request.headers.get('range'), - s = r ? r.substring(6).split('-').map(parseInt) : [0, size]; + m = size - 1, + s = r ? r.substring(6).split('-').map(e => parseInt(e)) : [0, m]; - return { start : s[0] || 0, end: Math.min(s[1] || size, size) }; + return { + start: s[0] || 0, + end: Math.min(s[1] || m, m) + }; } \ No newline at end of file diff --git a/packages/@hec.js/api/lib/src/pages/local.js b/packages/@hec.js/api/lib/src/pages/local.js index 23b9e40..c4a0f2a 100644 --- a/packages/@hec.js/api/lib/src/pages/local.js +++ b/packages/@hec.js/api/lib/src/pages/local.js @@ -6,15 +6,17 @@ import path from 'path'; * fileProvider: (request: Request) => Promise, * indexes?: string[], * index?: string, - * errorPages?: { [key: number]: string } + * errorPages?: { [key: number]: string }, + * cacheControl?: string * }} options * * @returns { (request: Request) => Promise } * * @description - * Option `fileProvder` is called to retrieve a file - * Option `index` if this is set, it will serve all requests that don't match a file - * Option `indexes` is used to append strings the and of a url if it's not found. + * Option `fileProvder` is called to retrieve a file + * Option `index` if this is set, it will serve all requests that don't match a file + * Option `indexes` is used to append strings the and of a url if it's not found. + * Option `cacheControl` is used to set a `cache-control` header to the `html` responses. * Example using ['.html', 'index.html']: * - request: `/foobar` => /foobar.html * - request: `/foobar/` => /foobar/index.html @@ -23,8 +25,9 @@ import path from 'path'; * Example: { 404: '/404.html' } */ export function pages(options) { - options.directory ??= '.'; - options.indexes ??= ['.html', 'index.html']; + options.directory ??= '.'; + options.indexes ??= ['.html', 'index.html']; + options.cacheControl ??= 'no-cache'; const fileProvider = options.fileProvider; @@ -41,16 +44,21 @@ export function pages(options) { response = await options.fileProvider(new Request(request.url + index, request)); if (response.ok) { - response.headers.set('cache-control', 'no-cache'); + response.headers.set('cache-control', options.cacheControl); return response; } } if (options.index) { - return options.fileProvider(new Request(origin + options.index)); - } + response = await options.fileProvider(new Request(origin + options.index)); + if (response.ok) { + response.headers.set('cache-control', options.cacheControl); + } + + return response; + } } if (!response.ok) { diff --git a/packages/@hec.js/api/lib/src/routing/serve.js b/packages/@hec.js/api/lib/src/routing/serve.js index 4ce8596..ebdfe99 100644 --- a/packages/@hec.js/api/lib/src/routing/serve.js +++ b/packages/@hec.js/api/lib/src/routing/serve.js @@ -18,10 +18,15 @@ export function serveBy(fetch) { req.headers['cf-connecting-ip'] ??= req.socket.remoteAddress; req.headers['x-real-ip'] ??= req.socket.remoteAddress; + const abort = new AbortController(); + + req.on('close', () => abort.abort()); + fetch( new Request(`${ scheme }://${ req.headers.host }${ req.url }`, { method: req.method, duplex: 'half', + signal: abort.signal, // @ts-ignore headers: req.headers, body: ['HEAD', 'GET', 'OPTIONS'].includes(req.method) ? null : diff --git a/packages/@hec.js/api/lib/types/src/pages/local.d.ts b/packages/@hec.js/api/lib/types/src/pages/local.d.ts index b593bd7..3fe47bb 100644 --- a/packages/@hec.js/api/lib/types/src/pages/local.d.ts +++ b/packages/@hec.js/api/lib/types/src/pages/local.d.ts @@ -1,12 +1,11 @@ /** - * @template T - * * @param {{ * directory?: string, * fileProvider: (request: Request) => Promise, * indexes?: string[], * index?: string, - * errorPages?: { [key: number]: string } + * errorPages?: { [key: number]: string }, + * cacheControl?: string * }} options * * @returns { (request: Request) => Promise } @@ -15,6 +14,7 @@ * Option `fileProvder` is called to retrieve a file * Option `index` if this is set, it will serve all requests that don't match a file * Option `indexes` is used to append strings the and of a url if it's not found. + * Option `cacheControl` is used to set a `cache-control` header to the `html` responses. * Example using ['.html', 'index.html']: * - request: `/foobar` => /foobar.html * - request: `/foobar/` => /foobar/index.html @@ -22,7 +22,7 @@ * Option `errorPages` is used to determine a HTML page for a given error status code. * Example: { 404: '/404.html' } */ -export function pages(options: { +export function pages(options: { directory?: string; fileProvider: (request: Request) => Promise; indexes?: string[]; @@ -30,4 +30,5 @@ export function pages(options: { errorPages?: { [key: number]: string; }; + cacheControl?: string; }): (request: Request) => Promise; diff --git a/packages/@hec.js/api/package.json b/packages/@hec.js/api/package.json index 0c931d9..e24200c 100644 --- a/packages/@hec.js/api/package.json +++ b/packages/@hec.js/api/package.json @@ -11,7 +11,8 @@ }, "dependencies": { "@hec.js/api": "file:./", - "urlpattern-polyfill": "^9.0.0", - "mime-types": "^2.1.35" + "md5-file": "^5.0.0", + "mime-types": "^2.1.35", + "urlpattern-polyfill": "^9.0.0" } } diff --git a/packages/@hec.js/api/test/files.test.js b/packages/@hec.js/api/test/files.test.js index 756cfb6..23924c0 100644 --- a/packages/@hec.js/api/test/files.test.js +++ b/packages/@hec.js/api/test/files.test.js @@ -19,7 +19,8 @@ api.route({ path: '/*', fetch: files({ directory: './packages/@hec.js/api/test/assets', - cacheControl: 'private, max-age=0' + cacheControl: 'private, max-age=0', + cacheDuration: 0 }) });