Skip to content

Commit

Permalink
Feat: bundle a default whisper model (#304)
Browse files Browse the repository at this point in the history
* add scripts to download whisper model & ffmpeg wasm for bundle

* use the default whisper model if none has been downloaded
  • Loading branch information
an-lee authored Feb 12, 2024
1 parent b8011d2 commit 825031c
Show file tree
Hide file tree
Showing 13 changed files with 312 additions and 69 deletions.
28 changes: 0 additions & 28 deletions .github/workflows/playwright.yml

This file was deleted.

36 changes: 36 additions & 0 deletions .github/workflows/test-enjoy-app.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Workflow that packages the Enjoy app and runs its Playwright tests
# on every OS listed in the matrix below.
name: Test Enjoy App
# Runs only when started manually (no push / pull_request trigger is declared).
on: workflow_dispatch
jobs:
test:
# Upper bound for a single matrix job.
timeout-minutes: 60
runs-on: ${{ matrix.os }}
strategy:
matrix:
# One job is created per runner image in this list.
os:
[
macos-11,
macos-12,
macos-13,
macos-latest,
windows-latest,
ubuntu-latest,
]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
with:
node-version: 20
# Yarn is installed globally first, then used to install the project dependencies.
- name: Install dependencies
run: npm install -g yarn && yarn
- name: Install Playwright Browsers
run: yarn playwright install --with-deps
# The app is packaged before the tests run.
- name: Package
run: yarn package:enjoy
- name: Run Playwright tests
run: yarn test:enjoy
# `always()` makes the report upload run even when an earlier step failed.
- uses: actions/upload-artifact@v3
if: always()
with:
name: playwright-report
path: playwright-report/
retention-days: 30
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,11 @@ package-lock.json
*/playwright-report/
*/blob-report/
*/playwright/.cache/

# whisper models
ggml-*.bin

# ffmpeg wasm
ffmpeg-core.wasm
ffmpeg-core.js
ffmpeg-core.worker.js
Empty file added enjoy/assets/libs/.keep
Empty file.
16 changes: 0 additions & 16 deletions enjoy/assets/libs/ffmpeg-core.js

This file was deleted.

Binary file removed enjoy/assets/libs/ffmpeg-core.wasm
Binary file not shown.
1 change: 0 additions & 1 deletion enjoy/assets/libs/ffmpeg-core.worker.js

This file was deleted.

6 changes: 5 additions & 1 deletion enjoy/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
"publish": "rimraf .vite && electron-forge publish",
"lint": "eslint --ext .ts,.tsx .",
"test": "playwright test",
"create-migration": "zx ./src/main/db/create-migration.mjs"
"create-migration": "zx ./src/main/db/create-migration.mjs",
"download-whisper-model": "zx ./scripts/download-whisper-model.mjs",
"download-ffmpeg-wasm": "zx ./scripts/download-ffmpeg-wasm.mjs",
"postinstall": "zx ./scripts/download-whisper-model.mjs && zx ./scripts/download-ffmpeg-wasm.mjs"
},
"keywords": [],
"author": {
Expand Down Expand Up @@ -56,6 +59,7 @@
"eslint-plugin-import": "^2.29.1",
"flora-colossus": "^2.0.0",
"octokit": "^3.1.2",
"progress": "^2.0.3",
"tailwind-merge": "^2.2.1",
"tailwindcss": "^3.4.1",
"tailwindcss-animate": "^1.0.7",
Expand Down
149 changes: 149 additions & 0 deletions enjoy/scripts/download-ffmpeg-wasm.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/usr/bin/env zx

import axios from "axios";
import { createHash } from "crypto";
import { HttpsProxyAgent } from "https-proxy-agent";

console.log(chalk.blue("=> Download ffmpeg wasm files"));

// Assets this script manages, each with the MD5 digest it must have on disk.
const files = [
  { name: "ffmpeg-core.wasm", md5: "ff1676d6a417d1162dba70dbe8dfd354" },
  { name: "ffmpeg-core.worker.js", md5: "09dc7f1cd71bb52bd9afc22afdf1f6da" },
  { name: "ffmpeg-core.js", md5: "30296628fd78e4ef1c939f36c1d31527" },
];
// Filled below with every asset that is missing or fails verification.
const pendingFiles = [];
const dir = path.join(process.cwd(), "assets/libs");
fs.ensureDirSync(dir);

/**
 * Inspect one asset on disk and queue it in `pendingFiles` unless a regular
 * file with the expected MD5 is already present. A file with the wrong digest
 * is deleted before being queued. Any error other than ENOENT aborts the
 * script with exit code 1.
 */
const checkExistingFile = async (file) => {
  const target = path.join(dir, file.name);

  try {
    if (!fs.statSync(target).isFile()) {
      pendingFiles.push(file);
      return;
    }

    console.log(chalk.green(`=> File ${file.name} already exists`));

    const digest = await hashFile(target, { algo: "md5" });
    if (digest === file.md5) {
      console.log(chalk.green(`=> File ${file.name} MD5 match`));
      return;
    }

    console.log(
      chalk.yellow(`=> File ${file.name} MD5 not match, start to redownload`)
    );
    fs.removeSync(target);
    pendingFiles.push(file);
  } catch (err) {
    // ENOENT simply means "not downloaded yet"; everything else is fatal.
    if (err && err.code !== "ENOENT") {
      console.log(chalk.red(`=> Error: ${err}`));
      process.exit(1);
    }
    pendingFiles.push(file);
  }
};

// All assets are checked concurrently.
await Promise.all(files.map((file) => checkExistingFile(file)));

// Nothing left to fetch: finish early with a success exit code.
if (pendingFiles.length === 0) {
  console.log(chalk.green("=> All files already exist"));
  process.exit(0);
}

console.log(chalk.blue(`=> Start to download ${pendingFiles.length} files`));

// Pick the first proxy variable that is set; the HTTPS variants take priority.
const {
  HTTPS_PROXY: upperHttpsProxy,
  https_proxy: lowerHttpsProxy,
  HTTP_PROXY: upperHttpProxy,
  http_proxy: lowerHttpProxy,
} = process.env;
const proxyUrl =
  upperHttpsProxy || lowerHttpsProxy || upperHttpProxy || lowerHttpProxy;

if (proxyUrl) {
  const parsedProxy = new URL(proxyUrl);
  const httpsAgent = new HttpsProxyAgent(proxyUrl);

  // NOTE(review): both axios' own `proxy` option and a proxy agent are
  // configured here; confirm the request is not routed through the proxy twice.
  axios.defaults.proxy = {
    host: parsedProxy.hostname,
    port: parsedProxy.port,
    protocol: parsedProxy.protocol,
  };
  axios.defaults.httpsAgent = httpsAgent;

  console.log(chalk.blue(`=> Use proxy: ${proxyUrl}`));
}

/**
 * Download one file into `dest` and verify its MD5 digest.
 *
 * Failures are reported by rejecting instead of terminating the process, so
 * the caller's own error handling (logging, cleanup, exit code) actually runs.
 *
 * @param {string} url - Address of the file to fetch.
 * @param {string} dest - Path the downloaded bytes are written to.
 * @param {string} md5 - Expected hex MD5 digest of the downloaded file.
 * @returns {Promise<void>} Resolves once the file is written and verified.
 * @throws {Error} When the request fails, the file cannot be written, or the
 *   digest does not match (in which case the bad file is removed first).
 */
const download = async (url, dest, md5) => {
  return spinner(async () => {
    console.log(chalk.blue(`=> Start to download file ${url}`));

    const response = await axios.get(url, { responseType: "arraybuffer" });
    fs.writeFileSync(dest, Buffer.from(response.data));

    const hash = await hashFile(dest, { algo: "md5" });
    console.log(chalk.blue(`=> File ${dest}(MD5: ${hash})`));

    if (hash !== md5) {
      // Do not leave a corrupted asset behind for the app to pick up.
      fs.removeSync(dest);
      throw new Error(`${dest} MD5 not match, ${hash} should be ${md5}`);
    }

    console.log(chalk.green(`=> ${dest} downloaded successfully`));
  });
};

/**
 * Compute the hex digest of a file by streaming its contents through a hash.
 *
 * @param {string} file - Path of the file to read.
 * @param {{ algo?: string }} [options] - Optional settings; `algo` is the
 *   algorithm name handed to `createHash` and falls back to "md5".
 * @returns {Promise<string>} Resolves with the hex digest; rejects when the
 *   read stream emits an error (for example when the file does not exist).
 */
function hashFile(file, options) {
  // `options` may be left out entirely; that must not throw.
  const algo = options?.algo || "md5";
  return new Promise((resolve, reject) => {
    const hash = createHash(algo);
    fs.createReadStream(file)
      .on("error", reject)
      .on("data", (chunk) => hash.update(chunk))
      .on("end", () => resolve(hash.digest("hex")));
  });
}

/**
 * Remove the assets this run attempted to download.
 *
 * Only entries of `pendingFiles` are deleted: files that were already on disk
 * with a matching MD5 were never touched by this run and are kept, so a failed
 * download does not force them to be fetched again. A failure to remove one
 * file is logged and does not stop the remaining removals.
 */
const cleanup = () => {
  for (const file of pendingFiles) {
    try {
      fs.removeSync(path.join(dir, file.name));
    } catch (err) {
      console.log(chalk.red(`=> Error: ${err}`));
    }
  }
};

// NOTE(review): the "[email protected]" segment looks like an e-mail-obfuscation
// artefact standing in for a `<package>@<version>` specifier — restore the
// real value (it must match the MD5 digests listed for the files) before use.
const baseURL = "https://unpkg.com/@ffmpeg/[email protected]/dist/esm";

// Start every pending download at once; the promise rejects on the first failure.
const fetchPendingFiles = () =>
  Promise.all(
    pendingFiles.map(({ name, md5: expectedMd5 }) =>
      download(`${baseURL}/${name}`, path.join(dir, name), expectedMd5)
    )
  );

try {
  await fetchPendingFiles();
} catch (failure) {
  // Report the problem, remove the assets, and signal failure to the caller.
  console.log(chalk.red(`=> Error: ${failure}`));
  cleanup();
  process.exit(1);
}

console.log(chalk.green("=> All files downloaded successfully"));
process.exit(0);
96 changes: 96 additions & 0 deletions enjoy/scripts/download-whisper-model.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env zx

import axios from "axios";
import progress from "progress";
import { createHash } from "crypto";

// Bundled default model and the MD5 digest the file on disk must have.
const model = "ggml-base.en-q5_1.bin";
const md5 = "55309cc6613788f07ac7988985210734";

const dir = path.join(process.cwd(), "lib/whisper.cpp/models");

console.log(chalk.blue(`=> Download whisper model ${model}`));

fs.ensureDirSync(dir);

// Skip the download when a verified copy is already present.
const modelPath = path.join(dir, model);
try {
  const isExistingFile = fs.statSync(modelPath).isFile();
  if (isExistingFile) {
    console.log(chalk.green(`=> Model ${model} already exists`));

    const digest = await hashFile(modelPath, { algo: "md5" });
    if (digest !== md5) {
      // Wrong digest: drop the file and fall through to the download below.
      console.log(
        chalk.red(`=> Model ${model} MD5 not match, start to redownload`)
      );
      fs.removeSync(modelPath);
    } else {
      console.log(chalk.green(`=> Model ${model} MD5 match`));
      process.exit(0);
    }
  }
} catch (err) {
  // A missing file is the normal "first run" case; anything else is fatal.
  const isMissingFile = !err || err.code === "ENOENT";
  if (isMissingFile) {
    console.log(chalk.blue(`=> Start to download model ${model}`));
  } else {
    console.log(chalk.red(`=> Error: ${err}`));
    process.exit(1);
  }
}

// Pick the first proxy variable that is set; the HTTPS variants take priority.
const {
  HTTPS_PROXY: upperHttpsProxy,
  https_proxy: lowerHttpsProxy,
  HTTP_PROXY: upperHttpProxy,
  http_proxy: lowerHttpProxy,
} = process.env;
const proxyUrl =
  upperHttpsProxy || lowerHttpsProxy || upperHttpProxy || lowerHttpProxy;

if (proxyUrl) {
  // Hand the proxy's host, port and protocol to axios for every request.
  const parsedProxy = new URL(proxyUrl);
  axios.defaults.proxy = {
    host: parsedProxy.hostname,
    port: parsedProxy.port,
    protocol: parsedProxy.protocol,
  };
}

// Base address the model file name is appended to.
const modelUrlPrefix =
  "https://huggingface.co/ggerganov/whisper.cpp/resolve/main";

/**
 * Compute the hex digest of a file by streaming its contents through a hash.
 *
 * @param {string} path - Path of the file to read.
 * @param {{ algo?: string }} [options] - Optional settings; `algo` is the
 *   algorithm name handed to `createHash` and falls back to "md5".
 * @returns {Promise<string>} Resolves with the hex digest; rejects when the
 *   read stream emits an error (for example when the file does not exist).
 */
function hashFile(path, options) {
  // `options` may be left out entirely; that must not throw.
  const algo = options?.algo || "md5";
  return new Promise((resolve, reject) => {
    const hash = createHash(algo);
    fs.createReadStream(path)
      .on("error", reject)
      .on("data", (chunk) => hash.update(chunk))
      .on("end", () => resolve(hash.digest("hex")));
  });
}

/**
 * Stream the model from `url` into `dest`, verify its MD5 digest, and end the
 * process: exit code 0 on success, 1 on any failure.
 *
 * The returned promise stays pending until the file is fully written, so
 * request errors, stream errors and write errors all reach the single error
 * path below. On failure the partial or corrupted file is removed.
 *
 * @param {string} url - Address of the model file.
 * @param {string} dest - Path the model is written to.
 * @param {string} [expectedMd5] - Expected hex MD5 digest; defaults to the
 *   module-level `md5` of the bundled model.
 * @returns {Promise<void>}
 */
const download = async (url, dest, expectedMd5 = md5) => {
  try {
    const response = await axios.get(url, { responseType: "stream" });

    // The bar needs a finite total; skip it when the server sends no length.
    const totalLength = Number.parseInt(response.headers["content-length"], 10);
    const progressBar =
      Number.isFinite(totalLength) && totalLength > 0
        ? new progress(`-> downloading [:bar] :percent :etas`, {
            width: 40,
            complete: "=",
            incomplete: " ",
            renderThrottle: 1,
            total: totalLength,
          })
        : null;

    await new Promise((resolve, reject) => {
      const writer = fs.createWriteStream(dest);

      response.data.on("data", (chunk) => {
        progressBar?.tick(chunk.length);
      });
      response.data.on("error", (err) => {
        reject(err);
        // Release the file handle so the partial file can be removed.
        writer.destroy();
      });
      writer.on("error", reject);
      writer.on("close", resolve);

      response.data.pipe(writer);
    });

    const hash = await hashFile(dest, { algo: "md5" });
    if (hash !== expectedMd5) {
      throw new Error(
        `Model ${model} MD5 not match, ${hash} should be ${expectedMd5}`
      );
    }

    console.log(chalk.green(`=> Model ${model} downloaded successfully`));
    process.exit(0);
  } catch (err) {
    // Never leave a truncated or corrupted model behind.
    fs.removeSync(dest);
    console.log(chalk.red(`=> Error: ${err}`));
    process.exit(1);
  }
};

// Fetch the model into the models directory; `download` itself ends the
// process with the appropriate exit code.
await download(`${modelUrlPrefix}/${model}`, path.join(dir, model));
Loading

0 comments on commit 825031c

Please sign in to comment.