Skip to content

Commit

Permalink
Merge branch 'main' into jhen-dev
Browse files — browse the repository at this point in the history
  • Loading branch information
jhen0409 committed Jan 13, 2025
2 parents ba19a63 + 407b2db commit 82d3ce6
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 26 deletions.
34 changes: 17 additions & 17 deletions .github/workflows/build-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,27 @@ on:
workflow_dispatch:
inputs:
upload-artifacts:
type: boolean
default: false
type: string
default: 'NO'
artifacts-retention-days:
type: number
default: 1
workflow_call:
inputs:
upload-artifacts:
type: boolean
default: false
type: string
default: 'YES'
artifacts-retention-days:
type: number
default: 3

jobs:
build-linux-x86_64:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
submodules: 'true'
submodules: "true"
- name: node_modules cache
uses: actions/[email protected]
with:
Expand All @@ -46,19 +46,19 @@ jobs:
bash ./scripts/prepare-linux.sh
bash ./scripts/build-linux.sh
- name: Upload build artifacts
if: inputs.upload-artifacts == 'true'
if: github.event.inputs.upload-artifacts == 'YES' || inputs.upload-artifacts == 'YES'
uses: actions/upload-artifact@v4
with:
name: bin-linux-x86_64
path: bin
retention-days: ${{ inputs.artifacts-retention-days }}

build-linux-arm64:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
submodules: 'true'
submodules: "true"
- name: node_modules cache
uses: actions/[email protected]
with:
Expand All @@ -72,7 +72,7 @@ jobs:
- uses: actions/[email protected]
with:
node-version: 20
cache: 'yarn'
cache: "yarn"
- name: Install dependencies
run: yarn install
- name: Setup QEMU
Expand All @@ -89,7 +89,7 @@ jobs:
arm64v8/ubuntu:latest \
bash -c "./scripts/prepare-linux.sh && ./scripts/build-linux.sh"
- name: Upload build artifacts
if: inputs.upload-artifacts == 'true'
if: github.event.inputs.upload-artifacts == 'YES' || inputs.upload-artifacts == 'YES'
uses: actions/upload-artifact@v4
with:
name: bin-linux-arm64
Expand All @@ -104,7 +104,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
submodules: 'true'
submodules: "true"
- name: node_modules cache
uses: actions/[email protected]
with:
Expand All @@ -118,13 +118,13 @@ jobs:
- uses: actions/[email protected]
with:
node-version: 20
cache: 'yarn'
cache: "yarn"
- name: Install dependencies
run: yarn install
- name: Build (macOS)
run: bash ./scripts/build-macos.sh
- name: Upload build artifacts
if: inputs.upload-artifacts == 'true'
if: github.event.inputs.upload-artifacts == 'YES' || inputs.upload-artifacts == 'YES'
uses: actions/upload-artifact@v4
with:
name: bin-${{ matrix.os }}
Expand All @@ -136,7 +136,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
submodules: 'true'
submodules: "true"
- name: node_modules cache
uses: actions/[email protected]
with:
Expand All @@ -150,15 +150,15 @@ jobs:
- uses: actions/[email protected]
with:
node-version: 20
cache: 'yarn'
cache: "yarn"
- name: Install dependencies
run: yarn install
- name: Prepare & build
run: |
powershell ./scripts/prepare-windows.ps1
powershell ./scripts/build-windows.ps1
- name: Upload build artifacts
if: inputs.upload-artifacts == 'true'
if: github.event.inputs.upload-artifacts == 'YES' || inputs.upload-artifacts == 'YES'
uses: actions/upload-artifact@v4
with:
name: bin-windows
Expand Down
4 changes: 0 additions & 4 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,12 @@ on:
jobs:
build:
uses: ./.github/workflows/build-release.yml
with:
upload-artifacts: true
artifacts-retention-days: 3
publish:
permissions:
contents: write
pull-requests: read
needs: build
runs-on: ubuntu-latest
if: startsWith(github.ref, 'refs/tags/v')
steps:
- uses: actions/checkout@v4
with:
Expand Down
3 changes: 2 additions & 1 deletion .release-it.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{
"$schema": "https://unpkg.com/release-it@17/schema/release-it.json",
"git": {
"commitMessage": "chore: release v${version}"
"commitMessage": "chore: release v${version}",
"requireCleanWorkingDir": false
},
"github": {
"release": true
Expand Down
22 changes: 21 additions & 1 deletion lib/binding.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,31 @@ export type LlamaModelOptions = {
model: string
embedding?: boolean
embd_normalize?: number
pooling_type?: number
pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
n_ctx?: number
n_batch?: number
n_ubatch?: number
n_threads?: number
n_gpu_layers?: number
flash_attn?: boolean
cache_type_k?:
| 'f16'
| 'f32'
| 'q8_0'
| 'q4_0'
| 'q4_1'
| 'iq4_nl'
| 'q5_0'
| 'q5_1'
cache_type_v?:
| 'f16'
| 'f32'
| 'q8_0'
| 'q4_0'
| 'q4_1'
| 'iq4_nl'
| 'q5_0'
| 'q5_1'
use_mlock?: boolean
use_mmap?: boolean
vocab_only?: boolean
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@fugood/llama.node",
"access": "public",
"version": "0.3.4",
"version": "0.3.6",
"description": "Llama.cpp for Node.js",
"main": "lib/index.js",
"scripts": {
Expand Down
41 changes: 39 additions & 2 deletions src/LlamaContext.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "ggml.h"
#include "LlamaContext.h"
#include "DetokenizeWorker.h"
#include "DisposeWorker.h"
Expand Down Expand Up @@ -60,6 +61,36 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
exports.Set("LlamaContext", func);
}

// KV-cache quantization types accepted for the cache_type_k / cache_type_v
// options. kv_cache_type_from_str() resolves an option string by comparing
// it against ggml_type_name() of each entry, so this list defines exactly
// which names are valid; order only affects lookup order.
// NOTE(review): GGML_TYPE_BF16 is accepted here, but 'bf16' is not listed
// in the LlamaModelOptions cache_type_k/v unions in lib/binding.ts —
// confirm whether the TS type should include it.
const std::vector<ggml_type> kv_cache_types = {
GGML_TYPE_F32,
GGML_TYPE_F16,
GGML_TYPE_BF16,
GGML_TYPE_Q8_0,
GGML_TYPE_Q4_0,
GGML_TYPE_Q4_1,
GGML_TYPE_IQ4_NL,
GGML_TYPE_Q5_0,
GGML_TYPE_Q5_1,
};

// Map a user-supplied KV-cache type name (e.g. "f16", "q8_0") to its
// ggml_type by matching against ggml_type_name() of each supported entry.
// Throws std::runtime_error when the name is not in kv_cache_types.
static ggml_type kv_cache_type_from_str(const std::string & s) {
  for (size_t i = 0; i < kv_cache_types.size(); ++i) {
    const ggml_type candidate = kv_cache_types[i];
    if (s == ggml_type_name(candidate)) {
      return candidate;
    }
  }
  throw std::runtime_error("Unsupported cache type: " + s);
}

// Translate a pooling-type option string ("none", "mean", "cls", "last",
// "rank") into the corresponding llama_pooling_type value. Any other
// string (including the empty default) yields
// LLAMA_POOLING_TYPE_UNSPECIFIED so llama.cpp picks the model's default.
static int32_t pooling_type_from_str(const std::string & s) {
  struct Entry { const char * name; int32_t value; };
  static const Entry entries[] = {
      {"none", LLAMA_POOLING_TYPE_NONE},
      {"mean", LLAMA_POOLING_TYPE_MEAN},
      {"cls",  LLAMA_POOLING_TYPE_CLS},
      {"last", LLAMA_POOLING_TYPE_LAST},
      {"rank", LLAMA_POOLING_TYPE_RANK},
  };
  for (const auto & entry : entries) {
    if (s == entry.name) {
      return entry.value;
    }
  }
  return LLAMA_POOLING_TYPE_UNSPECIFIED;
}

// construct({ model, embedding, n_ctx, n_batch, n_threads, n_gpu_layers,
// use_mlock, use_mmap }): LlamaContext throws error
LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
Expand All @@ -83,18 +114,24 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)

params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
params.embedding = get_option<bool>(options, "embedding", false);
if (params.embedding) {
// For non-causal models, batch size must be equal to ubatch size
params.n_ubatch = params.n_batch;
}
params.embd_normalize = get_option<int32_t>(options, "embd_normalize", 2);
int32_t pooling_type = get_option<int32_t>(options, "pooling_type", -1);
params.pooling_type = (enum llama_pooling_type) pooling_type;
params.pooling_type = (enum llama_pooling_type) pooling_type_from_str(
get_option<std::string>(options, "pooling_type", "").c_str()
);

params.cpuparams.n_threads =
get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
params.flash_attn = get_option<bool>(options, "flash_attn", false);
params.cache_type_k = kv_cache_type_from_str(get_option<std::string>(options, "cache_type_k", "f16").c_str());
params.cache_type_v = kv_cache_type_from_str(get_option<std::string>(options, "cache_type_v", "f16").c_str());

params.use_mlock = get_option<bool>(options, "use_mlock", false);
params.use_mmap = get_option<bool>(options, "use_mmap", true);
params.numa =
Expand Down

0 comments on commit 82d3ce6

Please sign in to comment.