diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5a0f502e2287..2b9b4402fab2 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -43,8 +43,8 @@ jobs:
           mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
           curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz" | \
           tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
-          sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ && \
           sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
+          sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
           sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
       - name: Test
         run: |
diff --git a/Dockerfile b/Dockerfile
index 5e39303a7753..f67a1f3ebf10 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -63,8 +63,8 @@ RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \
     mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
     curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH:-$(go env GOARCH)}${TARGETVARIANT}.tar.gz" | \
     tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
-    cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ && \
     cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
+    ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
     cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
 # \
 #  ; fi
diff --git a/api/api_test.go b/api/api_test.go
index 147774dfb48c..2947842e427c 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -30,10 +30,10 @@ import (
 )
 
 type modelApplyRequest struct {
-	ID        string            `json:"id"`
-	URL       string            `json:"url"`
-	Name      string            `json:"name"`
-	Overrides map[string]string `json:"overrides"`
+	ID        string                 `json:"id"`
+	URL       string                 `json:"url"`
+	Name      string                 `json:"name"`
+	Overrides map[string]interface{} `json:"overrides"`
 }
 
 func getModelStatus(url string) (response map[string]interface{}) {
@@ -243,7 +243,7 @@ var _ = Describe("API test", func() {
 			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 				URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
 				Name: "bert",
-				Overrides: map[string]string{
+				Overrides: map[string]interface{}{
 					"backend": "llama",
 				},
 			})
@@ -269,7 +269,7 @@ var _ = Describe("API test", func() {
 			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 				URL:       "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
 				Name:      "bert",
-				Overrides: map[string]string{},
+				Overrides: map[string]interface{}{},
 			})
 
 			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@@ -297,7 +297,7 @@ var _ = Describe("API test", func() {
 			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 				URL:       "github:go-skynet/model-gallery/openllama_3b.yaml",
 				Name:      "openllama_3b",
-				Overrides: map[string]string{"backend": "llama"},
+				Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
 			})
 
 			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@@ -366,9 +366,8 @@ var _ = Describe("API test", func() {
 			}
 			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply",
 				modelApplyRequest{
-					URL:       "github:go-skynet/model-gallery/gpt4all-j.yaml",
-					Name:      "gpt4all-j",
-					Overrides: map[string]string{},
+					URL:  "github:go-skynet/model-gallery/gpt4all-j.yaml",
+					Name: "gpt4all-j",
 				})
 
 			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
diff --git a/pkg/grpc/llm/llama/llama.go b/pkg/grpc/llm/llama/llama.go
index 7d867813cbbd..18e481efe60c 100644
--- a/pkg/grpc/llm/llama/llama.go
+++ b/pkg/grpc/llm/llama/llama.go
@@ -17,9 +17,20 @@ type LLM struct {
 }
 
 func (llm *LLM) Load(opts *pb.ModelOptions) error {
+
+	ropeFreqBase := float32(10000)
+	ropeFreqScale := float32(1)
+
+	if opts.RopeFreqBase != 0 {
+		ropeFreqBase = opts.RopeFreqBase
+	}
+	if opts.RopeFreqScale != 0 {
+		ropeFreqScale = opts.RopeFreqScale
+	}
+
 	llamaOpts := []llama.ModelOption{
-		llama.WithRopeFreqBase(opts.RopeFreqBase),
-		llama.WithRopeFreqScale(opts.RopeFreqScale),
+		llama.WithRopeFreqBase(ropeFreqBase),
+		llama.WithRopeFreqScale(ropeFreqScale),
 	}
 
 	if opts.ContextSize != 0 {
@@ -58,6 +69,15 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
 }
 
 func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
+	ropeFreqBase := float32(10000)
+	ropeFreqScale := float32(1)
+
+	if opts.RopeFreqBase != 0 {
+		ropeFreqBase = opts.RopeFreqBase
+	}
+	if opts.RopeFreqScale != 0 {
+		ropeFreqScale = opts.RopeFreqScale
+	}
 	predictOptions := []llama.PredictOption{
 		llama.SetTemperature(opts.Temperature),
 		llama.SetTopP(opts.TopP),
@@ -65,8 +85,8 @@ func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
 		llama.SetTokens(int(opts.Tokens)),
 		llama.SetThreads(int(opts.Threads)),
 		llama.WithGrammar(opts.Grammar),
-		llama.SetRopeFreqBase(opts.RopeFreqBase),
-		llama.SetRopeFreqScale(opts.RopeFreqScale),
+		llama.SetRopeFreqBase(ropeFreqBase),
+		llama.SetRopeFreqScale(ropeFreqScale),
 		llama.SetNegativePromptScale(opts.NegativePromptScale),
 		llama.SetNegativePrompt(opts.NegativePrompt),
 	}
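Note on the api/api_test.go change: switching `Overrides` from `map[string]string` to `map[string]interface{}` is what allows the openllama_3b test to pass non-string override values such as `"mmap": true` and `"context_size": 128`; with `map[string]string` those literals would not even compile, and string-encoded values like `"128"` would reach the gallery's YAML merge as strings rather than booleans or integers. A minimal standalone sketch of the difference — the struct is re-declared locally for illustration, and only the JSON encoding is being demonstrated:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Same shape as the test helper in api/api_test.go after this change.
type modelApplyRequest struct {
	ID        string                 `json:"id"`
	URL       string                 `json:"url"`
	Name      string                 `json:"name"`
	Overrides map[string]interface{} `json:"overrides"`
}

func main() {
	req := modelApplyRequest{
		URL:  "github:go-skynet/model-gallery/openllama_3b.yaml",
		Name: "openllama_3b",
		// Heterogeneous value types: impossible with map[string]string.
		Overrides: map[string]interface{}{
			"backend":      "llama",
			"mmap":         true,
			"f16":          true,
			"context_size": 128,
		},
	}
	out, err := json.Marshal(req)
	if err != nil {
		panic(err)
	}
	// Overrides serialize with native JSON types:
	// "mmap":true and "context_size":128, not "true"/"128".
	fmt.Println(string(out))
}
```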
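Note on the pkg/grpc/llm/llama/llama.go change: both `Load` and `buildPredictOptions` now treat the protobuf zero value as "field not set" and substitute llama.cpp's defaults (rope frequency base 10000, scale 1) instead of forwarding a literal 0. A minimal sketch of the same pattern in isolation — `ropeDefaults` and the stub `modelOptions` struct are hypothetical names standing in for the generated `pb.ModelOptions`; only the diff above is authoritative:

```go
package main

import "fmt"

// modelOptions stands in for pb.ModelOptions; only the two
// fields relevant to this sketch are reproduced.
type modelOptions struct {
	RopeFreqBase  float32
	RopeFreqScale float32
}

// ropeDefaults maps the proto3 zero value ("unset") to the llama.cpp
// defaults, mirroring the logic added in Load and buildPredictOptions.
func ropeDefaults(opts modelOptions) (base, scale float32) {
	base, scale = 10000, 1
	if opts.RopeFreqBase != 0 {
		base = opts.RopeFreqBase
	}
	if opts.RopeFreqScale != 0 {
		scale = opts.RopeFreqScale
	}
	return base, scale
}

func main() {
	// Unset fields fall back to 10000 / 1 ...
	fmt.Println(ropeDefaults(modelOptions{}))
	// ... while explicit values pass through untouched.
	fmt.Println(ropeDefaults(modelOptions{RopeFreqBase: 1e6, RopeFreqScale: 0.5}))
}
```

One caveat of this scheme: a caller cannot deliberately request a base or scale of exactly 0, since 0 is indistinguishable from "unset" in proto3 scalar fields.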