llama.cpp and ipex run alternate (#32)
DanielHirschTNG authored and mschuettlerTNG committed Nov 25, 2024
1 parent b2db5c1 commit 2f2398d
Showing 18 changed files with 142 additions and 24 deletions.
11 changes: 11 additions & 0 deletions WebUI/electron/main.ts
@@ -479,6 +479,10 @@ function initEventHandle() {
return pathsManager.scanLLMModles(false);
});

ipcMain.handle("getDownloadedGGUFLLMs", (event) => {
return pathsManager.scanGGUFLLMModels(false);
});

ipcMain.handle("getDownloadedEmbeddingModels", (event) => {
return pathsManager.scanEmbedding(false);
});
@@ -577,6 +581,13 @@ function wakeupApiService() {
const wordkDir = path.resolve(app.isPackaged ? path.join(process.resourcesPath, "service") : path.join(__dirname, "../../../service"));
const comfyWordkDir = path.resolve(app.isPackaged ? path.join(process.resourcesPath, "ComfyUI") : path.join(__dirname, "../../../ComfyUI"));
const baseDir = app.isPackaged ? process.resourcesPath : path.join(__dirname, "../../../");

// replace `torchvision.transforms.functional_tensor` with `torchvision.transforms.functional` in `degradations.py`
const basicSrPathFileToBePatched = path.resolve(path.join(baseDir, "env/Lib/site-packages/basicsr/data/degradations.py"));
const fileContent = fs.readFileSync(basicSrPathFileToBePatched, 'utf8');
const patchedContent = fileContent.replace('torchvision.transforms.functional_tensor', 'torchvision.transforms.functional');
fs.writeFileSync(basicSrPathFileToBePatched, patchedContent, 'utf8');

const pythonExe = path.resolve(path.join(baseDir, "env/python.exe"));
const additionalEnvVariables = {
"SYCL_ENABLE_DEFAULT_CONTEXTS": "1",
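The block added to wakeupApiService() patches basicsr's degradations.py in place each time the API service starts: newer torchvision releases dropped the torchvision.transforms.functional_tensor module, so the stale import is redirected to torchvision.transforms.functional before the Python service loads basicsr. A slightly more defensive variant (a sketch only, not part of this commit) would skip the write when the file is missing or already patched:

// Sketch: make the basicsr patch a no-op when the file is absent or already fixed.
const target = path.join(baseDir, "env/Lib/site-packages/basicsr/data/degradations.py");
if (fs.existsSync(target)) {
    const original = fs.readFileSync(target, "utf8");
    if (original.includes("torchvision.transforms.functional_tensor")) {
        fs.writeFileSync(target, original.replace("torchvision.transforms.functional_tensor", "torchvision.transforms.functional"), "utf8");
    }
}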
23 changes: 23 additions & 0 deletions WebUI/electron/pathsManager.ts
@@ -4,6 +4,7 @@ import path from "node:path";
export class PathsManager {
modelPaths: ModelPaths = {
llm: "",
ggufLLM: "",
embedding: "",
stableDiffusion: "",
inpaint: "",
@@ -119,6 +120,28 @@ export class PathsManager {
}
return models
}
scanGGUFLLMModels(returnDefaults = true) {
const models = returnDefaults ? [
"lanok/Meta-Llama-3.1-8B-Instruct-Q5_K_M-GGUF",
] : [];
const dir = this.modelPaths.ggufLLM;
if (fs.existsSync(dir)) {
const modelsSet = new Set(models);
fs.readdirSync(dir).forEach(pathname => {
if (pathname.endsWith(".gguf")) {
const modelName = pathname;
if (!modelsSet.has(modelName)) {
modelsSet.add(modelName)
models.push(modelName)
}
}
});
}
else {
fs.mkdirSync(dir, { recursive: true });
}
return models
}
scanLora(returnDefaults = true) {
const models = returnDefaults ? [
"None",
1 change: 1 addition & 0 deletions WebUI/electron/preload.ts
@@ -163,6 +163,7 @@ contextBridge.exposeInMainWorld("electronAPI", {
getDownloadedInpaintModels: () => ipcRenderer.invoke("getDownloadedInpaintModels"),
getDownloadedLoras: () => ipcRenderer.invoke("getDownloadedLoras"),
getDownloadedLLMs: () => ipcRenderer.invoke("getDownloadedLLMs"),
getDownloadedGGUFLLMs: () => ipcRenderer.invoke("getDownloadedGGUFLLMs"),
getDownloadedEmbeddingModels: () => ipcRenderer.invoke("getDownloadedEmbeddingModels"),
openImageWithSystem: (url: string) => ipcRenderer.send("openImageWithSystem", url),
selecteImage: (url: string) => ipcRenderer.send("selecteImage", url),
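Together with the ipcMain handler added in main.ts, this bridge entry lets renderer code enumerate the downloaded GGUF files. A minimal usage sketch (the call site is hypothetical; the API name matches the diff):

// Resolves to the .gguf file names found in the configured ggufLLM directory.
const ggufModels: string[] = await window.electronAPI.getDownloadedGGUFLLMs();
console.log(ggufModels); // e.g. ["meta-llama-3.1-8b-instruct.Q5_K_M.gguf"]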
1 change: 1 addition & 0 deletions WebUI/external/model_config.dev.json
@@ -1,5 +1,6 @@
{
"llm": "../service/models/llm/checkpoints",
"ggufLLM": "../service/models/llm/gguf",
"embedding": "../service/models/llm/embedding",
"stableDiffusion": "../service/models/stable_diffusion/checkpoints",
"inpaint": "../service/models/stable_diffusion/inpaint",
1 change: 1 addition & 0 deletions WebUI/src/assets/i18n/en-US.json
@@ -40,6 +40,7 @@
"SETTINGS_TAB_BASIC": "Basic Settings",
"SETTINGS_TAB_MODEL": "Models",
"SETTINGS_INFERENCE_DEVICE": "Inference Device",
"SETTINGS_LLM_BACKEND": "LLM Backend",
"SETTINGS_MODEL_IMAGE_SIZE": "Image Size",
"SETTINGS_MODEL_IMAGE_RESOLUTION": "Image Resolution",
"SETTINGS_MODEL_IMAGE_RESOLUTION_STRANDARD": "Standard",
1 change: 1 addition & 0 deletions WebUI/src/assets/i18n/ko.json
@@ -38,6 +38,7 @@
"SETTINGS_TAB_BASIC": "기본 설정",
"SETTINGS_TAB_MODEL": "모델",
"SETTINGS_INFERENCE_DEVICE": "추론 장치",
"SETTINGS_LLM_BACKEND": "대규모 언어 모델 백엔드",
"SETTINGS_MODEL_IMAGE_RESOLUTION": "이미지 해상도",
"SETTINGS_MODEL_IMAGE_RESOLUTION_STRANDARD": "표준",
"SETTINGS_MODEL_IMAGE_RESOLUTION_HD": "HD",
1 change: 1 addition & 0 deletions WebUI/src/assets/i18n/zh-CN.json
@@ -40,6 +40,7 @@
"SETTINGS_TAB_BASIC": "基本设置",
"SETTINGS_TAB_MODEL": "模型",
"SETTINGS_INFERENCE_DEVICE": "推理设备",
"SETTINGS_LLM_BACKEND": "大语言模型后端",
"SETTINGS_MODEL_IMAGE_SIZE": "图片大小",
"SETTINGS_MODEL_IMAGE_RESOLUTION": "图片分辨率",
"SETTINGS_MODEL_IMAGE_RESOLUTION_STRANDARD": "标准",
8 changes: 8 additions & 0 deletions WebUI/src/assets/js/store/globalSetup.ts
@@ -27,6 +27,7 @@ export const useGlobalSetup = defineStore("globalSetup", () => {
quality: 0,
enableRag: false,
llm_model: "microsoft/Phi-3-mini-4k-instruct",
ggufLLM_model: "meta-llama-3.1-8b-instruct.Q5_K_M.gguf",
sd_model: "Lykon/dreamshaper-8",
inpaint_model: "Lykon/dreamshaper-8-inpainting",
negativePrompt: "bad hands, nsfw",
@@ -45,6 +46,7 @@ export const useGlobalSetup = defineStore("globalSetup", () => {

const paths = ref<ModelPaths>({
llm: "",
ggufLLM: "",
embedding: "",
stableDiffusion: "",
inpaint: "",
@@ -62,6 +64,10 @@ export const useGlobalSetup = defineStore("globalSetup", () => {

const graphicsList = ref(new Array<GraphicsItem>());

const llmBackends = ref(new Array<string>("IPEX-LLM", "LLAMA.CPP"));

const currentLLMBackend = ref("IPEX-LLM");

let envType = "";

const loadingState = ref("loading");
@@ -312,6 +318,8 @@ export const useGlobalSetup = defineStore("globalSetup", () => {
paths,
apiHost,
graphicsList,
llmBackends,
currentLLMBackend,
loadingState,
errorMessage,
hdPersistentConfirmation,
16 changes: 11 additions & 5 deletions WebUI/src/assets/js/store/models.ts
@@ -1,6 +1,6 @@
import { defineStore } from "pinia";

type ModelType = "llm" | "embedding" | "stableDiffusion" | "inpaint" | "lora" | "vae";
type ModelType = "llm" | "embedding" | "stableDiffusion" | "inpaint" | "lora" | "vae" | "ggufLLM";

export type Model = {
name: string;
@@ -15,36 +15,41 @@ const predefinedModels: Model[] = [
{ name: 'mistralai/Mistral-7B-Instruct-v0.3', type: 'llm', downloaded: false },
// { name: 'google/gemma-7b', type: 'llm', downloaded: false },
// { name: 'THUDM/chatglm3-6b', type: 'llm', downloaded: false },
]
{ name: 'meta-llama-3.1-8b-instruct.Q5_K_M.gguf', type: 'ggufLLM', downloaded: false },
{ name: 'smollm2-1.7b-instruct-q4_k_m.gguf', type: 'ggufLLM', downloaded: false },
]

export const useModels = defineStore("models", () => {

const hfToken = ref<string | undefined>(undefined);
const models = ref(predefinedModels);
const llms = computed(() => models.value.filter(m => m.type === 'llm'));

const downloadList = ref<DownloadModelParam[]>([]);
const ggufLLMs = computed(() => models.value.filter(m => m.type === 'ggufLLM'));

async function refreshModels() {
const sdModels = await window.electronAPI.getDownloadedDiffusionModels();
const llmModels = await window.electronAPI.getDownloadedLLMs();
const ggufModels = await window.electronAPI.getDownloadedGGUFLLMs();
const loraModels = await window.electronAPI.getDownloadedLoras();
const inpaintModels = await window.electronAPI.getDownloadedInpaintModels();
const embeddingModels = await window.electronAPI.getDownloadedEmbeddingModels();

const downloadedModels = [
...sdModels.map<Model>(name => ({ name, type: 'stableDiffusion', downloaded: true })),
...llmModels.map<Model>(name => ({ name, type: 'llm', downloaded: true })),
...ggufModels.map<Model>(name => ({ name, type: 'ggufLLM', downloaded: true })),
...loraModels.map<Model>(name => ({ name, type: 'lora', downloaded: true })),
...inpaintModels.map<Model>(name => ({ name, type: 'inpaint', downloaded: true })),
...embeddingModels.map<Model>(name => ({ name, type: 'embedding', downloaded: true })),
];
const notYetDownloaded = (model: Model) => !downloadedModels.map(m => m.name).includes(model.name);

models.value = [...downloadedModels, ...predefinedModels.filter(notYetDownloaded)];

console.log(models);

}
}

async function download(models: DownloadModelParam[]) {
};
@@ -53,6 +58,7 @@ export const useModels = defineStore("models", () => {
return {
models,
llms,
ggufLLMs,
hfToken,
hfTokenIsValid: computed(() => hfToken.value?.startsWith('hf_')),
downloadList,
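A view that needs the GGUF list would refresh the store once and then read the new ggufLLMs computed, assuming refreshModels is exposed from the store as the surrounding code suggests (a sketch; the component context is hypothetical):

// Sketch: populate the models store, then read only the GGUF entries.
const models = useModels();
await models.refreshModels();
const ggufNames = models.ggufLLMs.map(m => m.name); // e.g. ["smollm2-1.7b-instruct-q4_k_m.gguf"]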
1 change: 1 addition & 0 deletions WebUI/src/env.d.ts
@@ -62,6 +62,7 @@ type electronAPI = {
getDownloadedInpaintModels(): Promise<string[]>,
getDownloadedLoras(): Promise<string[]>,
getDownloadedLLMs(): Promise<string[]>,
getDownloadedGGUFLLMs(): Promise<string[]>,
getDownloadedEmbeddingModels(): Promise<string[]>,
openImageWithSystem(url: string): void,
selecteImage(url: string): void,
19 changes: 16 additions & 3 deletions WebUI/src/views/Answer.vue
@@ -95,7 +95,9 @@
<div class="flex flex-col gap-2 flex-auto h-full">
<div class="flex items-center justify-between gap-5 text-white px-2">
<div class="flex items-center">
<drop-selector :array="models.llms" @change="changeLLMModel" class="w-96">

<drop-selector v-if="globalSetup.currentLLMBackend === 'IPEX-LLM'" :array="models.llms" @change="changeLLMModel" class="w-96">
{{ console.log('models.llms', models.llms) }}
<template #selected>
<model-drop-down-item
:model="models.llms.find((m) => m.name === globalSetup.modelSettings.llm_model)"></model-drop-down-item>
@@ -104,6 +106,15 @@
<model-drop-down-item :model="slotItem.item"></model-drop-down-item>
</template>
</drop-selector>
<drop-selector v-if="globalSetup.currentLLMBackend === 'LLAMA.CPP'" :array="models.ggufLLMs" @change="" class="w-96">
<template #selected>
<model-drop-down-item
:model="models.ggufLLMs.find((m) => m.name === globalSetup.modelSettings.ggufLLM_model)"></model-drop-down-item>
</template>
<template #list="slotItem">
<model-drop-down-item :model="slotItem.item"></model-drop-down-item>
</template>
</drop-selector>
<button class="svg-icon i-refresh w-5 h-5 text-purple-500 flex-none ml-1"
@animationend="removeRonate360" @click="refreshLLMModles"></button>
<!-- <button
@@ -330,7 +341,8 @@ async function updateTitle(conversation: ChatItem[]) {
device: globalSetup.modelSettings.graphics,
prompt: chatContext,
enable_rag: false,
model_repo_id: globalSetup.modelSettings.llm_model,
model_repo_id: globalSetup.currentLLMBackend === 'IPEX-LLM' ? globalSetup.modelSettings.llm_model : globalSetup.modelSettings.ggufLLM_model,
backend_type: globalSetup.currentLLMBackend,
print_metrics: false
};
const response = await fetch(`${ globalSetup.apiHost }/api/llm/chat`, {
@@ -454,7 +466,8 @@ async function generate(chatContext: ChatItem[]) {
device: globalSetup.modelSettings.graphics,
prompt: chatContext,
enable_rag: ragData.enable,
model_repo_id: globalSetup.modelSettings.llm_model
model_repo_id: globalSetup.currentLLMBackend === 'IPEX-LLM' ? globalSetup.modelSettings.llm_model : globalSetup.modelSettings.ggufLLM_model,
backend_type: globalSetup.currentLLMBackend
};
const response = await fetch(`${globalSetup.apiHost}/api/llm/chat`, {
method: "POST", headers: {
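Both request bodies above change in the same way: the model id and the new backend_type field follow the selected backend. Read as a small helper, the selection logic looks like this (a sketch; the helper name is hypothetical, the fields mirror the diff):

// Hypothetical helper mirroring the payload construction in updateTitle() and generate().
function chatModelForBackend(): { model_repo_id: string, backend_type: string } {
    const useIpex = globalSetup.currentLLMBackend === 'IPEX-LLM';
    return {
        model_repo_id: useIpex
            ? globalSetup.modelSettings.llm_model      // e.g. "microsoft/Phi-3-mini-4k-instruct"
            : globalSetup.modelSettings.ggufLLM_model, // e.g. "meta-llama-3.1-8b-instruct.Q5_K_M.gguf"
        backend_type: globalSetup.currentLLMBackend    // "IPEX-LLM" or "LLAMA.CPP"
    };
}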
22 changes: 22 additions & 0 deletions WebUI/src/views/AppSettings.vue
@@ -50,6 +50,25 @@
<div v-show="tabIndex == 1" class="flex-auto h-0 flex flex-col gap-5 pt-3 border-t border-color-spilter">
<div class="px-3 flex-none flex flex-col gap-3">
<SettingsUi></SettingsUi>
<div class="flex flex-col gap-2">
<p>{{ languages.SETTINGS_LLM_BACKEND }}</p>
<div class="flex items-center gap-2">
<drop-selector :array="globalSetup.llmBackends" @change="changeLLMBackend">
<template #selected>
<div class="flex gap-2 items-center">
<span class="rounded-full bg-green-500 w-2 h-2"></span>
<span>{{ globalSetup.currentLLMBackend }}</span>
</div>
</template>
<template #list="slotItem">
<div class="flex gap-2 items-center">
<span class="rounded-full bg-green-500 w-2 h-2"></span>
<span>{{ slotItem.item }}</span>
</div>
</template>
</drop-selector>
</div>
</div>
</div>
</div>
<!--Model-->
@@ -411,6 +430,9 @@ function updateSizeLimit() {
}
}
function changeLLMBackend(item: string, _: number) {
globalSetup.currentLLMBackend = item;
}
function changeSDModel(item: any, _: number) {
modelSettings.sd_model = item as string;
1 change: 0 additions & 1 deletion service/ipex_backend.py
@@ -17,7 +17,6 @@
PreTrainedTokenizer,
[ruff check failure (GitHub Actions): service/ipex_backend.py:17:5: F401 `transformers.PreTrainedTokenizer` imported but unused]
TextStreamer
[ruff check failure (GitHub Actions): service/ipex_backend.py:18:5: F401 `transformers.TextStreamer` imported but unused]
)
from huggingface_hub import InferenceClient
from llm_params import LLMParams
from ipex_llm.transformers import AutoModelForCausalLM

32 changes: 25 additions & 7 deletions service/llama_cpp_backend.py
@@ -1,19 +1,33 @@
from typing import Dict, List
from os import path
from llm_interface import LLMInterface
from llama_cpp import CreateChatCompletionStreamResponse, Iterator, Llama
from llm_params import LLMParams
import model_config
import gc

class LlamaCpp(LLMInterface):
def __init__(self):
self._model = None
self.stop_generate = False
self._last_repo_id = None

def load_model(self, params: LLMParams, model_path: str = r"C:\Users\InnoHacker\Downloads\meta-llama-3.1-8b-instruct.Q5_K_M.gguf", n_gpu_layers: int = -1, context_length: int = 16000):
self._model = Llama(
model_path=model_path,
n_gpu_layers=n_gpu_layers,
n_ctx=context_length,
)
def load_model(self, params: LLMParams, n_gpu_layers: int = -1, context_length: int = 16000):
model_repo_id = params.model_repo_id
if self._model is None or self._last_repo_id != model_repo_id:
self.unload_model()

model_base_path = model_config.config.get("ggufLLM")
model_name = model_repo_id.replace("/", "---")
model_path = path.abspath(path.join(model_base_path, model_name))

self._model = Llama(
model_path=model_path,
n_gpu_layers=n_gpu_layers,
n_ctx=context_length,
)

self._last_repo_id = model_repo_id

def create_chat_completion(self, messages: List[Dict[str, str]]):
completion: Iterator[CreateChatCompletionStreamResponse] = self._model.create_chat_completion(
@@ -23,7 +37,11 @@ def create_chat_completion(self, messages: List[Dict[str, str]]):
return completion

def unload_model(self):
pass
if self._model is not None:
self._model.close()
del self._model
gc.collect()
self._model = None

def get_backend_type(self):
return "llama_cpp"
6 changes: 4 additions & 2 deletions service/llm_params.py
@@ -5,11 +5,13 @@ class LLMParams:
device: int
enable_rag: bool
model_repo_id: str
backend_type: str

def __init__(
self, prompt: list, device: int, enable_rag: bool, model_repo_id: str
self, prompt: list, device: int, enable_rag: bool, model_repo_id: str, backend_type: str
) -> None:
self.prompt = prompt
self.device = device
self.enable_rag = enable_rag
self.model_repo_id = model_repo_id
self.model_repo_id = model_repo_id
self.backend_type = backend_type
1 change: 1 addition & 0 deletions service/model_config.json
@@ -1,5 +1,6 @@
{
"llm": "./models/llm/checkpoints",
"ggufLLM": "./models/llm/gguf",
"stableDiffusion": "./models/stable_diffusion/checkpoints",
"lora": "./models/stable_diffusion/lora",
"vae": "./models/stable_diffusion/vae",
1 change: 1 addition & 0 deletions service/model_config.py
@@ -2,6 +2,7 @@

config = {
"llm": "./models/llm/checkpoints",
"ggufLLM": "./models/llm/gguf",
"embedding": "./models/llm/embedding",
"stableDiffusion": "./models/stable_diffusion/checkpoints",
"lora": "./models/stable_diffusion/lora",
(1 more changed file not rendered in this view)
