# ----- Model-convert-merge Env ----- #
# LLaMA model size
MODEL_SIZE=7B
# LLaMA model path
INPUT_DIR=/app/models/llama/$MODEL_SIZE
# Model name
MODEL_TYPE=Chinese-Alpaca-Plus
# LoRA names
LORA_NAME_1=chinese-llama-plus-lora-7b
LORA_NAME_2=chinese-alpaca-plus-lora-7b
# Model path after conversion to HF format
HF_OUTPUT_DIR=$INPUT_DIR/$MODEL_SIZE-HF
# LoRA path and LoRA name
LORA_PATH=$INPUT_DIR/LoRA
LORA_MODEL=$LORA_PATH/$LORA_NAME_1,$LORA_PATH/$LORA_NAME_2
# Model format
OUTPUT_TYPE=pth
# Output path for the model after conversion and merging
MERGED_OUTPUT_DIR=$INPUT_DIR/$MODEL_TYPE/$MODEL_SIZE
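# For reference, with the defaults above (and assuming the consumer of this
# file expands $VAR references, as e.g. docker compose interpolation does),
# the paths resolve to:
#   INPUT_DIR=/app/models/llama/7B
#   HF_OUTPUT_DIR=/app/models/llama/7B/7B-HF
#   LORA_MODEL=/app/models/llama/7B/LoRA/chinese-llama-plus-lora-7b,/app/models/llama/7B/LoRA/chinese-alpaca-plus-lora-7b
#   MERGED_OUTPUT_DIR=/app/models/llama/7B/Chinese-Alpaca-Plus/7B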
# Quantization type
QUANTIZE_TYPE=q4_0
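# A rough sketch of the quantization step this value feeds into, assuming the
# upstream llama.cpp quantize tool (the binary name and model filenames may
# differ in your build):
#   ./quantize $MERGED_OUTPUT_DIR/ggml-model-f16.bin $MERGED_OUTPUT_DIR/ggml-model-$QUANTIZE_TYPE.bin $QUANTIZE_TYPE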
# ----- Text-generation-webui Env ----- #
# Host-side data directory for text-generation-webui
DATA_DIR=./data/text-generation-webui
# by default the Dockerfile specifies these compute capabilities: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX
# however, to get it working I had to specify the exact compute capability of my card (RTX 2060), which is 7.5
# you can find the compute capability for your card at https://developer.nvidia.com/cuda-gpus
TORCH_CUDA_ARCH_LIST=6.1
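# One way to check the compute capability locally (recent NVIDIA drivers):
#   nvidia-smi --query-gpu=compute_cap --format=csv,noheader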
# this command worked for me with roughly 4.5 GB of VRAM
CLI_ARGS=--model ggml-model-q4_0.bin --threads 8 --n_ctx 512 --listen --chat
# the following examples have been tested with the files linked in docs/README_docker.md:
# example running 13b with 4bit/128 groupsize: CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
# example loading the api extension with a public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
# example running 7b in 8-bit mode: CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices
# the port the webui binds to on the host
HOST_PORT=7860
# the port the webui binds to inside the container
CONTAINER_PORT=7860
# the port the api binds to on the host
HOST_API_PORT=5000
# the port the api binds to inside the container
CONTAINER_API_PORT=5000
# the port the api stream endpoint binds to on the host
HOST_API_STREAM_PORT=5005
# the port the api stream endpoint binds to inside the container
CONTAINER_API_STREAM_PORT=5005
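# A minimal sketch of how a docker-compose.yml might consume these pairs
# (the actual compose file for this repo may differ):
#   ports:
#     - "${HOST_PORT}:${CONTAINER_PORT}"
#     - "${HOST_API_PORT}:${CONTAINER_API_PORT}"
#     - "${HOST_API_STREAM_PORT}:${CONTAINER_API_STREAM_PORT}"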
# the git revision of text-generation-webui to install from
WEBUI_VERSION=HEAD
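# HEAD tracks the latest commit; a sketch of how a build step might pin it
# (the repo's Dockerfile may do this differently):
#   git clone https://github.com/oobabooga/text-generation-webui && \
#     cd text-generation-webui && git checkout ${WEBUI_VERSION}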