docker-compose.yml
version: '3.9'

# model-convert-merge
x-base-service: &base_service
  build: ./services/model-convert-merge
  image: model-convert-merge
  volumes:
    - ./models:/app/models
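
# Most services below pull in these shared settings via `<<: *base_service`
# (same image, same ./models bind mount) and are gated behind a compose
# profile, so only the requested pipeline step runs. CLI_ARGS is presumably
# consumed by the image's entrypoint (an assumption; the entrypoint is
# defined in ./services/model-convert-merge).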
services:
  init-dir:
    <<: *base_service
    profiles: ["init-dir"]
    container_name: init-dir
    command:
      - mkdir
      - -p
      - $HF_OUTPUT_DIR
      - $INPUT_DIR/LoRA
      - $INPUT_DIR/$MODEL_TYPE
      - $INPUT_DIR/$MODEL_TYPE/$MODEL_SIZE
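
  # Example layout (values are assumptions, not taken from this repo's .env):
  # with INPUT_DIR=/app/models, MODEL_TYPE=llama, MODEL_SIZE=7B this prepares
  # /app/models/LoRA, /app/models/llama and /app/models/llama/7B inside the
  # shared ./models volume, plus the $HF_OUTPUT_DIR target directory.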
  models-convert:
    <<: *base_service
    profiles: ["models-convert"]
    container_name: models-convert
    environment:
      - CLI_ARGS=convert_llama_weights_to_hf.py --input_dir $INPUT_DIR --model_size $MODEL_SIZE --output_dir $HF_OUTPUT_DIR
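
  # convert_llama_weights_to_hf.py is the Hugging Face Transformers script
  # that turns the original LLaMA checkpoint under $INPUT_DIR (the
  # --model_size flag selects the size subfolder) into the Hugging Face
  # format the merge step below expects.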
  models-merge:
    <<: *base_service
    profiles: ["models-merge"]
    container_name: models-merge
    environment:
      - CLI_ARGS=merge_llama_with_chinese_lora.py --base_model $HF_OUTPUT_DIR --lora_model $LORA_MODEL --output_type $OUTPUT_TYPE --output_dir $MERGED_OUTPUT_DIR
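
  # merge_llama_with_chinese_lora.py is the Chinese-LLaMA-Alpaca merge script:
  # it applies the LoRA weights in $LORA_MODEL on top of the HF base model and
  # writes the merged model to $MERGED_OUTPUT_DIR ($OUTPUT_TYPE chooses between
  # huggingface and pth output).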
  init-dir-quantize:
    <<: *base_service
    profiles: ["init-dir-quantize"]
    container_name: init-dir-quantize
    command: >
      sh -c "mkdir -p $INPUT_DIR/$MODEL_TYPE/$MODEL_SIZE &&
             mv $INPUT_DIR/$MODEL_TYPE/$MODEL_SIZE/tokenizer.model $INPUT_DIR/$MODEL_TYPE"
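
  # Moving tokenizer.model up one level reproduces the layout llama.cpp's
  # conversion scripts traditionally look for (tokenizer.model next to the
  # size-specific weight folder, e.g. models/tokenizer.model beside
  # models/7B). This rationale is inferred, not stated in the file.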
  llama-cpp-models-convert:
    <<: *base_service
    profiles: ["llama-cpp-models-convert"]
    container_name: llama-cpp-models-convert
    environment:
      - CLI_ARGS=convert.py $MERGED_OUTPUT_DIR
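
  # llama.cpp's convert.py turns the merged model into a GGML file; the
  # ggml-model-f16.bin consumed by the quantize step below is assumed to be
  # its default f16 output in $MERGED_OUTPUT_DIR.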
  llama-cpp-quantize:
    build: ./services/llama.cpp
    profiles: ["llama-cpp-quantize"]
    container_name: llama-cpp-quantize
    image: llama.cpp:full-zh
    command:
      - ./quantize
      - $MERGED_OUTPUT_DIR/ggml-model-f16.bin
      - $MERGED_OUTPUT_DIR/ggml-model-q4_0.bin
      - $QUANTIZE_TYPE
    volumes:
      - ./models:/app/models
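
  # ./quantize takes <input> <output> <type>. Note the output filename is
  # hard-coded to ggml-model-q4_0.bin even though $QUANTIZE_TYPE is
  # configurable, so a non-q4_0 type still lands in that file.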
  text-generation-webui:
    profiles: ["text-generation-webui"]
    build:
      context: ./services/text-generation-webui
      args:
        # specify the CUDA compute capability of your card: https://developer.nvidia.com/cuda-gpus
        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}
        WEBUI_VERSION: ${WEBUI_VERSION}
    container_name: text-generation-webui
    env_file: .env
    ports:
      - "${HOST_PORT}:${CONTAINER_PORT}"
      - "${HOST_API_PORT}:${CONTAINER_API_PORT}"
      - "${HOST_API_STREAM_PORT}:${CONTAINER_API_STREAM_PORT}"
    stdin_open: true
    tty: true
    volumes:
      # - ${DATA_DIR}/characters:/app/characters
      # - ${DATA_DIR}/extensions:/app/extensions
      # - ${DATA_DIR}/loras:/app/loras
      # - ${DATA_DIR}/presets:/app/presets
      # - ${DATA_DIR}/prompts:/app/prompts
      # - ${DATA_DIR}/softprompts:/app/softprompts
      # - ${DATA_DIR}/training:/app/training
      - ./models/llama/$MODEL_SIZE/$MODEL_TYPE/$MODEL_SIZE:/app/models
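
    # Reserves GPU 0 through the NVIDIA container runtime; change device_ids
    # to target a different card.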
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
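
# Example end-to-end run (a sketch, not taken from the repo's docs; set the
# variables in .env first, profile names are from this file):
#   docker compose --profile init-dir up
#   docker compose --profile models-convert up
#   docker compose --profile models-merge up
#   docker compose --profile init-dir-quantize up
#   docker compose --profile llama-cpp-models-convert up
#   docker compose --profile llama-cpp-quantize up
#   docker compose --profile text-generation-webui up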