Skip to content

Commit

Permalink
feat(vllm): expose 'load_format' (#3943)
Browse files (browse the repository at this point in the history)
Signed-off-by: Ettore Di Giacinto <[email protected]>
  • Loading branch information
mudler authored Oct 23, 2024
1 parent c75ecfa commit ae1ec4e
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 0 deletions.
2 changes: 2 additions & 0 deletions backend/python/vllm/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ async def LoadModel(self, request, context):

if request.Quantization != "":
engine_args.quantization = request.Quantization
if request.LoadFormat != "":
engine_args.load_format = request.LoadFormat
if request.GPUMemoryUtilization != 0:
engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
if request.TrustRemoteCode:
Expand Down
1 change: 1 addition & 0 deletions core/backend/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
DraftModel: c.DraftModel,
AudioPath: c.VallE.AudioPath,
Quantization: c.Quantization,
LoadFormat: c.LoadFormat,

Check failure on line 142 in core/backend/options.go (GitHub Actions / build-linux):
unknown field LoadFormat in struct literal of type "github.com/mudler/LocalAI/pkg/grpc/proto".ModelOptions
GPUMemoryUtilization: c.GPUMemoryUtilization,
TrustRemoteCode: c.TrustRemoteCode,
EnforceEager: c.EnforceEager,
Expand Down
1 change: 1 addition & 0 deletions core/config/backend_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ type LLMConfig struct {
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
LoadFormat string `yaml:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager"` // vLLM
Expand Down

0 comments on commit ae1ec4e

Please sign in to comment.