diff --git a/src/engine.py b/src/engine.py index 48e8aac..f999315 100644 --- a/src/engine.py +++ b/src/engine.py @@ -146,6 +146,8 @@ async def _initialize_engines(self): base_model_paths=self.base_model_paths, response_role=self.response_role, chat_template=self.tokenizer.tokenizer.chat_template, + enable_auto_tools=os.getenv('ENABLE_AUTO_TOOL_CHOICE', 'false').lower() == 'true', + tool_parser=os.getenv('TOOL_CALL_PARSER', "") or None, lora_modules=lora_modules, prompt_adapters=None, request_logger=None diff --git a/src/engine_args.py b/src/engine_args.py index 4c445df..45e50d1 100644 --- a/src/engine_args.py +++ b/src/engine_args.py @@ -89,9 +89,7 @@ "qlora_adapter_name_or_path": os.getenv('QLORA_ADAPTER_NAME_OR_PATH', None), "disable_logprobs_during_spec_decoding": os.getenv('DISABLE_LOGPROBS_DURING_SPEC_DECODING', None), "otlp_traces_endpoint": os.getenv('OTLP_TRACES_ENDPOINT', None), - "use_v2_block_manager": os.getenv('USE_V2_BLOCK_MANAGER', 'true'), - "enable_auto_tool_choice": os.getenv('ENABLE_AUTO_TOOL_CHOICE', 'false').lower() == 'true', - "tool_call_parser": os.getenv('TOOL_CALL_PARSER', "") or None + "use_v2_block_manager": os.getenv('USE_V2_BLOCK_MANAGER', 'true') } def match_vllm_args(args):