diff --git a/examples/cloud-edge-collaborative-inference-for-llm/benchmarkingjob.yaml b/examples/cloud-edge-collaborative-inference-for-llm/benchmarkingjob.yaml
index 05a3425a..aa5e0941 100755
--- a/examples/cloud-edge-collaborative-inference-for-llm/benchmarkingjob.yaml
+++ b/examples/cloud-edge-collaborative-inference-for-llm/benchmarkingjob.yaml
@@ -22,7 +22,7 @@ benchmarkingjob:
       - name: "query-routing"
         # the url address of test algorithm configuration file; string type;
         # the file format supports yaml/yml;
-        url: "./examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/test_queryrouting_spec.yaml"
+        url: "./examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/test_queryrouting.yaml"
 
   # the configuration of ranking leaderboard
   rank:
diff --git a/examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/cloud_model.py b/examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/cloud_model.py
index e0669e55..751160ae 100644
--- a/examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/cloud_model.py
+++ b/examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/cloud_model.py
@@ -36,6 +36,7 @@ def __init__(self, **kwargs):
         self.model_name = kwargs.get("model", None)
         self.backend = kwargs.get("backend", "huggingface")
         self._set_config()
+        self.load()
 
     def _set_config(self):
         """Set the model path in our environment variables due to Sedna’s [check](https://github.com/kubeedge/sedna/blob/ac623ab32dc37caa04b9e8480dbe1a8c41c4a6c2/lib/sedna/core/base.py#L132).
diff --git a/examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/test_queryrouting.yaml b/examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/test_queryrouting.yaml
index 2fd8a15b..4240a03f 100644
--- a/examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/test_queryrouting.yaml
+++ b/examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/test_queryrouting.yaml
@@ -67,29 +67,46 @@ algorithm:
       url: "./examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/cloud_model.py"
 
       hyperparameters:
-        # name of the hyperparameter; string type;
+        # name of the hyperparameter; string type;
         - model:
             values:
               - "gpt-4o-mini"
+        - backend:
+            # backend; string type;
+            # currently the options of value are as follows:
+            #   1> "huggingface": transformers backend;
+            #   2> "vllm": vLLM backend;
+            #   3> "api": OpenAI API backend;
+            #   4> "EagleSpecDec": EAGLE Speculative Decoding framework;
+            #   5> "LadeSepcDec": Lookahead Decoding framework;
+            values:
+              # - "EagleSpecDec"
+              - "api"
         - temperature:
+            # What sampling temperature to use, between 0 and 2; float type;
+            # For reproducible results, the temperature should be set to 0;
            values:
-              - 0
+              - 0.9
         - top_p:
+            # Nucleus sampling parameter; float type;
            values:
-              - 0.8
+              - 0.9
         - max_tokens:
+            # The maximum number of tokens that can be generated in the chat completion; int type;
            values:
              - 1024
         - repetition_penalty:
+            # The parameter for repetition penalty; float type;
            values:
              - 1.05
         - use_cache:
+            # Whether to use response cache; boolean type;
            values:
              - true
 
     - type: "hard_example_mining"
       # name of Router module; string type;
       # BERTRouter, EdgeOnly, CloudOnly, RandomRouter, OracleRouter
-      name: "OracleRouter"
+      name: "CloudOnly"
       # the url address of python module; string type;
       url: "./examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/hard_sample_mining.py"