Commit

fix: yaml configuration
Signed-off-by: FuryMartin <[email protected]>
FuryMartin committed Jan 31, 2025
1 parent 3711666 commit 82c01c7
Showing 3 changed files with 23 additions and 5 deletions.
@@ -22,7 +22,7 @@ benchmarkingjob:
- name: "query-routing"
# the url address of test algorithm configuration file; string type;
# the file format supports yaml/yml;
-      url: "./examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/test_queryrouting_spec.yaml"
+      url: "./examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/test_queryrouting.yaml"

# the configuration of ranking leaderboard
rank:
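The fix above points the benchmarking job at the renamed test-algorithm file. A quick way to catch such stale paths before a run is to validate every algorithm `url` in the job config; the nesting used below (`benchmarkingjob` → `test_object` → `algorithms`) is assumed from the fragment above for illustration, not a guaranteed schema:

```python
import os

def find_missing_algorithm_files(benchmark_cfg: dict) -> list:
    """Return algorithm config urls that do not resolve to a yaml file on disk.

    The nesting traversed here (benchmarkingjob -> test_object -> algorithms)
    is an assumption based on the yaml fragment above.
    """
    missing = []
    job = benchmark_cfg.get("benchmarkingjob", {})
    for algo in job.get("test_object", {}).get("algorithms", []):
        url = algo.get("url", "")
        # Flag entries whose extension is wrong or whose file is absent.
        if not url.endswith((".yaml", ".yml")) or not os.path.isfile(url):
            missing.append(url)
    return missing
```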
@@ -36,6 +36,7 @@ def __init__(self, **kwargs):
self.model_name = kwargs.get("model", None)
self.backend = kwargs.get("backend", "huggingface")
self._set_config()
+        self.load()

def _set_config(self):
"""Set the model path in our environment variables due to Sedna’s [check](https://github.com/kubeedge/sedna/blob/ac623ab32dc37caa04b9e8480dbe1a8c41c4a6c2/lib/sedna/core/base.py#L132).
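The docstring explains that the model path must be exported through an environment variable so Sedna's base-class check can find it before the model object is built. A minimal sketch of that idea follows; the variable name `MODEL_URL` is assumed here rather than taken from Sedna's source, so check the linked base.py for the exact key:

```python
import os

def set_model_path_env(model_path: str, var_name: str = "MODEL_URL") -> None:
    # Sedna's core base class validates the model location via an
    # environment variable, so the path is exported before the model
    # is constructed. `MODEL_URL` is an assumed variable name here.
    os.environ[var_name] = model_path
```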
@@ -67,29 +67,46 @@ algorithm:
url: "./examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/cloud_model.py"

hyperparameters:
# name of the hyperparameter; string type;
- model:
values:
- "gpt-4o-mini"
- backend:
# backend; string type;
# currently the options of value are as follows:
# 1> "huggingface": transformers backend;
# 2> "vllm": vLLM backend;
# 3> "api": OpenAI API backend;
# 4> "EagleSpecDec": EAGLE Speculative Decoding framework;
# 5> "LadeSepcDec": Lookahead Decoding framework;
values:
# - "EagleSpecDec"
- "api"
- temperature:
# What sampling temperature to use, between 0 and 2; float type;
# For reproducible results, the temperature should be set to 0;
values:
- 0
- 0.9
- top_p:
# nucleus sampling parameter; float type;
values:
- 0.8
- 0.9
- max_tokens:
# The maximum number of tokens that can be generated in the chat completion; int type;
values:
- 1024
- repetition_penalty:
# The parameter for repetition penalty; float type;
values:
- 1.05
- use_cache:
# Whether to use response cache; boolean type;
values:
- true
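The hyperparameters above split into two groups: sampling options that an OpenAI-compatible `api` backend accepts directly (`temperature`, `top_p`, `max_tokens`), and options that only local backends such as `huggingface` or `vllm` consume (`repetition_penalty`, `use_cache`). A hedged sketch of that split — the grouping and defaults are an assumption for illustration, not the repo's actual plumbing:

```python
def build_generation_kwargs(hparams: dict) -> dict:
    """Split the yaml hyperparameters above into OpenAI-style request
    kwargs and backend-side options.

    The mapping is a sketch: `repetition_penalty` and `use_cache` are
    not part of the OpenAI chat-completions API, so they are separated
    out for local backends.
    """
    api_kwargs = {
        "model": hparams.get("model", "gpt-4o-mini"),
        "temperature": hparams.get("temperature", 0),  # 0 => reproducible
        "top_p": hparams.get("top_p", 0.8),
        "max_tokens": hparams.get("max_tokens", 1024),
    }
    backend_kwargs = {
        "repetition_penalty": hparams.get("repetition_penalty", 1.05),
        "use_cache": hparams.get("use_cache", True),
    }
    return {"api": api_kwargs, "backend": backend_kwargs}
```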

- type: "hard_example_mining"
# name of Router module; string type;
# BERTRouter, EdgeOnly, CloudOnly, RandomRouter, OracleRouter
-      name: "OracleRouter"
+      name: "CloudOnly"
# the url address of python module; string type;
url: "./examples/cloud-edge-collaborative-inference-for-llm/testalgorithms/query-routing/hard_sample_mining.py"
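The `hard_example_mining` module decides, per query, whether the edge answer suffices or the query must be escalated to the cloud model. A minimal sketch of two of the listed routers, under the assumption that a router is a callable returning True when a query should go to the cloud — the repo's actual interface lives in hard_sample_mining.py:

```python
import random

class CloudOnly:
    """Escalate every query to the cloud model."""
    def __call__(self, query) -> bool:
        # Treat all queries as hard examples.
        return True

class RandomRouter:
    """Escalate a fixed fraction of queries, chosen at random."""
    def __init__(self, cloud_ratio: float = 0.5, seed: int = 0):
        self.cloud_ratio = cloud_ratio
        self._rng = random.Random(seed)

    def __call__(self, query) -> bool:
        # Route to the cloud with probability `cloud_ratio`.
        return self._rng.random() < self.cloud_ratio
```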
