Commit 3711666
feat: modify architecture picture; fix: cloud-model multi-backend
Signed-off-by: FuryMartin <[email protected]>
FuryMartin committed Jan 16, 2025
1 parent ad1566e commit 3711666
Showing 4 changed files with 33 additions and 11 deletions.
@@ -30,7 +30,7 @@ Additionally, Speculative Decoding $^{[3]}$ is another promising strategy to fur

The overall design is shown in the figure below.

-![image-20240926143857223](./assets/image-20241128535482354.png)
+![image-20240926143857223](./assets/image-20250115535482354.png)

When Ianvs starts the benchmarking job, the Test Env Manager will first pass the data of the user-specified Dataset to the Test Case Controller for Joint Inference one by one.

Binary file not shown.
@@ -18,7 +18,7 @@

from core.common.log import LOGGER
from sedna.common.class_factory import ClassType, ClassFactory
-from models import APIBasedLLM
+from models import APIBasedLLM, HuggingfaceLLM, VllmLLM, EagleSpecDecModel, LadeSpecDecLLM

os.environ['BACKEND_TYPE'] = 'TORCH'

@@ -32,18 +32,40 @@ def __init__(self, **kwargs):
"""Initialize the CloudModel. See `APIBasedLLM` for details about `kwargs`.
"""
LOGGER.info(kwargs)
self.model = APIBasedLLM(**kwargs)
self.load(kwargs.get("model", "gpt-4o-mini"))
self.kwargs = kwargs
self.model_name = kwargs.get("model", None)
self.backend = kwargs.get("backend", "huggingface")
self._set_config()

def load(self, model):
"""Set the model.
def _set_config(self):
"""Set the model path in our environment variables due to Sedna’s [check](https://github.com/kubeedge/sedna/blob/ac623ab32dc37caa04b9e8480dbe1a8c41c4a6c2/lib/sedna/core/base.py#L132).
"""
pass
#
# os.environ["model_path"] = self.model_name

Parameters
----------
model : str
Existing model from your OpenAI provider. Example: `gpt-4o-mini`
def load(self, **kwargs):
"""Set the model backend to be used. Will be called by Sedna's JointInference interface.
Raises
------
Exception
When the backend is not supported.
"""
self.model._load(model = model)
if self.backend == "huggingface":
self.model = HuggingfaceLLM(**self.kwargs)
elif self.backend == "vllm":
self.model = VllmLLM(**self.kwargs)
elif self.backend == "api":
self.model = APIBasedLLM(**self.kwargs)
elif self.backend == "EagleSpecDec":
self.model = EagleSpecDecModel(**self.kwargs)
elif self.backend == "LadeSpecDec":
self.model = LadeSpecDecLLM(**self.kwargs)
else:
raise Exception(f"Backend {self.backend} is not supported. Please use 'huggingface', 'vllm', or `api` ")

self.model._load(self.kwargs.get("model", None))

def inference(self, data, **kwargs):
"""Inference the model with the given data.
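For readers tracing this change, a minimal usage sketch of the new multi-backend dispatch follows. It is not part of the commit: the standalone import, the model name, and the direct call sequence are illustrative assumptions; in a real Ianvs run the class is resolved through ClassFactory and driven by Sedna's JointInference interface.

# Hypothetical sketch (not from this commit): exercising CloudModel's
# backend dispatch directly. Assumes cloud_model.py is importable and that
# credentials for the chosen backend are configured in the environment.
from cloud_model import CloudModel

# `backend` selects one of "huggingface", "vllm", "api", "EagleSpecDec",
# or "LadeSpecDec"; any other value makes load() raise an Exception.
cloud = CloudModel(model="gpt-4o-mini", backend="api")
cloud.load()        # dispatches to APIBasedLLM and calls its _load()
result = cloud.inference("Which is larger, 9.11 or 9.8?")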
