Skip to content

Commit

Permalink
Improve cold starts by only loading Detic/Qwen when they are initialized; added…
Browse files Browse the repository at this point in the history
… model warmup function
  • Loading branch information
AnirudhRahul committed Jun 12, 2024
1 parent 631e685 commit f23ce88
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 40 deletions.
3 changes: 0 additions & 3 deletions overeasy/models/LLMs/qwenvl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
from overeasy.logging import log_time
from overeasy.types import MultimodalLLM

# Note: The default behavior now has injection attack prevention off.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat", trust_remote_code=True)

# use bf16
# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL-Chat", device_map="auto", trust_remote_code=True, bf16=True).eval()
# use fp16
Expand Down
19 changes: 19 additions & 0 deletions overeasy/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,22 @@
from .recognition import *
from .LLMs import *
from .classification import *

def warmup_models():
    """Instantiate every model backend once so its weights are downloaded
    and cached, then immediately release it.

    Intended to be run ahead of time (e.g. during image build or deploy)
    to eliminate cold-start downloads when the models are first used.
    Returns nothing; the only effect is populating the local model caches.
    """
    # One entry per backend; constructing the object triggers the
    # weight download/caching that each model performs in __init__.
    model_classes = [
        QwenVL,
        GroundingDINO,
        DETIC,
        OwlV2,
        YOLOWorld,
        CLIP,
        LaionCLIP,
        BiomedCLIP,
    ]
    for model_cls in model_classes:
        model = model_cls()
        # Drop the instance right away so we never hold more than one
        # model's weights in memory at a time during warmup.
        del model

15 changes: 9 additions & 6 deletions overeasy/models/detection/detic.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
def setup_cfg(args):
from centernet.config import add_centernet_config
from detic.config import add_detic_config
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.MODEL.DEVICE = "cpu" if args.cpu else "cuda"
Expand All @@ -38,6 +39,10 @@ def setup_cfg(args):


def load_detic_model(classes : List[str]):
from detectron2.data.detection_utils import _apply_exif_orientation
from detectron2.utils.logger import setup_logger
from detic.predictor import VisualizationDemo

mp.set_start_method("spawn", force=True)
setup_logger(name="fvcore")

Expand All @@ -57,7 +62,6 @@ def load_detic_model(classes : List[str]):
args.pred_all_class = False
cfg = setup_cfg(args)

from detic.predictor import VisualizationDemo

# https://github.com/facebookresearch/Detic/blob/main/detic/predictor.py#L39
demo = VisualizationDemo(cfg, args)
Expand All @@ -69,7 +73,7 @@ def load_detic_model(classes : List[str]):
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def check_dependencies():
# Create the ~/.cache/autodistill directory if it doesn't exist

original_dir = os.getcwd()
autodistill_dir = os.path.expanduser("~/.overeasy")
os.makedirs(autodistill_dir, exist_ok=True)
Expand Down Expand Up @@ -111,16 +115,15 @@ def check_dependencies():
subprocess.run(["wget", model_url, "-O", model_path])

os.chdir(original_dir)
check_dependencies()


from detectron2.config import get_cfg
from detectron2.data.detection_utils import _apply_exif_orientation
from detectron2.utils.logger import setup_logger



class DETIC(BoundingBoxModel):
def __init__(self):
check_dependencies()

self.classes = None

def set_classes(self, classes: List[str]):
Expand Down
22 changes: 9 additions & 13 deletions overeasy/models/detection/dino.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import urllib.request
import cv2
import numpy as np
Expand All @@ -12,13 +11,15 @@
from overeasy.logging import log_time
import warnings
import sys, io
import os
import cv2

# Ignore the specific UserWarning about torch.meshgrid
warnings.filterwarnings("ignore", message="torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument.", category=UserWarning, module='torch.functional')

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if not torch.cuda.is_available():
print("WARNING: CUDA not available. GroundingDINO will run very slowly.")
if DEVICE.type != 'cuda':
warnings.warn("Warning: CUDA is not available. DINO will run on CPU, which may result in slower performance.")

class GroundingDINOModel(Enum):
Pretrain_1_8M = "pretrain"
Expand Down Expand Up @@ -89,15 +90,6 @@ def load_grounding_dino(model: GroundingDINOModel):



import os
from dataclasses import dataclass
import torch
import cv2

HOME = os.path.expanduser("~")
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if DEVICE.type != 'cuda':
print("Warning: CUDA is not available. DINO will run on CPU, which may result in slower performance.")


def combine_detections(detections_list: List[Detections], classes: List[str], overwrite_class_ids=None):
Expand Down Expand Up @@ -158,10 +150,14 @@ def __init__(
box_threshold: float = 0.35,
text_threshold: float = 0.25,
):
# Redirect grounding dino setup output to a string
original_stdout = sys.stdout
sys.stdout = io.StringIO()
try:
self.grounding_dino_model = load_grounding_dino(model=type)
except Exception as e:
print(f"Error loading GroundingDINO model: {e}")
print(sys.stdout.getvalue())
finally:
sys.stdout = original_stdout
self.box_threshold = box_threshold
Expand Down
34 changes: 17 additions & 17 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ boto3 = "^1.34.121"
groq = "^0.9.0"
litellm = "^1.40.9"
jsonref = "^1.1.0"
google-cloud-aiplatform = "1.54.1"
google-cloud-aiplatform = "^1.54.1"

[tool.poetry.scripts]
post_install = "overeasy.models.detection.detic:check_dependencies"
Expand Down
3 changes: 3 additions & 0 deletions warmup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from overeasy.models import warmup_models

# Pre-download and cache the weights for every model backend so that
# later runs start warm. NOTE(review): runs at import time with no
# __main__ guard — presumably this script is only ever executed
# directly (e.g. in a build step); confirm before importing it.
warmup_models()

0 comments on commit f23ce88

Please sign in to comment.