opendatahub-io · Jooho · Feb 3, 2025 · Jan 17, 2025 · Jan 31, 2025 · Feb 3, 2025
diff --git a/config/crd/full/serving.kserve.io_inferencegraphs.yaml b/config/crd/full/serving.kserve.io_inferencegraphs.yaml
@@ -527,6 +527,8 @@ spec:
                       properties:
                         name:
                           type: string
+                        request:
+                          type: string
                       required:
                       - name
                       type: object
@@ -592,6 +594,8 @@ spec:
                   - type
                   type: object
                 type: array
+              deploymentMode:
+                type: string
               observedGeneration:
                 format: int64
                 type: integer

diff --git a/config/crd/full/serving.kserve.io_inferenceservices.yaml b/config/crd/full/serving.kserve.io_inferenceservices.yaml
diff --git a/pkg/apis/serving/v1alpha1/inference_graph.go b/pkg/apis/serving/v1alpha1/inference_graph.go
@@ -304,6 +304,8 @@ type InferenceGraphStatus struct {
 	// Url for the InferenceGraph
 	// +optional
 	URL *apis.URL `json:"url,omitempty"`
+	// DeploymentMode is the deployment mode recorded for the InferenceGraph; when non-empty it takes precedence over the annotation.
+	DeploymentMode string `json:"deploymentMode,omitempty"`
 }
 
 // InferenceGraphList contains a list of InferenceGraph

diff --git a/pkg/apis/serving/v1beta1/inference_service_status.go b/pkg/apis/serving/v1beta1/inference_service_status.go
@@ -50,6 +50,8 @@ type InferenceServiceStatus struct {
 	Components map[ComponentType]ComponentStatusSpec `json:"components,omitempty"`
 	// Model related statuses
 	ModelStatus ModelStatus `json:"modelStatus,omitempty"`
+	// DeploymentMode is the deployment mode used for the InferenceService, recorded by the controller when it updates the status.
+	DeploymentMode string `json:"deploymentMode,omitempty"`
 }
 
 // ComponentStatusSpec describes the state of the component

diff --git a/pkg/apis/serving/v1beta1/inference_service_validation.go b/pkg/apis/serving/v1beta1/inference_service_validation.go
@@ -79,8 +79,15 @@ func (v *InferenceServiceValidator) ValidateUpdate(ctx context.Context, oldObj,
 		validatorLogger.Error(err, "Unable to convert object to InferenceService")
 		return nil, err
 	}
+	oldIsvc, err := convertToInferenceService(oldObj)
+	if err != nil {
+		validatorLogger.Error(err, "Unable to convert object to InferenceService")
+	}
 	validatorLogger.Info("validate update", "name", isvc.Name)
-
+	err = validateDeploymentMode(isvc, oldIsvc)
+	if err != nil {
+		return nil, err
+	}
 	return validateInferenceService(isvc)
 }
 
@@ -353,6 +360,19 @@ func validateCollocationStorageURI(predictorSpec PredictorSpec) error {
 	return nil
 }
 
+// validateDeploymentMode returns an error when the deploymentMode annotation on newIsvc differs from the mode recorded
+// in oldIsvc's status; a nil object, an empty recorded mode or a missing annotation leaves nothing to reject.
+func validateDeploymentMode(newIsvc *InferenceService, oldIsvc *InferenceService) error {
+	// ValidateUpdate only logs a failed conversion of the old object, which may leave oldIsvc nil: guard, don't panic.
+	if newIsvc == nil || oldIsvc == nil || len(oldIsvc.Status.DeploymentMode) == 0 {
+		return nil
+	}
+	if mode, ok := newIsvc.Annotations[constants.DeploymentMode]; ok && mode != oldIsvc.Status.DeploymentMode {
+		return fmt.Errorf("update rejected: deploymentMode cannot be changed from '%s' to '%s'", oldIsvc.Status.DeploymentMode, mode)
+	}
+	return nil
+}
+
 // Convert runtime.Object into InferenceService
 func convertToInferenceService(obj runtime.Object) (*InferenceService, error) {
 	isvc, ok := obj.(*InferenceService)

diff --git a/pkg/apis/serving/v1beta1/inference_service_validation_test.go b/pkg/apis/serving/v1beta1/inference_service_validation_test.go
@@ -803,6 +803,32 @@ func TestValidateMultiNodeVariables(t *testing.T) {
 	}
 }
 
+func TestDeploymentModeUpdate(t *testing.T) {
+	g := gomega.NewGomegaWithT(t)
+	oldIsvc := makeTestInferenceService()
+	oldIsvc.Status = InferenceServiceStatus{
+		DeploymentMode: "Serverless",
+	}
+	updatedIsvc := oldIsvc.DeepCopy()
+	updatedIsvc.Annotations = map[string]string{
+		constants.DeploymentMode: "RawDeployment",
+	}
+	validator := InferenceServiceValidator{}
+	warnings, err := validator.ValidateUpdate(context.Background(), &oldIsvc, updatedIsvc)
+	// The old status records "Serverless" but the annotation asks for "RawDeployment": the update must be rejected.
+	g.Expect(warnings).Should(gomega.BeEmpty())
+	g.Expect(err).ShouldNot(gomega.Succeed())
+
+	updatedIsvc1 := oldIsvc.DeepCopy()
+	updatedIsvc1.Annotations = map[string]string{
+		constants.DeploymentMode: "Serverless",
+	}
+	warnings, err = validator.ValidateUpdate(context.Background(), &oldIsvc, updatedIsvc1)
+	// The annotation equals the "Serverless" mode recorded in the old status: the update must be accepted.
+	g.Expect(warnings).Should(gomega.BeEmpty())
+	g.Expect(err).Should(gomega.Succeed())
+}
+
 func intPtr(i int) *int {
 	return &i
 }
diff --git a/pkg/controller/v1alpha1/inferencegraph/controller.go b/pkg/controller/v1alpha1/inferencegraph/controller.go
@@ -170,7 +170,7 @@ func (r *InferenceGraphReconciler) Reconcile(ctx context.Context, req ctrl.Reque
 		return reconcile.Result{}, errors.Wrapf(err, "fails to create DeployConfig")
 	}
 
-	deploymentMode := isvcutils.GetDeploymentMode(graph.ObjectMeta.Annotations, deployConfig)
+	deploymentMode := isvcutils.GetDeploymentMode(graph.Status.DeploymentMode, graph.ObjectMeta.Annotations, deployConfig)
 	r.Log.Info("Inference graph deployment ", "deployment mode ", deploymentMode)
 	if deploymentMode == constants.RawDeployment {
 		// Create inference graph resources such as deployment, service, hpa in raw deployment mode

diff --git a/pkg/controller/v1beta1/inferenceservice/controller.go b/pkg/controller/v1beta1/inferenceservice/controller.go
@@ -121,7 +121,7 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req
 		return reconcile.Result{}, errors.Wrapf(err, "fails to create DeployConfig")
 	}
 
-	deploymentMode := isvcutils.GetDeploymentMode(annotations, deployConfig)
+	deploymentMode := isvcutils.GetDeploymentMode(isvc.Status.DeploymentMode, annotations, deployConfig)
 	r.Log.Info("Inference service deployment mode ", "deployment mode ", deploymentMode)
 
 	if deploymentMode == constants.ModelMeshDeployment {
@@ -276,6 +276,9 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req
 }
 
 func (r *InferenceServiceReconciler) updateStatus(desiredService *v1beta1api.InferenceService, deploymentMode constants.DeploymentModeType) error {
+	// set the DeploymentMode used for the InferenceService in the status
+	desiredService.Status.DeploymentMode = string(deploymentMode)
+
 	existingService := &v1beta1api.InferenceService{}
 	namespacedName := types.NamespacedName{Name: desiredService.Name, Namespace: desiredService.Namespace}
 	if err := r.Get(context.TODO(), namespacedName, existingService); err != nil {

diff --git a/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go b/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go
@@ -435,6 +435,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
 					TransitionStatus:    "InProgress",
 					ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
 				},
+				DeploymentMode: "RawDeployment",
 			}
 			Eventually(func() string {
 				isvc := &v1beta1.InferenceService{}
@@ -850,6 +851,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
 					TransitionStatus:    "InProgress",
 					ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
 				},
+				DeploymentMode: "RawDeployment",
 			}
 			Eventually(func() string {
 				isvc := &v1beta1.InferenceService{}
@@ -1256,6 +1258,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
 					TransitionStatus:    "InProgress",
 					ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
 				},
+				DeploymentMode: "RawDeployment",
 			}
 			Eventually(func() string {
 				isvc := &v1beta1.InferenceService{}
@@ -1733,6 +1736,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
 					TransitionStatus:    "InProgress",
 					ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
 				},
+				DeploymentMode: "RawDeployment",
 			}
 			Eventually(func() string {
 				isvc := &v1beta1.InferenceService{}
@@ -2166,6 +2170,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
 					TransitionStatus:    "InProgress",
 					ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
 				},
+				DeploymentMode: "RawDeployment",
 			}
 			Eventually(func() string {
 				isvc := &v1beta1.InferenceService{}
@@ -2669,6 +2674,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
 					TransitionStatus:    "InProgress",
 					ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
 				},
+				DeploymentMode: "RawDeployment",
 			}
 			Eventually(func() string {
 				isvc := &v1beta1.InferenceService{}

diff --git a/pkg/controller/v1beta1/inferenceservice/utils/utils.go b/pkg/controller/v1beta1/inferenceservice/utils/utils.go
@@ -178,12 +178,20 @@ case 2: serving.kserve.org/deploymentMode is set
 	        if the mode is "RawDeployment", "Serverless" or "ModelMesh", return it.
 			else return config.deploy.defaultDeploymentMode
 */
-func GetDeploymentMode(annotations map[string]string, deployConfig *v1beta1.DeployConfig) constants.DeploymentModeType {
+func GetDeploymentMode(statusDeploymentMode string, annotations map[string]string, deployConfig *v1beta1.DeployConfig) constants.DeploymentModeType {
+	// First priority: a non-empty deploymentMode recorded in the status is returned as-is, without validation
+	if len(statusDeploymentMode) != 0 {
+		return constants.DeploymentModeType(statusDeploymentMode)
+	}
+
+	// Second priority: with nothing recorded in the status, honor the annotation when it names a supported mode
 	deploymentMode, ok := annotations[constants.DeploymentMode]
 	if ok && (deploymentMode == string(constants.RawDeployment) || deploymentMode ==
 		string(constants.Serverless) || deploymentMode == string(constants.ModelMeshDeployment)) {
 		return constants.DeploymentModeType(deploymentMode)
 	}
+
+	// Otherwise (annotation missing or not a supported mode), fall back to the configured default deployment mode
 	return constants.DeploymentModeType(deployConfig.DefaultDeploymentMode)
 }
 

diff --git a/pkg/controller/v1beta1/inferenceservice/utils/utils_test.go b/pkg/controller/v1beta1/inferenceservice/utils/utils_test.go
@@ -1291,7 +1291,7 @@ func TestGetDeploymentMode(t *testing.T) {
 
 	for name, scenario := range scenarios {
 		t.Run(name, func(t *testing.T) {
-			deploymentMode := GetDeploymentMode(scenario.annotations, scenario.deployConfig)
+			deploymentMode := GetDeploymentMode("", scenario.annotations, scenario.deployConfig)
 			if !g.Expect(deploymentMode).To(gomega.Equal(scenario.expected)) {
 				t.Errorf("got %v, want %v", deploymentMode, scenario.expected)
 			}