Skip to content

Commit

Permalink
Merge pull request #485 from VedantMahabaleshwarkar/j-16954-cp
Browse files Browse the repository at this point in the history
add DeploymentMode to InferenceService and InferenceGraph status
  • Loading branch information
Jooho authored Feb 3, 2025
2 parents 8040c06 + 83be250 commit a22a1cb
Show file tree
Hide file tree
Showing 16 changed files with 673 additions and 4,178 deletions.
4 changes: 4 additions & 0 deletions config/crd/full/serving.kserve.io_inferencegraphs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,8 @@ spec:
properties:
name:
type: string
request:
type: string
required:
- name
type: object
Expand Down Expand Up @@ -592,6 +594,8 @@ spec:
- type
type: object
type: array
deploymentMode:
type: string
observedGeneration:
format: int64
type: integer
Expand Down
156 changes: 128 additions & 28 deletions config/crd/full/serving.kserve.io_inferenceservices.yaml

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pkg/apis/serving/v1alpha1/inference_graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@ type InferenceGraphStatus struct {
// Url for the InferenceGraph
// +optional
URL *apis.URL `json:"url,omitempty"`
// DeploymentMode is the deployment mode of the InferenceGraph.
DeploymentMode string `json:"deploymentMode,omitempty"`
}

// InferenceGraphList contains a list of InferenceGraph
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/serving/v1beta1/inference_service_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ type InferenceServiceStatus struct {
Components map[ComponentType]ComponentStatusSpec `json:"components,omitempty"`
// Model related statuses
ModelStatus ModelStatus `json:"modelStatus,omitempty"`
// DeploymentMode is the deployment mode used for the InferenceService.
DeploymentMode string `json:"deploymentMode,omitempty"`
}

// ComponentStatusSpec describes the state of the component
Expand Down
22 changes: 21 additions & 1 deletion pkg/apis/serving/v1beta1/inference_service_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,15 @@ func (v *InferenceServiceValidator) ValidateUpdate(ctx context.Context, oldObj,
validatorLogger.Error(err, "Unable to convert object to InferenceService")
return nil, err
}
oldIsvc, err := convertToInferenceService(oldObj)
if err != nil {
validatorLogger.Error(err, "Unable to convert object to InferenceService")
}
validatorLogger.Info("validate update", "name", isvc.Name)

err = validateDeploymentMode(isvc, oldIsvc)
if err != nil {
return nil, err
}
return validateInferenceService(isvc)
}

Expand Down Expand Up @@ -387,6 +394,19 @@ func validateCollocationStorageURI(predictorSpec PredictorSpec) error {
return nil
}

// validateDeploymentMode rejects an update whose deploymentMode annotation differs from the
// deploymentMode already recorded in the status of the existing InferenceService.
//
// It returns nil (the check is skipped) when either object is nil, when the old status has no
// deploymentMode recorded, or when the new object carries no deploymentMode annotation.
// Otherwise it returns an error naming both the recorded and the requested mode.
func validateDeploymentMode(newIsvc *InferenceService, oldIsvc *InferenceService) error {
	// ValidateUpdate only logs a failed conversion of the old object and keeps going, so
	// oldIsvc may be nil here; skip the check instead of panicking inside the webhook.
	if newIsvc == nil || oldIsvc == nil {
		return nil
	}

	statusDeploymentMode := oldIsvc.Status.DeploymentMode
	if len(statusDeploymentMode) == 0 {
		// Nothing has been recorded yet, so there is nothing to protect.
		return nil
	}

	// Indexing a nil Annotations map is safe and simply reports ok == false.
	annotationDeploymentMode, ok := newIsvc.Annotations[constants.DeploymentMode]
	if !ok || annotationDeploymentMode == statusDeploymentMode {
		return nil
	}
	return fmt.Errorf("update rejected: deploymentMode cannot be changed from '%s' to '%s'", statusDeploymentMode, annotationDeploymentMode)
}

// Convert runtime.Object into InferenceService
func convertToInferenceService(obj runtime.Object) (*InferenceService, error) {
isvc, ok := obj.(*InferenceService)
Expand Down
26 changes: 26 additions & 0 deletions pkg/apis/serving/v1beta1/inference_service_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,32 @@ func TestValidateMultiNodeVariables(t *testing.T) {
}
}

// TestDeploymentModeUpdate checks that ValidateUpdate compares the deploymentMode annotation on
// the updated object with the deploymentMode recorded in the status of the old object.
func TestDeploymentModeUpdate(t *testing.T) {
	g := gomega.NewGomegaWithT(t)

	existing := makeTestInferenceService()
	existing.Status = InferenceServiceStatus{DeploymentMode: "Serverless"}
	validator := InferenceServiceValidator{}

	cases := []struct {
		annotatedMode string
		rejected      bool
	}{
		// Annotation does not match status, update should be rejected
		{annotatedMode: "RawDeployment", rejected: true},
		// Annotation matches status, update is accepted
		{annotatedMode: "Serverless", rejected: false},
	}

	for _, tc := range cases {
		candidate := existing.DeepCopy()
		candidate.Annotations = map[string]string{
			constants.DeploymentMode: tc.annotatedMode,
		}

		warnings, err := validator.ValidateUpdate(context.Background(), &existing, candidate)
		g.Expect(warnings).Should(gomega.BeEmpty())
		if tc.rejected {
			g.Expect(err).ShouldNot(gomega.Succeed())
		} else {
			g.Expect(err).Should(gomega.Succeed())
		}
	}
}

// intPtr returns a pointer to a freshly allocated int holding the value i.
func intPtr(i int) *int {
	p := new(int)
	*p = i
	return p
}
2 changes: 1 addition & 1 deletion pkg/controller/v1alpha1/inferencegraph/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ func (r *InferenceGraphReconciler) Reconcile(ctx context.Context, req ctrl.Reque
return reconcile.Result{}, errors.Wrapf(err, "fails to create DeployConfig")
}

deploymentMode := isvcutils.GetDeploymentMode(graph.ObjectMeta.Annotations, deployConfig)
deploymentMode := isvcutils.GetDeploymentMode(graph.Status.DeploymentMode, graph.ObjectMeta.Annotations, deployConfig)
r.Log.Info("Inference graph deployment ", "deployment mode ", deploymentMode)
if deploymentMode == constants.RawDeployment {
// Create inference graph resources such as deployment, service, hpa in raw deployment mode
Expand Down
5 changes: 4 additions & 1 deletion pkg/controller/v1beta1/inferenceservice/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req
return reconcile.Result{}, errors.Wrapf(err, "fails to create DeployConfig")
}

deploymentMode := isvcutils.GetDeploymentMode(annotations, deployConfig)
deploymentMode := isvcutils.GetDeploymentMode(isvc.Status.DeploymentMode, annotations, deployConfig)
r.Log.Info("Inference service deployment mode ", "deployment mode ", deploymentMode)

if deploymentMode == constants.ModelMeshDeployment {
Expand Down Expand Up @@ -269,6 +269,9 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req
}

func (r *InferenceServiceReconciler) updateStatus(desiredService *v1beta1api.InferenceService, deploymentMode constants.DeploymentModeType) error {
// set the DeploymentMode used for the InferenceService in the status
desiredService.Status.DeploymentMode = string(deploymentMode)

existingService := &v1beta1api.InferenceService{}
namespacedName := types.NamespacedName{Name: desiredService.Name, Namespace: desiredService.Namespace}
if err := r.Get(context.TODO(), namespacedName, existingService); err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down Expand Up @@ -853,6 +854,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down Expand Up @@ -1259,6 +1261,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down Expand Up @@ -1736,6 +1739,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down Expand Up @@ -2169,6 +2173,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down Expand Up @@ -2672,6 +2677,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down
10 changes: 9 additions & 1 deletion pkg/controller/v1beta1/inferenceservice/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,12 +178,20 @@ case 2: serving.kserve.org/deploymentMode is set
if the mode is "RawDeployment", "Serverless" or "ModelMesh", return it.
else return config.deploy.defaultDeploymentMode
*/
func GetDeploymentMode(annotations map[string]string, deployConfig *v1beta1.DeployConfig) constants.DeploymentModeType {
func GetDeploymentMode(statusDeploymentMode string, annotations map[string]string, deployConfig *v1beta1.DeployConfig) constants.DeploymentModeType {
	// A mode already recorded in the status takes precedence over everything else and is
	// returned as-is.
	if statusDeploymentMode != "" {
		return constants.DeploymentModeType(statusDeploymentMode)
	}

	// With nothing recorded in the status, an explicit annotation is honoured, but only when it
	// names one of the known deployment modes.
	if requested, found := annotations[constants.DeploymentMode]; found {
		switch mode := constants.DeploymentModeType(requested); mode {
		case constants.RawDeployment, constants.Serverless, constants.ModelMeshDeployment:
			return mode
		}
	}

	// Otherwise (no recorded mode and no usable annotation) fall back to the default from the
	// deploy configuration.
	return constants.DeploymentModeType(deployConfig.DefaultDeploymentMode)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1268,7 +1268,7 @@ func TestGetDeploymentMode(t *testing.T) {

for name, scenario := range scenarios {
t.Run(name, func(t *testing.T) {
deploymentMode := GetDeploymentMode(scenario.annotations, scenario.deployConfig)
deploymentMode := GetDeploymentMode("", scenario.annotations, scenario.deployConfig)
if !g.Expect(deploymentMode).To(gomega.Equal(scenario.expected)) {
t.Errorf("got %v, want %v", deploymentMode, scenario.expected)
}
Expand Down
Loading

0 comments on commit a22a1cb

Please sign in to comment.