Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add DeploymentMode to InferenceService and InferenceGraph status #479

Merged
merged 3 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions config/crd/full/serving.kserve.io_inferencegraphs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,8 @@ spec:
properties:
name:
type: string
request:
type: string
required:
- name
type: object
Expand Down Expand Up @@ -592,6 +594,8 @@ spec:
- type
type: object
type: array
deploymentMode:
type: string
observedGeneration:
format: int64
type: integer
Expand Down
156 changes: 128 additions & 28 deletions config/crd/full/serving.kserve.io_inferenceservices.yaml

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pkg/apis/serving/v1alpha1/inference_graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,8 @@ type InferenceGraphStatus struct {
// Url for the InferenceGraph
// +optional
URL *apis.URL `json:"url,omitempty"`
// InferenceGraph DeploymentMode
DeploymentMode string `json:"deploymentMode,omitempty"`
}

// InferenceGraphList contains a list of InferenceGraph
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/serving/v1beta1/inference_service_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ type InferenceServiceStatus struct {
Components map[ComponentType]ComponentStatusSpec `json:"components,omitempty"`
// Model related statuses
ModelStatus ModelStatus `json:"modelStatus,omitempty"`
// InferenceService DeploymentMode
DeploymentMode string `json:"deploymentMode,omitempty"`
}

// ComponentStatusSpec describes the state of the component
Expand Down
22 changes: 21 additions & 1 deletion pkg/apis/serving/v1beta1/inference_service_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,15 @@ func (v *InferenceServiceValidator) ValidateUpdate(ctx context.Context, oldObj,
validatorLogger.Error(err, "Unable to convert object to InferenceService")
return nil, err
}
oldIsvc, err := convertToInferenceService(oldObj)
if err != nil {
validatorLogger.Error(err, "Unable to convert object to InferenceService")
}
validatorLogger.Info("validate update", "name", isvc.Name)

err = validateDeploymentMode(isvc, oldIsvc)
if err != nil {
return nil, err
}
return validateInferenceService(isvc)
}

Expand Down Expand Up @@ -353,6 +360,19 @@ func validateCollocationStorageURI(predictorSpec PredictorSpec) error {
return nil
}

// validateDeploymentMode rejects an update whose deploymentMode annotation differs
// from the deploymentMode already recorded in the old object's status.
//
// It returns nil when the old status has no recorded deploymentMode, when the new
// object carries no deploymentMode annotation, or when the two values match.
// It returns an error when the values differ, or when either object is nil.
func validateDeploymentMode(newIsvc *InferenceService, oldIsvc *InferenceService) error {
	// Guard against nil inputs: dereferencing either pointer below would otherwise
	// panic inside the admission path instead of producing a clean rejection.
	if newIsvc == nil || oldIsvc == nil {
		return fmt.Errorf("update rejected: unable to validate deploymentMode because the InferenceService is nil")
	}

	statusDeploymentMode := oldIsvc.Status.DeploymentMode
	if len(statusDeploymentMode) == 0 {
		// Nothing recorded yet, so there is nothing to compare against.
		return nil
	}

	// Indexing a nil Annotations map is safe in Go and simply reports ok == false.
	annotationDeploymentMode, ok := newIsvc.Annotations[constants.DeploymentMode]
	if ok && annotationDeploymentMode != statusDeploymentMode {
		return fmt.Errorf("update rejected: deploymentMode cannot be changed from '%s' to '%s'", statusDeploymentMode, annotationDeploymentMode)
	}
	return nil
}

// Convert runtime.Object into InferenceService
func convertToInferenceService(obj runtime.Object) (*InferenceService, error) {
isvc, ok := obj.(*InferenceService)
Expand Down
26 changes: 26 additions & 0 deletions pkg/apis/serving/v1beta1/inference_service_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,32 @@ func TestValidateMultiNodeVariables(t *testing.T) {
}
}

// TestDeploymentModeUpdate checks that ValidateUpdate compares the deploymentMode
// annotation of the updated object against the deploymentMode stored in the old
// object's status.
func TestDeploymentModeUpdate(t *testing.T) {
	g := gomega.NewGomegaWithT(t)
	validator := InferenceServiceValidator{}

	// Baseline object whose status already records the Serverless mode.
	existing := makeTestInferenceService()
	existing.Status = InferenceServiceStatus{
		DeploymentMode: "Serverless",
	}

	// withMode copies the baseline and replaces its annotations with a single
	// deploymentMode annotation set to the given value.
	withMode := func(mode string) *InferenceService {
		candidate := existing.DeepCopy()
		candidate.Annotations = map[string]string{
			constants.DeploymentMode: mode,
		}
		return candidate
	}

	// Annotation does not match status, update should be rejected
	warnings, err := validator.ValidateUpdate(context.Background(), &existing, withMode("RawDeployment"))
	g.Expect(warnings).Should(gomega.BeEmpty())
	g.Expect(err).ShouldNot(gomega.Succeed())

	// Annotation matches status, update is accepted
	warnings, err = validator.ValidateUpdate(context.Background(), &existing, withMode("Serverless"))
	g.Expect(warnings).Should(gomega.BeEmpty())
	g.Expect(err).Should(gomega.Succeed())
}

// intPtr returns a pointer to a freshly allocated int holding the value i.
func intPtr(i int) *int {
	p := new(int)
	*p = i
	return p
}
2 changes: 1 addition & 1 deletion pkg/controller/v1alpha1/inferencegraph/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ func (r *InferenceGraphReconciler) Reconcile(ctx context.Context, req ctrl.Reque
return reconcile.Result{}, errors.Wrapf(err, "fails to create DeployConfig")
}

deploymentMode := isvcutils.GetDeploymentMode(graph.ObjectMeta.Annotations, deployConfig)
deploymentMode := isvcutils.GetDeploymentMode(graph.Status.DeploymentMode, graph.ObjectMeta.Annotations, deployConfig)
r.Log.Info("Inference graph deployment ", "deployment mode ", deploymentMode)
if deploymentMode == constants.RawDeployment {
// Create inference graph resources such as deployment, service, hpa in raw deployment mode
Expand Down
5 changes: 4 additions & 1 deletion pkg/controller/v1beta1/inferenceservice/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req
return reconcile.Result{}, errors.Wrapf(err, "fails to create DeployConfig")
}

deploymentMode := isvcutils.GetDeploymentMode(annotations, deployConfig)
deploymentMode := isvcutils.GetDeploymentMode(isvc.Status.DeploymentMode, annotations, deployConfig)
r.Log.Info("Inference service deployment mode ", "deployment mode ", deploymentMode)

if deploymentMode == constants.ModelMeshDeployment {
Expand Down Expand Up @@ -276,6 +276,9 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req
}

func (r *InferenceServiceReconciler) updateStatus(desiredService *v1beta1api.InferenceService, deploymentMode constants.DeploymentModeType) error {
// set the DeploymentMode used for the InferenceService in the status
desiredService.Status.DeploymentMode = string(deploymentMode)

existingService := &v1beta1api.InferenceService{}
namespacedName := types.NamespacedName{Name: desiredService.Name, Namespace: desiredService.Namespace}
if err := r.Get(context.TODO(), namespacedName, existingService); err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down Expand Up @@ -850,6 +851,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down Expand Up @@ -1256,6 +1258,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down Expand Up @@ -1733,6 +1736,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down Expand Up @@ -2166,6 +2170,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down Expand Up @@ -2669,6 +2674,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
TransitionStatus: "InProgress",
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
},
DeploymentMode: "RawDeployment",
}
Eventually(func() string {
isvc := &v1beta1.InferenceService{}
Expand Down
10 changes: 9 additions & 1 deletion pkg/controller/v1beta1/inferenceservice/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,12 +178,20 @@ case 2: serving.kserve.org/deploymentMode is set
if the mode is "RawDeployment", "Serverless" or "ModelMesh", return it.
else return config.deploy.defaultDeploymentMode
*/
func GetDeploymentMode(annotations map[string]string, deployConfig *v1beta1.DeployConfig) constants.DeploymentModeType {
// GetDeploymentMode resolves the deployment mode from three sources, in order:
// the mode recorded in the status, the deploymentMode annotation (only when it
// names RawDeployment, Serverless or ModelMesh), and finally
// deployConfig.DefaultDeploymentMode.
func GetDeploymentMode(statusDeploymentMode string, annotations map[string]string, deployConfig *v1beta1.DeployConfig) constants.DeploymentModeType {
	// A mode already recorded in the status always wins.
	if statusDeploymentMode != "" {
		return constants.DeploymentModeType(statusDeploymentMode)
	}

	// Otherwise honour an explicit annotation, but only for the recognised modes.
	if annotated, found := annotations[constants.DeploymentMode]; found {
		switch mode := constants.DeploymentModeType(annotated); mode {
		case constants.RawDeployment, constants.Serverless, constants.ModelMeshDeployment:
			return mode
		}
	}

	// Nothing recorded and no usable annotation: fall back to the configured default.
	return constants.DeploymentModeType(deployConfig.DefaultDeploymentMode)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1291,7 +1291,7 @@ func TestGetDeploymentMode(t *testing.T) {

for name, scenario := range scenarios {
t.Run(name, func(t *testing.T) {
deploymentMode := GetDeploymentMode(scenario.annotations, scenario.deployConfig)
deploymentMode := GetDeploymentMode("", scenario.annotations, scenario.deployConfig)
if !g.Expect(deploymentMode).To(gomega.Equal(scenario.expected)) {
t.Errorf("got %v, want %v", deploymentMode, scenario.expected)
}
Expand Down
Loading
Loading