Commit 38476e7

Update
Signed-off-by: Yi Chen <[email protected]>
ChenYi015 committed Jun 25, 2024
1 parent ca24864 commit 38476e7
Showing 54 changed files with 4,632 additions and 7,038 deletions.
13 changes: 7 additions & 6 deletions Makefile
@@ -111,12 +111,9 @@ helm-docs: ## Generates markdown documentation for helm charts from requirements
 
 ##@ Build
 
-.PHONY: all
-all: clean-sparkctl build-sparkctl install-sparkctl
-
-.PHONY: clean-sparkctl
-clean-sparkctl: ## Clean sparkctl binary
-	rm -f sparkctl/sparkctl-darwin-amd64 sparkctl/sparkctl-linux-amd64
+.PHONY: build-operator
+build-operator: ## Build Spark operator
+	go build -o bin/spark-operator cmd/main.go
 
 .PHONY: build-sparkctl
 build-sparkctl: ## Build sparkctl binary
@@ -142,6 +139,10 @@ install-sparkctl: | sparkctl/sparkctl-darwin-amd64 sparkctl/sparkctl-linux-amd64
 		echo "$(UNAME) not supported"; \
 	fi
 
+.PHONY: clean-sparkctl
+clean-sparkctl: ## Clean sparkctl binary
+	rm -f sparkctl/sparkctl-darwin-amd64 sparkctl/sparkctl-linux-amd64
+
 .PHONY: build-api-docs
 build-api-docs: gen-crd-api-reference-docs
 	$(GEN_CRD_API_REFERENCE_DOCS) \
2 changes: 1 addition & 1 deletion api/v1beta2/groupversion_info.go
@@ -1,5 +1,5 @@
 /*
-Copyright 2024.
+Copyright 2024 The Kubeflow authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
29 changes: 25 additions & 4 deletions api/v1beta2/scheduledsparkapplication_types.go
@@ -1,5 +1,5 @@
 /*
-Copyright 2024.
+Copyright 2024 The Kubeflow authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -23,6 +23,10 @@ import (
 // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
 // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
 
+func init() {
+	SchemeBuilder.Register(&ScheduledSparkApplication{}, &ScheduledSparkApplicationList{})
+}
+
 // ScheduledSparkApplicationSpec defines the desired state of ScheduledSparkApplication
 type ScheduledSparkApplicationSpec struct {
 	// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
@@ -99,6 +103,23 @@ type ScheduledSparkApplicationList struct {
 	Items []ScheduledSparkApplication `json:"items"`
 }
 
-func init() {
-	SchemeBuilder.Register(&ScheduledSparkApplication{}, &ScheduledSparkApplicationList{})
-}
+type ConcurrencyPolicy string
+
+const (
+	// ConcurrencyAllow allows SparkApplications to run concurrently.
+	ConcurrencyAllow ConcurrencyPolicy = "Allow"
+	// ConcurrencyForbid forbids concurrent runs of SparkApplications, skipping the next run if the previous
+	// one hasn't finished yet.
+	ConcurrencyForbid ConcurrencyPolicy = "Forbid"
+	// ConcurrencyReplace kills the currently running SparkApplication instance and replaces it with a new one.
+	ConcurrencyReplace ConcurrencyPolicy = "Replace"
+)
+
+type ScheduleState string
+
+const (
+	ScheduleStateNew              ScheduleState = ""
+	ScheduleStateValidating       ScheduleState = "Validating"
+	ScheduleStateScheduled        ScheduleState = "Scheduled"
+	ScheduleStateFailedValidation ScheduleState = "FailedValidation"
+)
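For context, here is a minimal sketch of how the relocated types might be used to construct a ScheduledSparkApplication in Go. Only the type and constant names come from the diff above; the import path, the Schedule and Template fields, and all values are assumptions for illustration.

package main

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	// Assumed import path, matching the api/v1beta2 directory in this diff.
	"github.com/kubeflow/spark-operator/api/v1beta2"
)

func main() {
	// Run the embedded SparkApplication template every 10 minutes; with
	// ConcurrencyForbid, a tick is skipped while the previous run is active.
	app := &v1beta2.ScheduledSparkApplication{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "spark-pi-scheduled", // hypothetical name
			Namespace: "default",
		},
		Spec: v1beta2.ScheduledSparkApplicationSpec{
			Schedule:          "@every 10m",
			ConcurrencyPolicy: v1beta2.ConcurrencyForbid,
			Template:          v1beta2.SparkApplicationSpec{}, // fill in a real spec here
		},
	}
	_ = app // hand off to a Kubernetes client in real code
}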
57 changes: 19 additions & 38 deletions api/v1beta2/sparkapplication_types.go
@@ -1,5 +1,5 @@
 /*
-Copyright 2024.
+Copyright 2024 The Kubeflow authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@ limitations under the License.
 package v1beta2
 
 import (
-	apiv1 "k8s.io/api/core/v1"
+	corev1 "k8s.io/api/core/v1"
 	networkingv1 "k8s.io/api/networking/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
@@ -86,7 +86,7 @@ type SparkApplicationSpec struct {
 	HadoopConfigMap *string `json:"hadoopConfigMap,omitempty"`
 	// Volumes is the list of Kubernetes volumes that can be mounted by the driver and/or executors.
 	// +optional
-	Volumes []apiv1.Volume `json:"volumes,omitempty"`
+	Volumes []corev1.Volume `json:"volumes,omitempty"`
 	// Driver is the driver specification.
 	Driver DriverSpec `json:"driver"`
 	// Executor is the executor specification.
@@ -261,25 +261,6 @@ const (
 	Always RestartPolicyType = "Always"
 )
 
-type ConcurrencyPolicy string
-
-const (
-	// ConcurrencyAllow allows SparkApplications to run concurrently.
-	ConcurrencyAllow ConcurrencyPolicy = "Allow"
-	// ConcurrencyForbid forbids concurrent runs of SparkApplications, skipping the next run if the previous
-	// one hasn't finished yet.
-	ConcurrencyForbid ConcurrencyPolicy = "Forbid"
-	// ConcurrencyReplace kills the currently running SparkApplication instance and replaces it with a new one.
-	ConcurrencyReplace ConcurrencyPolicy = "Replace"
-)
-
-type ScheduleState string
-
-const (
-	FailedValidationState ScheduleState = "FailedValidation"
-	ScheduledState        ScheduleState = "Scheduled"
-)
-
 // BatchSchedulerConfiguration used to configure how to batch scheduling Spark Application
 type BatchSchedulerConfiguration struct {
 	// Queue stands for the resource queue which the application belongs to, it's being used in Volcano batch scheduler.
@@ -291,7 +272,7 @@ type BatchSchedulerConfiguration struct {
 	// Resources stands for the resource list custom request for. Usually it is used to define the lower-bound limit.
 	// If specified, volcano scheduler will consider it as the resources requested.
 	// +optional
-	Resources apiv1.ResourceList `json:"resources,omitempty"`
+	Resources corev1.ResourceList `json:"resources,omitempty"`
 }
 
 // SparkUIConfiguration is for driver UI specific configuration parameters.
@@ -307,7 +288,7 @@ type SparkUIConfiguration struct {
 	ServicePortName *string `json:"servicePortName"`
 	// ServiceType allows configuring the type of the service. Defaults to ClusterIP.
 	// +optional
-	ServiceType *apiv1.ServiceType `json:"serviceType"`
+	ServiceType *corev1.ServiceType `json:"serviceType"`
 	// ServiceAnnotations is a map of key,value pairs of annotations that might be added to the service object.
 	// +optional
 	ServiceAnnotations map[string]string `json:"serviceAnnotations,omitempty"`
@@ -331,7 +312,7 @@ type DriverIngressConfiguration struct {
 	ServicePortName *string `json:"servicePortName"`
 	// ServiceType allows configuring the type of the service. Defaults to ClusterIP.
 	// +optional
-	ServiceType *apiv1.ServiceType `json:"serviceType"`
+	ServiceType *corev1.ServiceType `json:"serviceType"`
 	// ServiceAnnotations is a map of key,value pairs of annotations that might be added to the service object.
 	// +optional
 	ServiceAnnotations map[string]string `json:"serviceAnnotations,omitempty"`
@@ -453,14 +434,14 @@ type SparkPodSpec struct {
 	Secrets []SecretInfo `json:"secrets,omitempty"`
 	// Env carries the environment variables to add to the pod.
 	// +optional
-	Env []apiv1.EnvVar `json:"env,omitempty"`
+	Env []corev1.EnvVar `json:"env,omitempty"`
 	// EnvVars carries the environment variables to add to the pod.
 	// Deprecated. Consider using `env` instead.
 	// +optional
 	EnvVars map[string]string `json:"envVars,omitempty"`
 	// EnvFrom is a list of sources to populate environment variables in the container.
 	// +optional
-	EnvFrom []apiv1.EnvFromSource `json:"envFrom,omitempty"`
+	EnvFrom []corev1.EnvFromSource `json:"envFrom,omitempty"`
 	// EnvSecretKeyRefs holds a mapping from environment variable names to SecretKeyRefs.
 	// Deprecated. Consider using `env` instead.
 	// +optional
@@ -473,28 +454,28 @@ type SparkPodSpec struct {
 	Annotations map[string]string `json:"annotations,omitempty"`
 	// VolumeMounts specifies the volumes listed in ".spec.volumes" to mount into the main container's filesystem.
 	// +optional
-	VolumeMounts []apiv1.VolumeMount `json:"volumeMounts,omitempty"`
+	VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty"`
 	// Affinity specifies the affinity/anti-affinity settings for the pod.
 	// +optional
-	Affinity *apiv1.Affinity `json:"affinity,omitempty"`
+	Affinity *corev1.Affinity `json:"affinity,omitempty"`
 	// Tolerations specifies the tolerations listed in ".spec.tolerations" to be applied to the pod.
 	// +optional
-	Tolerations []apiv1.Toleration `json:"tolerations,omitempty"`
+	Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
 	// PodSecurityContext specifies the PodSecurityContext to apply.
 	// +optional
-	PodSecurityContext *apiv1.PodSecurityContext `json:"podSecurityContext,omitempty"`
+	PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"`
 	// SecurityContext specifies the container's SecurityContext to apply.
 	// +optional
-	SecurityContext *apiv1.SecurityContext `json:"securityContext,omitempty"`
+	SecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"`
 	// SchedulerName specifies the scheduler that will be used for scheduling
 	// +optional
 	SchedulerName *string `json:"schedulerName,omitempty"`
 	// Sidecars is a list of sidecar containers that run along side the main Spark container.
 	// +optional
-	Sidecars []apiv1.Container `json:"sidecars,omitempty"`
+	Sidecars []corev1.Container `json:"sidecars,omitempty"`
 	// InitContainers is a list of init-containers that run to completion before the main Spark container.
 	// +optional
-	InitContainers []apiv1.Container `json:"initContainers,omitempty"`
+	InitContainers []corev1.Container `json:"initContainers,omitempty"`
 	// HostNetwork indicates whether to request host networking for the pod or not.
 	// +optional
 	HostNetwork *bool `json:"hostNetwork,omitempty"`
Expand All @@ -504,7 +485,7 @@ type SparkPodSpec struct {
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
// DnsConfig dns settings for the pod, following the Kubernetes specifications.
// +optional
DNSConfig *apiv1.PodDNSConfig `json:"dnsConfig,omitempty"`
DNSConfig *corev1.PodDNSConfig `json:"dnsConfig,omitempty"`
// Termination grace period seconds for the pod
// +optional
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty"`
@@ -513,7 +494,7 @@ type SparkPodSpec struct {
 	ServiceAccount *string `json:"serviceAccount,omitempty"`
 	// HostAliases settings for the pod, following the Kubernetes specifications.
 	// +optional
-	HostAliases []apiv1.HostAlias `json:"hostAliases,omitempty"`
+	HostAliases []corev1.HostAlias `json:"hostAliases,omitempty"`
 	// ShareProcessNamespace settings for the pod, following the Kubernetes specifications.
 	// +optional
 	ShareProcessNamespace *bool `json:"shareProcessNamespace,omitempty"`
Expand All @@ -539,7 +520,7 @@ type DriverSpec struct {
JavaOptions *string `json:"javaOptions,omitempty"`
// Lifecycle for running preStop or postStart commands
// +optional
Lifecycle *apiv1.Lifecycle `json:"lifecycle,omitempty"`
Lifecycle *corev1.Lifecycle `json:"lifecycle,omitempty"`
// KubernetesMaster is the URL of the Kubernetes master used by the driver to manage executor pods and
// other Kubernetes resources. Default to https://kubernetes.default.svc.
// +optional
@@ -574,7 +555,7 @@ type ExecutorSpec struct {
 	JavaOptions *string `json:"javaOptions,omitempty"`
 	// Lifecycle for running preStop or postStart commands
 	// +optional
-	Lifecycle *apiv1.Lifecycle `json:"lifecycle,omitempty"`
+	Lifecycle *corev1.Lifecycle `json:"lifecycle,omitempty"`
 	// DeleteOnTermination specify whether executor pods should be deleted in case of failure or normal termination.
 	// Maps to `spark.kubernetes.executor.deleteOnTermination` that is available since Spark 3.0.
 	// +optional
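Code built against these types follows the same rename. Below is a minimal sketch of a driver spec using the corev1 alias; the import path is assumed as before, all field values are illustrative, and DriverSpec embedding SparkPodSpec is assumed from this API's layout rather than shown in the hunks above.

package main

import (
	corev1 "k8s.io/api/core/v1"

	// Assumed import path, matching the api/v1beta2 directory in this diff.
	"github.com/kubeflow/spark-operator/api/v1beta2"
)

func main() {
	// Pod-level settings for the driver, expressed with the renamed corev1 alias.
	driver := v1beta2.DriverSpec{
		SparkPodSpec: v1beta2.SparkPodSpec{
			Env: []corev1.EnvVar{
				{Name: "SPARK_LOG_LEVEL", Value: "INFO"}, // hypothetical variable
			},
			Tolerations: []corev1.Toleration{{
				Key:      "dedicated", // hypothetical taint key
				Operator: corev1.TolerationOpEqual,
				Value:    "spark",
				Effect:   corev1.TaintEffectNoSchedule,
			}},
			VolumeMounts: []corev1.VolumeMount{
				// Must reference a volume declared in spec.volumes.
				{Name: "scratch", MountPath: "/tmp/scratch"},
			},
		},
	}
	_ = driver
}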
4 changes: 2 additions & 2 deletions charts/spark-operator-chart/Chart.yaml
@@ -17,8 +17,8 @@
 apiVersion: v2
 name: spark-operator
 description: A Helm chart for Spark on Kubernetes operator
-version: 1.5.0
-appVersion: v1beta2-1.7.0-3.5.0
+version: 2.0.0
+appVersion: v1beta2-2.0.0-3.5.0
 keywords:
   - spark
 home: https://github.com/kubeflow/spark-operator
6 changes: 3 additions & 3 deletions charts/spark-operator-chart/README.md
@@ -1,6 +1,6 @@
 # spark-operator
 
-![Version: 1.5.0](https://img.shields.io/badge/Version-1.5.0-informational?style=flat-square) ![AppVersion: v1beta2-1.7.0-3.5.0](https://img.shields.io/badge/AppVersion-v1beta2--1.7.0--3.5.0-informational?style=flat-square)
+![Version: 2.0.0](https://img.shields.io/badge/Version-2.0.0-informational?style=flat-square) ![AppVersion: v1beta2-2.0.0-3.5.0](https://img.shields.io/badge/AppVersion-v1beta2--2.0.0--3.5.0-informational?style=flat-square)
 
 A Helm chart for Spark on Kubernetes operator
 
@@ -82,7 +82,7 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command documentation
 | affinity | object | `{}` | Affinity for pod assignment |
 | batchScheduler.enable | bool | `false` | Enable batch scheduler for spark jobs scheduling. If enabled, users can specify batch scheduler name in spark application |
 | commonLabels | object | `{}` | Common labels to add to the resources |
-| controller.ingressUrlFormat | string | `""` | Ingress URL format. Requires the UI service to be enabled by setting `uiService.enable` to true. |
+| controller.ingressUrlFormat | string | `""` | Ingress URL format. Requires the UI service to be enabled by setting `controller.uiService.enable` to true. |
 | controller.labelSelectorFilter | string | `""` | A comma-separated list of key=value, or key labels to filter resources during watch and list based on the specified labels. |
 | controller.leaderElection | object | `{"lockName":"spark-operator-lock","lockNamespace":""}` | Leader election lock name. |
 | controller.logLevel | int | `2` | Set higher levels for more verbose logging |
@@ -93,8 +93,8 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command documentation
 | controller.serviceAccount.annotations | object | `{}` | Optional annotations for the controller service account |
 | controller.serviceAccount.create | bool | `true` | Specifies whether to create a service account for the controller |
 | controller.serviceAccount.name | string | `""` | Optional name for the controller service account |
-| controller.threads | int | `10` | Operator concurrency, higher values might increase memory usage |
 | controller.uiService.enable | bool | `true` | Enable UI service creation for Spark application |
+| controller.workers | int | `10` | Operator concurrency, higher values might increase memory usage |
 | envFrom | list | `[]` | Pod environment variable sources |
 | fullnameOverride | string | `""` | String to override release name |
 | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy |
4 changes: 2 additions & 2 deletions charts/spark-operator-chart/values.yaml
@@ -50,11 +50,11 @@ controller:
     enable: true
 
   # -- Ingress URL format.
-  # Requires the UI service to be enabled by setting `uiService.enable` to true.
+  # Requires the UI service to be enabled by setting `controller.uiService.enable` to true.
   ingressUrlFormat: ""
 
   # -- Operator concurrency, higher values might increase memory usage
-  threads: 10
+  workers: 10
 
   # -- Operator resync interval. Note that the operator will respond to events (e.g. create, update)
   # unrelated to this setting