From 664b9d01c42a04a5327e582cc23215c34e9a5020 Mon Sep 17 00:00:00 2001 From: Yi Chen Date: Wed, 11 Dec 2024 13:40:03 +0800 Subject: [PATCH] Release v2.1.0 (#2354) * Allow setting automountServiceAccountToken (#2298) * Allow setting automountServiceAccountToken on workloads and serviceAccounts Signed-off-by: Aran Shavit * update helm docs Signed-off-by: Aran Shavit --------- Signed-off-by: Aran Shavit (cherry picked from commit 515d805b8a745df1541b6c2993de6e0cd05e3bf7) * Fix: executor container security context does not work (#2306) Signed-off-by: Yi Chen (cherry picked from commit 171e429706214645611f6921e31db8652d921219) * Fix: should not add emptyDir sizeLimit conf if it is nil (#2305) Signed-off-by: Yi Chen (cherry picked from commit 763682dfe645b27dd6f65318130c665cf272f559) * Allow the Controller and Webhook Containers to run with the securityContext: readOnlyRootFilesystem: true (#2282) * create a tmp dir for the controller to write Spark artifacts to and set the controller to readOnlyRootFilesystem Signed-off-by: Nick Gretzon * mount a dir for the webhook container to generate its certificates in and set readOnlyRootFilesystem: true for the webhook pod Signed-off-by: Nick Gretzon * update the securityContext in the controller deployment test Signed-off-by: Nick Gretzon * update securityContext of the webhook container in the deployment_test Signed-off-by: Nick Gretzon * update README Signed-off-by: Nick Gretzon * remove -- so comments are not rendered in the README.md Signed-off-by: Nick Gretzon * recreate README.md after removal of comments for volumes and volumeMounts Signed-off-by: Nick Gretzon * make indentation for volumes and volumeMounts consistent with rest of values.yaml Signed-off-by: Nick Gretzon * Revert "make indentation for volumes and volumeMounts consistent with rest of values.yaml" This reverts commit dba97fc3d9458e5addfff79d021d23b30938cbb9. 
Signed-off-by: Nick Gretzon * fix indentation in webhook and controller deployment templates for volumes and volumeMounts Signed-off-by: Nick Gretzon * Update charts/spark-operator-chart/values.yaml Co-authored-by: Yi Chen Signed-off-by: Nicholas Gretzon <50811947+npgretz@users.noreply.github.com> * Update charts/spark-operator-chart/values.yaml Co-authored-by: Yi Chen Signed-off-by: Nicholas Gretzon <50811947+npgretz@users.noreply.github.com> * Update charts/spark-operator-chart/values.yaml Co-authored-by: Yi Chen Signed-off-by: Nicholas Gretzon <50811947+npgretz@users.noreply.github.com> * Update charts/spark-operator-chart/values.yaml Co-authored-by: Yi Chen Signed-off-by: Nicholas Gretzon <50811947+npgretz@users.noreply.github.com> * Update charts/spark-operator-chart/templates/controller/deployment.yaml Co-authored-by: Yi Chen Signed-off-by: Nicholas Gretzon <50811947+npgretz@users.noreply.github.com> * Update charts/spark-operator-chart/templates/controller/deployment.yaml Co-authored-by: Yi Chen Signed-off-by: Nicholas Gretzon <50811947+npgretz@users.noreply.github.com> * Update charts/spark-operator-chart/templates/webhook/deployment.yaml Co-authored-by: Yi Chen Signed-off-by: Nicholas Gretzon <50811947+npgretz@users.noreply.github.com> * Update charts/spark-operator-chart/templates/webhook/deployment.yaml Co-authored-by: Yi Chen Signed-off-by: Nicholas Gretzon <50811947+npgretz@users.noreply.github.com> * add additional securityContext to the controller deployment_test.yaml Signed-off-by: Nick Gretzon --------- Signed-off-by: Nick Gretzon Signed-off-by: Nicholas Gretzon <50811947+npgretz@users.noreply.github.com> Co-authored-by: Yi Chen (cherry picked from commit 72107fd7b877bf795ea3eab425774272c8034909) * Fix: should not add emptyDir sizeLimit conf on executor pods if it is nil (#2316) Signed-off-by: Cian Gallagher (cherry picked from commit 2999546dc6f720dfa9e63f6060d357cf1ef94b89) * Bump `volcano.sh/apis` to 1.10.0 (#2320) Signed-off-by: Jacob Salway (cherry picked from commit 22e4fb8e48b02ece2919a82394385b517e15ee04) * Truncate UI service name if over 63 characters (#2311) * Truncate UI service name if over 63 characters Signed-off-by: Jacob Salway * Also truncate ingress name Signed-off-by: Jacob Salway --------- Signed-off-by: Jacob Salway (cherry picked from commit 43c1888c9d20ba208222e522d23565656ad65741) * Bump aquasecurity/trivy-action from 0.28.0 to 0.29.0 (#2332) Bumps [aquasecurity/trivy-action](https://github.com/aquasecurity/trivy-action) from 0.28.0 to 0.29.0. - [Release notes](https://github.com/aquasecurity/trivy-action/releases) - [Commits](https://github.com/aquasecurity/trivy-action/compare/0.28.0...0.29.0) --- updated-dependencies: - dependency-name: aquasecurity/trivy-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 270b09e4c7d98fbbbc295bd87a5829506e65fc8f) * Bump github.com/onsi/ginkgo/v2 from 2.20.2 to 2.22.0 (#2335) Bumps [github.com/onsi/ginkgo/v2](https://github.com/onsi/ginkgo) from 2.20.2 to 2.22.0. - [Release notes](https://github.com/onsi/ginkgo/releases) - [Changelog](https://github.com/onsi/ginkgo/blob/master/CHANGELOG.md) - [Commits](https://github.com/onsi/ginkgo/compare/v2.20.2...v2.22.0) --- updated-dependencies: - dependency-name: github.com/onsi/ginkgo/v2 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 40423d5501c3407545dea8d34f6527f07464476f) * The webhook-key-name command-line param isn't taking effect (#2344) Signed-off-by: C. H. Afzal (cherry picked from commit a261523144afd59bc0aa3c56d796997de368f78f) * Robustness to driver pod taking time to create (#2315) * Retry after driver pod not found if recent submission Signed-off-by: Thomas Newton * Add a test Signed-off-by: Thomas Newton * Make grace period configurable Signed-off-by: Thomas Newton * Update test Signed-off-by: Thomas Newton * Add an extra test with the driver pod Signed-off-by: Thomas Newton * Separate context to create and delete the driver pod Signed-off-by: Thomas Newton * Tidy Signed-off-by: Thomas Newton * Autoformat Signed-off-by: Thomas Newton * Update error message Signed-off-by: Thomas Newton * Add helm parameter Signed-off-by: Thomas Newton * Update internal/controller/sparkapplication/controller.go Co-authored-by: Yi Chen Signed-off-by: Thomas Newton * Newlines between helm tests Signed-off-by: Thomas Newton --------- Signed-off-by: Thomas Newton Co-authored-by: Yi Chen (cherry picked from commit d815e78c2196ce2f34e61b8b6163cc3b30db2c44) * Use NSS_WRAPPER_PASSWD instead of /etc/passwd as in spark-operator image entrypoint.sh (#2312) Signed-off-by: Aakcht (cherry picked from commit 5dd91c4bf2bc32680ebf6c2df7b053c266497016) * Move sparkctl to cmd directory (#2347) * Move spark-operator Signed-off-by: Yi Chen * Move sparkctl to cmd directory Signed-off-by: Yi Chen * Remove unnecessary app package/directory Signed-off-by: Yi Chen --------- Signed-off-by: Yi Chen (cherry picked from commit 2375a306f9c0e85fb486ef58aa6d5aebba8dc8b2) * Spark Operator Official Release v2.1.0 Signed-off-by: Yi Chen --------- Signed-off-by: Yi Chen Co-authored-by: Aran Shavit Co-authored-by: Nicholas Gretzon <50811947+npgretz@users.noreply.github.com> Co-authored-by: Cian (Keen) Gallagher Co-authored-by: Jacob Salway Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: C. H. 
Afzal <41106172+c-h-afzal@users.noreply.github.com> Co-authored-by: Thomas Newton Co-authored-by: Aakcht --- .github/workflows/trivy-image-scanning.yaml | 2 +- Makefile | 6 +- VERSION | 2 +- charts/spark-operator-chart/Chart.yaml | 4 +- charts/spark-operator-chart/README.md | 18 ++- .../templates/controller/deployment.yaml | 4 + .../templates/controller/serviceaccount.yaml | 1 + .../templates/spark/serviceaccount.yaml | 1 + .../templates/webhook/deployment.yaml | 5 +- .../templates/webhook/serviceaccount.yaml | 1 + .../tests/controller/deployment_test.yaml | 24 +++ .../tests/webhook/deployment_test.yaml | 4 + charts/spark-operator-chart/values.yaml | 37 ++++- cmd/main.go | 31 ---- cmd/operator/controller/start.go | 21 ++- cmd/operator/{root.go => main.go} | 14 +- cmd/operator/webhook/start.go | 1 + {sparkctl => cmd/sparkctl}/README.md | 0 {sparkctl/cmd => cmd/sparkctl/app}/client.go | 2 +- {sparkctl/cmd => cmd/sparkctl/app}/create.go | 2 +- .../cmd => cmd/sparkctl/app}/create_test.go | 2 +- {sparkctl/cmd => cmd/sparkctl/app}/delete.go | 2 +- {sparkctl/cmd => cmd/sparkctl/app}/event.go | 2 +- {sparkctl/cmd => cmd/sparkctl/app}/forward.go | 2 +- {sparkctl/cmd => cmd/sparkctl/app}/gcs.go | 2 +- {sparkctl/cmd => cmd/sparkctl/app}/list.go | 2 +- {sparkctl/cmd => cmd/sparkctl/app}/log.go | 2 +- {sparkctl/cmd => cmd/sparkctl/app}/root.go | 2 +- {sparkctl/cmd => cmd/sparkctl/app}/s3.go | 2 +- {sparkctl/cmd => cmd/sparkctl/app}/status.go | 2 +- .../app}/testdata/hadoop-conf/binary.dat | 0 .../app}/testdata/hadoop-conf/core-site.xml | 0 .../sparkctl/app}/testdata/test-app.yaml | 0 {sparkctl/cmd => cmd/sparkctl/app}/utils.go | 2 +- {sparkctl => cmd/sparkctl}/main.go | 4 +- entrypoint.sh | 32 ++-- go.mod | 10 +- go.sum | 20 +-- .../controller/sparkapplication/controller.go | 13 +- .../sparkapplication/controller_test.go | 143 ++++++++++++++++++ .../controller/sparkapplication/submission.go | 52 ++++--- internal/webhook/sparkpod_defaulter.go | 2 +- pkg/util/sparkapplication.go | 18 ++- pkg/util/sparkapplication_test.go | 28 ++++ 44 files changed, 387 insertions(+), 137 deletions(-) delete mode 100644 cmd/main.go rename cmd/operator/{root.go => main.go} (84%) rename {sparkctl => cmd/sparkctl}/README.md (100%) rename {sparkctl/cmd => cmd/sparkctl/app}/client.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/create.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/create_test.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/delete.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/event.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/forward.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/gcs.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/list.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/log.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/root.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/s3.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/status.go (99%) rename {sparkctl/cmd => cmd/sparkctl/app}/testdata/hadoop-conf/binary.dat (100%) rename {sparkctl/cmd => cmd/sparkctl/app}/testdata/hadoop-conf/core-site.xml (100%) rename {sparkctl/cmd => cmd/sparkctl/app}/testdata/test-app.yaml (100%) rename {sparkctl/cmd => cmd/sparkctl/app}/utils.go (98%) rename {sparkctl => cmd/sparkctl}/main.go (90%) diff --git a/.github/workflows/trivy-image-scanning.yaml b/.github/workflows/trivy-image-scanning.yaml index e0ba85c8b9..cacdd1c3fd 100644 --- a/.github/workflows/trivy-image-scanning.yaml +++ b/.github/workflows/trivy-image-scanning.yaml @@ -15,7 +15,7 @@ jobs: run: make print-IMAGE >> 
$GITHUB_ENV - name: trivy scan for github security tab - uses: aquasecurity/trivy-action@0.28.0 + uses: aquasecurity/trivy-action@0.29.0 with: image-ref: '${{ env.IMAGE }}' format: 'sarif' diff --git a/Makefile b/Makefile index 49a2713c14..255a9ba9d5 100644 --- a/Makefile +++ b/Makefile @@ -173,12 +173,12 @@ override LDFLAGS += \ .PHONY: build-operator build-operator: ## Build Spark operator. echo "Building spark-operator binary..." - go build -o $(SPARK_OPERATOR) -ldflags '${LDFLAGS}' cmd/main.go + go build -o $(SPARK_OPERATOR) -ldflags '${LDFLAGS}' cmd/operator/main.go .PHONY: build-sparkctl build-sparkctl: ## Build sparkctl binary. echo "Building sparkctl binary..." - CGO_ENABLED=0 go build -o $(SPARKCTL) -buildvcs=false sparkctl/main.go + CGO_ENABLED=0 go build -o $(SPARKCTL) -buildvcs=false cmd/sparkctl/main.go .PHONY: install-sparkctl install-sparkctl: build-sparkctl ## Install sparkctl binary. @@ -191,7 +191,7 @@ clean: ## Clean spark-operator and sparktcl binaries. rm -f $(SPARKCTL) .PHONY: build-api-docs -build-api-docs: gen-crd-api-reference-docs ## Build api documentaion. +build-api-docs: gen-crd-api-reference-docs ## Build api documentation. $(GEN_CRD_API_REFERENCE_DOCS) \ -config hack/api-docs/config.json \ -api-dir github.com/kubeflow/spark-operator/api/v1beta2 \ diff --git a/VERSION b/VERSION index 29ef23ac1d..852700e118 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -v2.1.0-rc.0 \ No newline at end of file +v2.1.0 \ No newline at end of file diff --git a/charts/spark-operator-chart/Chart.yaml b/charts/spark-operator-chart/Chart.yaml index 5172f7f8cd..5f9f42b0b4 100644 --- a/charts/spark-operator-chart/Chart.yaml +++ b/charts/spark-operator-chart/Chart.yaml @@ -20,9 +20,9 @@ name: spark-operator description: A Helm chart for Spark on Kubernetes operator. -version: 2.1.0-rc.0 +version: 2.1.0 -appVersion: 2.1.0-rc.0 +appVersion: 2.1.0 keywords: - apache spark diff --git a/charts/spark-operator-chart/README.md b/charts/spark-operator-chart/README.md index 7d772030c8..8cdc5d16b1 100644 --- a/charts/spark-operator-chart/README.md +++ b/charts/spark-operator-chart/README.md @@ -1,6 +1,6 @@ # spark-operator -![Version: 2.1.0-rc.0](https://img.shields.io/badge/Version-2.1.0--rc.0-informational?style=flat-square) ![AppVersion: 2.1.0-rc.0](https://img.shields.io/badge/AppVersion-2.1.0--rc.0-informational?style=flat-square) +![Version: 2.1.0](https://img.shields.io/badge/Version-2.1.0-informational?style=flat-square) ![AppVersion: 2.1.0](https://img.shields.io/badge/AppVersion-2.1.0-informational?style=flat-square) A Helm chart for Spark on Kubernetes operator. @@ -86,6 +86,7 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum | controller.replicas | int | `1` | Number of replicas of controller. | | controller.workers | int | `10` | Reconcile concurrency, higher values might increase memory usage. | | controller.logLevel | string | `"info"` | Configure the verbosity of logging, can be one of `debug`, `info`, `error`. | +| controller.driverPodCreationGracePeriod | string | `"10s"` | Grace period after a successful spark-submit when driver pod not found errors will be retried. Useful if the driver pod can take some time to be created. | | controller.maxTrackedExecutorPerApp | int | `1000` | Specifies the maximum number of Executor pods that can be tracked by the controller per SparkApplication. | | controller.uiService.enable | bool | `true` | Specifies whether to create service for Spark web UI. 
| | controller.uiIngress.enable | bool | `false` | Specifies whether to create ingress for Spark web UI. `controller.uiService.enable` must be `true` to enable ingress. | @@ -97,11 +98,12 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum | controller.serviceAccount.create | bool | `true` | Specifies whether to create a service account for the controller. | | controller.serviceAccount.name | string | `""` | Optional name for the controller service account. | | controller.serviceAccount.annotations | object | `{}` | Extra annotations for the controller service account. | +| controller.serviceAccount.automountServiceAccountToken | bool | `true` | Auto-mount service account token to the controller pods. | | controller.rbac.create | bool | `true` | Specifies whether to create RBAC resources for the controller. | | controller.rbac.annotations | object | `{}` | Extra annotations for the controller RBAC resources. | | controller.labels | object | `{}` | Extra labels for controller pods. | | controller.annotations | object | `{}` | Extra annotations for controller pods. | -| controller.volumes | list | `[]` | Volumes for controller pods. | +| controller.volumes | list | `[{"emptyDir":{"sizeLimit":"1Gi"},"name":"tmp"}]` | Volumes for controller pods. | | controller.nodeSelector | object | `{}` | Node selector for controller pods. | | controller.affinity | object | `{}` | Affinity for controller pods. | | controller.tolerations | list | `[]` | List of node taints to tolerate for controller pods. | @@ -110,9 +112,9 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum | controller.topologySpreadConstraints | list | `[]` | Topology spread constraints rely on node labels to identify the topology domain(s) that each Node is in. Ref: [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). The labelSelector field in topology spread constraint will be set to the selector labels for controller pods if not specified. | | controller.env | list | `[]` | Environment variables for controller containers. | | controller.envFrom | list | `[]` | Environment variable sources for controller containers. | -| controller.volumeMounts | list | `[]` | Volume mounts for controller containers. | +| controller.volumeMounts | list | `[{"mountPath":"/tmp","name":"tmp","readOnly":false}]` | Volume mounts for controller containers. | | controller.resources | object | `{}` | Pod resource requests and limits for controller containers. Note, that each job submission will spawn a JVM within the controller pods using "/usr/local/openjdk-11/bin/java -Xmx128m". Kubernetes may kill these Java processes at will to enforce resource limits. When that happens, you will see the following error: 'failed to run spark-submit for SparkApplication [...]: signal: killed' - when this happens, you may want to increase memory limits. | -| controller.securityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"privileged":false,"runAsNonRoot":true}` | Security context for controller containers. | +| controller.securityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"privileged":false,"readOnlyRootFilesystem":true,"runAsNonRoot":true}` | Security context for controller containers. | | controller.sidecars | list | `[]` | Sidecar containers for controller pods. 
| | controller.podDisruptionBudget.enable | bool | `false` | Specifies whether to create pod disruption budget for controller. Ref: [Specifying a Disruption Budget for your Application](https://kubernetes.io/docs/tasks/run-application/configure-pdb/) | | controller.podDisruptionBudget.minAvailable | int | `1` | The number of pods that must be available. Require `controller.replicas` to be greater than 1 | @@ -134,12 +136,13 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum | webhook.serviceAccount.create | bool | `true` | Specifies whether to create a service account for the webhook. | | webhook.serviceAccount.name | string | `""` | Optional name for the webhook service account. | | webhook.serviceAccount.annotations | object | `{}` | Extra annotations for the webhook service account. | +| webhook.serviceAccount.automountServiceAccountToken | bool | `true` | Auto-mount service account token to the webhook pods. | | webhook.rbac.create | bool | `true` | Specifies whether to create RBAC resources for the webhook. | | webhook.rbac.annotations | object | `{}` | Extra annotations for the webhook RBAC resources. | | webhook.labels | object | `{}` | Extra labels for webhook pods. | | webhook.annotations | object | `{}` | Extra annotations for webhook pods. | | webhook.sidecars | list | `[]` | Sidecar containers for webhook pods. | -| webhook.volumes | list | `[]` | Volumes for webhook pods. | +| webhook.volumes | list | `[{"emptyDir":{"sizeLimit":"500Mi"},"name":"serving-certs"}]` | Volumes for webhook pods. | | webhook.nodeSelector | object | `{}` | Node selector for webhook pods. | | webhook.affinity | object | `{}` | Affinity for webhook pods. | | webhook.tolerations | list | `[]` | List of node taints to tolerate for webhook pods. | @@ -148,15 +151,16 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum | webhook.topologySpreadConstraints | list | `[]` | Topology spread constraints rely on node labels to identify the topology domain(s) that each Node is in. Ref: [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). The labelSelector field in topology spread constraint will be set to the selector labels for webhook pods if not specified. | | webhook.env | list | `[]` | Environment variables for webhook containers. | | webhook.envFrom | list | `[]` | Environment variable sources for webhook containers. | -| webhook.volumeMounts | list | `[]` | Volume mounts for webhook containers. | +| webhook.volumeMounts | list | `[{"mountPath":"/etc/k8s-webhook-server/serving-certs","name":"serving-certs","readOnly":false,"subPath":"serving-certs"}]` | Volume mounts for webhook containers. | | webhook.resources | object | `{}` | Pod resource requests and limits for webhook pods. | -| webhook.securityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"privileged":false,"runAsNonRoot":true}` | Security context for webhook containers. | +| webhook.securityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"privileged":false,"readOnlyRootFilesystem":true,"runAsNonRoot":true}` | Security context for webhook containers. | | webhook.podDisruptionBudget.enable | bool | `false` | Specifies whether to create pod disruption budget for webhook. 
Ref: [Specifying a Disruption Budget for your Application](https://kubernetes.io/docs/tasks/run-application/configure-pdb/) | | webhook.podDisruptionBudget.minAvailable | int | `1` | The number of pods that must be available. Require `webhook.replicas` to be greater than 1 | | spark.jobNamespaces | list | `["default"]` | List of namespaces where to run spark jobs. If empty string is included, all namespaces will be allowed. Make sure the namespaces have already existed. | | spark.serviceAccount.create | bool | `true` | Specifies whether to create a service account for spark applications. | | spark.serviceAccount.name | string | `""` | Optional name for the spark service account. | | spark.serviceAccount.annotations | object | `{}` | Optional annotations for the spark service account. | +| spark.serviceAccount.automountServiceAccountToken | bool | `true` | Auto-mount service account token to the spark applications pods. | | spark.rbac.create | bool | `true` | Specifies whether to create RBAC resources for spark applications. | | spark.rbac.annotations | object | `{}` | Optional annotations for the spark application RBAC resources. | | prometheus.metrics.enable | bool | `true` | Specifies whether to enable prometheus metrics scraping. | diff --git a/charts/spark-operator-chart/templates/controller/deployment.yaml b/charts/spark-operator-chart/templates/controller/deployment.yaml index 2a1fd5f19e..9cb27b366a 100644 --- a/charts/spark-operator-chart/templates/controller/deployment.yaml +++ b/charts/spark-operator-chart/templates/controller/deployment.yaml @@ -100,6 +100,9 @@ spec: {{- if .Values.controller.workqueueRateLimiter.maxDelay.enable }} - --workqueue-ratelimiter-max-delay={{ .Values.controller.workqueueRateLimiter.maxDelay.duration }} {{- end }} + {{- if .Values.controller.driverPodCreationGracePeriod }} + - --driver-pod-creation-grace-period={{ .Values.controller.driverPodCreationGracePeriod }} + {{- end }} {{- if .Values.controller.maxTrackedExecutorPerApp }} - --max-tracked-executor-per-app={{ .Values.controller.maxTrackedExecutorPerApp }} {{- end }} @@ -171,6 +174,7 @@ spec: priorityClassName: {{ . }} {{- end }} serviceAccountName: {{ include "spark-operator.controller.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.controller.serviceAccount.automountServiceAccountToken }} {{- with .Values.controller.podSecurityContext }} securityContext: {{- toYaml . | nindent 8 }} diff --git a/charts/spark-operator-chart/templates/controller/serviceaccount.yaml b/charts/spark-operator-chart/templates/controller/serviceaccount.yaml index 49c2358695..ead82d4fd7 100644 --- a/charts/spark-operator-chart/templates/controller/serviceaccount.yaml +++ b/charts/spark-operator-chart/templates/controller/serviceaccount.yaml @@ -17,6 +17,7 @@ limitations under the License. {{- if .Values.controller.serviceAccount.create }} apiVersion: v1 kind: ServiceAccount +automountServiceAccountToken: {{ .Values.controller.serviceAccount.automountServiceAccountToken }} metadata: name: {{ include "spark-operator.controller.serviceAccountName" . }} namespace: {{ .Release.Namespace }} diff --git a/charts/spark-operator-chart/templates/spark/serviceaccount.yaml b/charts/spark-operator-chart/templates/spark/serviceaccount.yaml index de24d801ed..e6f34b3c87 100644 --- a/charts/spark-operator-chart/templates/spark/serviceaccount.yaml +++ b/charts/spark-operator-chart/templates/spark/serviceaccount.yaml @@ -21,6 +21,7 @@ limitations under the License. 
--- apiVersion: v1 kind: ServiceAccount +automountServiceAccountToken: {{ $.Values.spark.serviceAccount.automountServiceAccountToken }} metadata: name: {{ include "spark-operator.spark.serviceAccountName" $ }} namespace: {{ $jobNamespace }} diff --git a/charts/spark-operator-chart/templates/webhook/deployment.yaml b/charts/spark-operator-chart/templates/webhook/deployment.yaml index ae5167a6e8..d3fa5a71d5 100644 --- a/charts/spark-operator-chart/templates/webhook/deployment.yaml +++ b/charts/spark-operator-chart/templates/webhook/deployment.yaml @@ -94,7 +94,7 @@ spec: {{- end }} {{- with .Values.webhook.volumeMounts }} volumeMounts: - {{- toYaml . | nindent 10 }} + {{- toYaml . | nindent 8 }} {{- end }} {{- with .Values.webhook.resources }} resources: @@ -123,7 +123,7 @@ spec: {{- end }} {{- with .Values.webhook.volumes }} volumes: - {{- toYaml . | nindent 8 }} + {{- toYaml . | nindent 6 }} {{- end }} {{- with .Values.webhook.nodeSelector }} nodeSelector: @@ -141,6 +141,7 @@ spec: priorityClassName: {{ . }} {{- end }} serviceAccountName: {{ include "spark-operator.webhook.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.webhook.serviceAccount.automountServiceAccountToken }} {{- with .Values.webhook.podSecurityContext }} securityContext: {{- toYaml . | nindent 8 }} diff --git a/charts/spark-operator-chart/templates/webhook/serviceaccount.yaml b/charts/spark-operator-chart/templates/webhook/serviceaccount.yaml index fea4a6bbe4..63c5044e20 100644 --- a/charts/spark-operator-chart/templates/webhook/serviceaccount.yaml +++ b/charts/spark-operator-chart/templates/webhook/serviceaccount.yaml @@ -18,6 +18,7 @@ limitations under the License. {{- if .Values.webhook.serviceAccount.create -}} apiVersion: v1 kind: ServiceAccount +automountServiceAccountToken: {{ .Values.webhook.serviceAccount.automountServiceAccountToken }} metadata: name: {{ include "spark-operator.webhook.serviceAccountName" . 
}} namespace: {{ .Release.Namespace }} diff --git a/charts/spark-operator-chart/tests/controller/deployment_test.yaml b/charts/spark-operator-chart/tests/controller/deployment_test.yaml index 67c475da40..b5fdc7b10a 100644 --- a/charts/spark-operator-chart/tests/controller/deployment_test.yaml +++ b/charts/spark-operator-chart/tests/controller/deployment_test.yaml @@ -355,16 +355,30 @@ tests: set: controller: securityContext: + readOnlyRootFilesystem: true runAsUser: 1000 runAsGroup: 2000 fsGroup: 3000 + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true + privileged: false asserts: - equal: path: spec.template.spec.containers[0].securityContext value: + readOnlyRootFilesystem: true runAsUser: 1000 runAsGroup: 2000 fsGroup: 3000 + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true + privileged: false - it: Should add sidecars if `controller.sidecars` is set set: @@ -637,6 +651,16 @@ tests: - notContains: path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args content: --workqueue-ratelimiter-max-delay=1h + + - it: Should contain `driver-pod-creation-grace-period` arg if `controller.driverPodCreationGracePeriod` is set + set: + controller: + driverPodCreationGracePeriod: 30s + asserts: + - contains: + path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args + content: --driver-pod-creation-grace-period=30s + - it: Should contain `--max-tracked-executor-per-app` arg if `controller.maxTrackedExecutorPerApp` is set set: controller: diff --git a/charts/spark-operator-chart/tests/webhook/deployment_test.yaml b/charts/spark-operator-chart/tests/webhook/deployment_test.yaml index bf6bc03c8b..7715cb2400 100644 --- a/charts/spark-operator-chart/tests/webhook/deployment_test.yaml +++ b/charts/spark-operator-chart/tests/webhook/deployment_test.yaml @@ -299,10 +299,14 @@ tests: set: webhook: securityContext: + readOnlyRootFilesystem: true runAsUser: 1000 runAsGroup: 2000 fsGroup: 3000 asserts: + - equal: + path: spec.template.spec.containers[0].securityContext.readOnlyRootFilesystem + value: true - equal: path: spec.template.spec.containers[0].securityContext.runAsUser value: 1000 diff --git a/charts/spark-operator-chart/values.yaml b/charts/spark-operator-chart/values.yaml index 9032087c61..b376b4968a 100644 --- a/charts/spark-operator-chart/values.yaml +++ b/charts/spark-operator-chart/values.yaml @@ -51,6 +51,9 @@ controller: # -- Configure the verbosity of logging, can be one of `debug`, `info`, `error`. logLevel: info + # -- Grace period after a successful spark-submit when driver pod not found errors will be retried. Useful if the driver pod can take some time to be created. + driverPodCreationGracePeriod: 10s + # -- Specifies the maximum number of Executor pods that can be tracked by the controller per SparkApplication. maxTrackedExecutorPerApp: 1000 @@ -87,6 +90,8 @@ controller: name: "" # -- Extra annotations for the controller service account. annotations: {} + # -- Auto-mount service account token to the controller pods. + automountServiceAccountToken: true rbac: # -- Specifies whether to create RBAC resources for the controller. @@ -105,7 +110,11 @@ controller: # key2: value2 # -- Volumes for controller pods. - volumes: [] + volumes: + # Create a tmp directory to write Spark artifacts to for deployed Spark apps. + - name: tmp + emptyDir: + sizeLimit: 1Gi # -- Node selector for controller pods. 
nodeSelector: {} @@ -141,7 +150,11 @@ controller: envFrom: [] # -- Volume mounts for controller containers. - volumeMounts: [] + volumeMounts: + # Mount a tmp directory to write Spark artifacts to for deployed Spark apps. + - name: tmp + mountPath: "/tmp" + readOnly: false # -- Pod resource requests and limits for controller containers. # Note, that each job submission will spawn a JVM within the controller pods using "/usr/local/openjdk-11/bin/java -Xmx128m". @@ -157,6 +170,7 @@ controller: # -- Security context for controller containers. securityContext: + readOnlyRootFilesystem: true privileged: false allowPrivilegeEscalation: false runAsNonRoot: true @@ -231,6 +245,8 @@ webhook: name: "" # -- Extra annotations for the webhook service account. annotations: {} + # -- Auto-mount service account token to the webhook pods. + automountServiceAccountToken: true rbac: # -- Specifies whether to create RBAC resources for the webhook. @@ -252,7 +268,11 @@ webhook: sidecars: [] # -- Volumes for webhook pods. - volumes: [] + volumes: + # Create a dir for the webhook to generate its certificates in. + - name: serving-certs + emptyDir: + sizeLimit: 500Mi # -- Node selector for webhook pods. nodeSelector: {} @@ -288,7 +308,13 @@ webhook: envFrom: [] # -- Volume mounts for webhook containers. - volumeMounts: [] + volumeMounts: + # Mount a dir for the webhook to generate its certificates in. + - name: serving-certs + mountPath: /etc/k8s-webhook-server/serving-certs + subPath: serving-certs + readOnly: false + # -- Pod resource requests and limits for webhook pods. resources: {} @@ -301,6 +327,7 @@ webhook: # -- Security context for webhook containers. securityContext: + readOnlyRootFilesystem: true privileged: false allowPrivilegeEscalation: false runAsNonRoot: true @@ -331,6 +358,8 @@ spark: name: "" # -- Optional annotations for the spark service account. annotations: {} + # -- Auto-mount service account token to the spark applications pods. + automountServiceAccountToken: true rbac: # -- Specifies whether to create RBAC resources for spark applications. diff --git a/cmd/main.go b/cmd/main.go deleted file mode 100644 index 38085497b7..0000000000 --- a/cmd/main.go +++ /dev/null @@ -1,31 +0,0 @@ -/* -Copyright 2024 The Kubeflow authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package main - -import ( - "fmt" - "os" - - "github.com/kubeflow/spark-operator/cmd/operator" -) - -func main() { - if err := operator.NewCommand().Execute(); err != nil { - fmt.Fprintf(os.Stderr, "%v\n", err) - os.Exit(1) - } -} diff --git a/cmd/operator/controller/start.go b/cmd/operator/controller/start.go index 01d84b8120..3a2cd0fb47 100644 --- a/cmd/operator/controller/start.go +++ b/cmd/operator/controller/start.go @@ -100,6 +100,8 @@ var ( leaderElectionRenewDeadline time.Duration leaderElectionRetryPeriod time.Duration + driverPodCreationGracePeriod time.Duration + // Metrics enableMetrics bool metricsBindAddress string @@ -163,6 +165,8 @@ func NewStartCommand() *cobra.Command { command.Flags().DurationVar(&leaderElectionRenewDeadline, "leader-election-renew-deadline", 14*time.Second, "Leader election renew deadline.") command.Flags().DurationVar(&leaderElectionRetryPeriod, "leader-election-retry-period", 4*time.Second, "Leader election retry period.") + command.Flags().DurationVar(&driverPodCreationGracePeriod, "driver-pod-creation-grace-period", 10*time.Second, "Grace period after a successful spark-submit when driver pod not found errors will be retried. Useful if the driver pod can take some time to be created.") + command.Flags().BoolVar(&enableMetrics, "enable-metrics", false, "Enable metrics.") command.Flags().StringVar(&metricsBindAddress, "metrics-bind-address", "0", "The address the metric endpoint binds to. "+ "Use the port :8080. If not set, it will be 0 in order to disable the metrics server") @@ -394,14 +398,15 @@ func newSparkApplicationReconcilerOptions() sparkapplication.Options { sparkExecutorMetrics.Register() } options := sparkapplication.Options{ - Namespaces: namespaces, - EnableUIService: enableUIService, - IngressClassName: ingressClassName, - IngressURLFormat: ingressURLFormat, - DefaultBatchScheduler: defaultBatchScheduler, - SparkApplicationMetrics: sparkApplicationMetrics, - SparkExecutorMetrics: sparkExecutorMetrics, - MaxTrackedExecutorPerApp: maxTrackedExecutorPerApp, + Namespaces: namespaces, + EnableUIService: enableUIService, + IngressClassName: ingressClassName, + IngressURLFormat: ingressURLFormat, + DefaultBatchScheduler: defaultBatchScheduler, + DriverPodCreationGracePeriod: driverPodCreationGracePeriod, + SparkApplicationMetrics: sparkApplicationMetrics, + SparkExecutorMetrics: sparkExecutorMetrics, + MaxTrackedExecutorPerApp: maxTrackedExecutorPerApp, } if enableBatchScheduler { options.KubeSchedulerNames = kubeSchedulerNames diff --git a/cmd/operator/root.go b/cmd/operator/main.go similarity index 84% rename from cmd/operator/root.go rename to cmd/operator/main.go index 2ddaa900d8..a6d41c002d 100644 --- a/cmd/operator/root.go +++ b/cmd/operator/main.go @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - https://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -14,9 +14,12 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package operator +package main import ( + "fmt" + "os" + "github.com/spf13/cobra" "github.com/kubeflow/spark-operator/cmd/operator/controller" @@ -37,3 +40,10 @@ func NewCommand() *cobra.Command { command.AddCommand(version.NewCommand()) return command } + +func main() { + if err := NewCommand().Execute(); err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err) + os.Exit(1) + } +} diff --git a/cmd/operator/webhook/start.go b/cmd/operator/webhook/start.go index e3135bfbb8..52586edee6 100644 --- a/cmd/operator/webhook/start.go +++ b/cmd/operator/webhook/start.go @@ -198,6 +198,7 @@ func start() { Port: webhookPort, CertDir: webhookCertDir, CertName: webhookCertName, + KeyName: webhookKeyName, TLSOpts: tlsOptions, }), HealthProbeBindAddress: healthProbeBindAddress, diff --git a/sparkctl/README.md b/cmd/sparkctl/README.md similarity index 100% rename from sparkctl/README.md rename to cmd/sparkctl/README.md diff --git a/sparkctl/cmd/client.go b/cmd/sparkctl/app/client.go similarity index 99% rename from sparkctl/cmd/client.go rename to cmd/sparkctl/app/client.go index e22d26afa1..0ab01ad876 100644 --- a/sparkctl/cmd/client.go +++ b/cmd/sparkctl/app/client.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cmd +package app import ( "context" diff --git a/sparkctl/cmd/create.go b/cmd/sparkctl/app/create.go similarity index 99% rename from sparkctl/cmd/create.go rename to cmd/sparkctl/app/create.go index eddad79257..9509b9d7e4 100644 --- a/sparkctl/cmd/create.go +++ b/cmd/sparkctl/app/create.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cmd +package app import ( "context" diff --git a/sparkctl/cmd/create_test.go b/cmd/sparkctl/app/create_test.go similarity index 99% rename from sparkctl/cmd/create_test.go rename to cmd/sparkctl/app/create_test.go index aa3d89615d..28d62fddac 100644 --- a/sparkctl/cmd/create_test.go +++ b/cmd/sparkctl/app/create_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cmd +package app import ( "strings" diff --git a/sparkctl/cmd/delete.go b/cmd/sparkctl/app/delete.go similarity index 99% rename from sparkctl/cmd/delete.go rename to cmd/sparkctl/app/delete.go index f75dc65df5..06021b2716 100644 --- a/sparkctl/cmd/delete.go +++ b/cmd/sparkctl/app/delete.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cmd +package app import ( "context" diff --git a/sparkctl/cmd/event.go b/cmd/sparkctl/app/event.go similarity index 99% rename from sparkctl/cmd/event.go rename to cmd/sparkctl/app/event.go index 63a0a5b880..37ced1ad0a 100644 --- a/sparkctl/cmd/event.go +++ b/cmd/sparkctl/app/event.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cmd +package app import ( "context" diff --git a/sparkctl/cmd/forward.go b/cmd/sparkctl/app/forward.go similarity index 99% rename from sparkctl/cmd/forward.go rename to cmd/sparkctl/app/forward.go index caeb7638f9..43b817daf8 100644 --- a/sparkctl/cmd/forward.go +++ b/cmd/sparkctl/app/forward.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package cmd +package app import ( "context" diff --git a/sparkctl/cmd/gcs.go b/cmd/sparkctl/app/gcs.go similarity index 99% rename from sparkctl/cmd/gcs.go rename to cmd/sparkctl/app/gcs.go index fc807f8927..5601497afb 100644 --- a/sparkctl/cmd/gcs.go +++ b/cmd/sparkctl/app/gcs.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cmd +package app import ( "fmt" diff --git a/sparkctl/cmd/list.go b/cmd/sparkctl/app/list.go similarity index 99% rename from sparkctl/cmd/list.go rename to cmd/sparkctl/app/list.go index 5777d14db9..63c2bf3b8a 100644 --- a/sparkctl/cmd/list.go +++ b/cmd/sparkctl/app/list.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cmd +package app import ( "context" diff --git a/sparkctl/cmd/log.go b/cmd/sparkctl/app/log.go similarity index 99% rename from sparkctl/cmd/log.go rename to cmd/sparkctl/app/log.go index c917ccceaf..9ccdf4a8fe 100644 --- a/sparkctl/cmd/log.go +++ b/cmd/sparkctl/app/log.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cmd +package app import ( "context" diff --git a/sparkctl/cmd/root.go b/cmd/sparkctl/app/root.go similarity index 99% rename from sparkctl/cmd/root.go rename to cmd/sparkctl/app/root.go index 6ba1c5cae8..e845b8be89 100644 --- a/sparkctl/cmd/root.go +++ b/cmd/sparkctl/app/root.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cmd +package app import ( "fmt" diff --git a/sparkctl/cmd/s3.go b/cmd/sparkctl/app/s3.go similarity index 99% rename from sparkctl/cmd/s3.go rename to cmd/sparkctl/app/s3.go index 28e9350ae1..4bdbeaa6c9 100644 --- a/sparkctl/cmd/s3.go +++ b/cmd/sparkctl/app/s3.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cmd +package app import ( "context" diff --git a/sparkctl/cmd/status.go b/cmd/sparkctl/app/status.go similarity index 99% rename from sparkctl/cmd/status.go rename to cmd/sparkctl/app/status.go index cd773454a0..59d2a05faa 100644 --- a/sparkctl/cmd/status.go +++ b/cmd/sparkctl/app/status.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package cmd +package app import ( "fmt" diff --git a/sparkctl/cmd/testdata/hadoop-conf/binary.dat b/cmd/sparkctl/app/testdata/hadoop-conf/binary.dat similarity index 100% rename from sparkctl/cmd/testdata/hadoop-conf/binary.dat rename to cmd/sparkctl/app/testdata/hadoop-conf/binary.dat diff --git a/sparkctl/cmd/testdata/hadoop-conf/core-site.xml b/cmd/sparkctl/app/testdata/hadoop-conf/core-site.xml similarity index 100% rename from sparkctl/cmd/testdata/hadoop-conf/core-site.xml rename to cmd/sparkctl/app/testdata/hadoop-conf/core-site.xml diff --git a/sparkctl/cmd/testdata/test-app.yaml b/cmd/sparkctl/app/testdata/test-app.yaml similarity index 100% rename from sparkctl/cmd/testdata/test-app.yaml rename to cmd/sparkctl/app/testdata/test-app.yaml diff --git a/sparkctl/cmd/utils.go b/cmd/sparkctl/app/utils.go similarity index 98% rename from sparkctl/cmd/utils.go rename to cmd/sparkctl/app/utils.go index d2b2aa5050..0786c8a5ba 100644 --- a/sparkctl/cmd/utils.go +++ b/cmd/sparkctl/app/utils.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cmd +package app import ( "time" diff --git a/sparkctl/main.go b/cmd/sparkctl/main.go similarity index 90% rename from sparkctl/main.go rename to cmd/sparkctl/main.go index 80c89a81b6..4f0e00e654 100644 --- a/sparkctl/main.go +++ b/cmd/sparkctl/main.go @@ -19,9 +19,9 @@ package main import ( _ "k8s.io/client-go/plugin/pkg/client/auth" - "github.com/kubeflow/spark-operator/sparkctl/cmd" + "github.com/kubeflow/spark-operator/cmd/sparkctl/app" ) func main() { - cmd.Execute() + app.Execute() } diff --git a/entrypoint.sh b/entrypoint.sh index 0ca8730123..38ee7e9264 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -4,21 +4,23 @@ set -ex # Check whether there is a passwd entry for the container UID -uid=$(id -u) -gid=$(id -g) - -# turn off -e for getent because it will return error code in anonymous uid case -set +e -uidentry=$(getent passwd $uid) -set -e - -# If there is no passwd entry for the container UID, attempt to create one -if [[ -z "$uidentry" ]] ; then - if [[ -w /etc/passwd ]] ; then - echo "$uid:x:$uid:$gid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd - else - echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" - fi +myuid="$(id -u)" +# If there is no passwd entry for the container UID, attempt to fake one +# You can also refer to the https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523 +# It's to resolve OpenShift random UID case. +# See also: https://github.com/docker-library/postgres/pull/448 +if ! 
getent passwd "$myuid" &> /dev/null; then + for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do + if [ -s "$wrapper" ]; then + NSS_WRAPPER_PASSWD="$(mktemp)" + NSS_WRAPPER_GROUP="$(mktemp)" + export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP + mygid="$(id -g)" + printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD" + printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP" + break + fi + done fi exec /usr/bin/tini -s -- /usr/bin/spark-operator "$@" diff --git a/go.mod b/go.mod index 11af87ff3b..df4ebf3746 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/golang/glog v1.2.2 github.com/google/uuid v1.6.0 github.com/olekukonko/tablewriter v0.0.5 - github.com/onsi/ginkgo/v2 v2.20.2 + github.com/onsi/ginkgo/v2 v2.22.0 github.com/onsi/gomega v1.34.2 github.com/prometheus/client_golang v1.20.5 github.com/robfig/cron/v3 v3.0.1 @@ -19,7 +19,7 @@ require ( github.com/stretchr/testify v1.9.0 go.uber.org/zap v1.27.0 gocloud.dev v0.40.0 - golang.org/x/mod v0.20.0 + golang.org/x/mod v0.21.0 golang.org/x/net v0.30.0 golang.org/x/time v0.7.0 helm.sh/helm/v3 v3.16.2 @@ -32,7 +32,7 @@ require ( sigs.k8s.io/controller-runtime v0.17.5 sigs.k8s.io/scheduler-plugins v0.29.8 sigs.k8s.io/yaml v1.4.0 - volcano.sh/apis v1.9.0 + volcano.sh/apis v1.10.0 ) require ( @@ -119,7 +119,7 @@ require ( github.com/google/gnostic-models v0.6.9-0.20230804172637-c7be7c783f49 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5 // indirect + github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect github.com/google/s2a-go v0.1.8 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/google/wire v0.6.0 // indirect @@ -205,7 +205,7 @@ require ( golang.org/x/sys v0.26.0 // indirect golang.org/x/term v0.25.0 // indirect golang.org/x/text v0.19.0 // indirect - golang.org/x/tools v0.24.0 // indirect + golang.org/x/tools v0.26.0 // indirect golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/api v0.197.0 // indirect diff --git a/go.sum b/go.sum index 496567c078..d5efc72786 100644 --- a/go.sum +++ b/go.sum @@ -270,8 +270,8 @@ github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= -github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5 h1:5iH8iuqE5apketRbSFBy+X1V0o+l+8NF1avt4HWl7cA= -github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= @@ -393,8 +393,8 @@ github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J 
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= -github.com/onsi/ginkgo/v2 v2.20.2 h1:7NVCeyIWROIAheY21RLS+3j2bb52W0W82tkberYytp4= -github.com/onsi/ginkgo/v2 v2.20.2/go.mod h1:K9gyxPIlb+aIvnZ8bd9Ak+YP18w3APlR+5coaZoE2ag= +github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= +github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= github.com/onsi/gomega v1.34.2 h1:pNCwDkzrsv7MS9kpaQvVb1aVLahQXyJ/Tv5oAZMI3i8= github.com/onsi/gomega v1.34.2/go.mod h1:v1xfxRgk0KIsG+QOdm7p8UosrOzPYRo60fd3B/1Dukc= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= @@ -555,8 +555,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91 golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= -golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -643,8 +643,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= -golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= -golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -749,5 +749,5 @@ sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+s sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= -volcano.sh/apis v1.9.0 h1:e+9yEbQOi6HvgaayAxYULT6n+59mkYvmqjKhp9Z06sY= -volcano.sh/apis v1.9.0/go.mod h1:yXNfsZRzAOq6EUyPJYFrlMorh1XsYQGonGWyr4IiznM= +volcano.sh/apis v1.10.0 h1:Z9eLwibQmhpFmYGLWxjsTWwsYeTEKvvjFcLptmP2qxE= +volcano.sh/apis v1.10.0/go.mod h1:z8hhFZ2qcUMR1JIjVYmBqL98CVaXNzsQAcqKiytQW9s= diff --git 
a/internal/controller/sparkapplication/controller.go b/internal/controller/sparkapplication/controller.go index e4cb78d248..5f3c6b79db 100644 --- a/internal/controller/sparkapplication/controller.go +++ b/internal/controller/sparkapplication/controller.go @@ -63,6 +63,8 @@ type Options struct { IngressURLFormat string DefaultBatchScheduler string + DriverPodCreationGracePeriod time.Duration + KubeSchedulerNames []string SparkApplicationMetrics *metrics.SparkApplicationMetrics @@ -773,10 +775,13 @@ func (r *Reconciler) updateDriverState(_ context.Context, app *v1beta2.SparkAppl } if driverPod == nil { - app.Status.AppState.State = v1beta2.ApplicationStateFailing - app.Status.AppState.ErrorMessage = "driver pod not found" - app.Status.TerminationTime = metav1.Now() - return nil + if app.Status.AppState.State != v1beta2.ApplicationStateSubmitted || metav1.Now().Sub(app.Status.LastSubmissionAttemptTime.Time) > r.options.DriverPodCreationGracePeriod { + app.Status.AppState.State = v1beta2.ApplicationStateFailing + app.Status.AppState.ErrorMessage = "driver pod not found" + app.Status.TerminationTime = metav1.Now() + return nil + } + return fmt.Errorf("driver pod not found, while inside the grace period. Grace period of %v expires at %v", r.options.DriverPodCreationGracePeriod, app.Status.LastSubmissionAttemptTime.Add(r.options.DriverPodCreationGracePeriod)) } app.Status.SparkApplicationID = util.GetSparkApplicationID(driverPod) diff --git a/internal/controller/sparkapplication/controller_test.go b/internal/controller/sparkapplication/controller_test.go index 66afe0ca4b..704ad4ee03 100644 --- a/internal/controller/sparkapplication/controller_test.go +++ b/internal/controller/sparkapplication/controller_test.go @@ -74,6 +74,149 @@ var _ = Describe("SparkApplication Controller", func() { }) }) + Context("When reconciling a submitted SparkApplication with no driver pod", func() { + ctx := context.Background() + appName := "test" + appNamespace := "default" + key := types.NamespacedName{ + Name: appName, + Namespace: appNamespace, + } + + BeforeEach(func() { + By("Creating a test SparkApplication") + app := &v1beta2.SparkApplication{} + if err := k8sClient.Get(ctx, key, app); err != nil && errors.IsNotFound(err) { + app = &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + Namespace: appNamespace, + }, + Spec: v1beta2.SparkApplicationSpec{ + MainApplicationFile: util.StringPtr("local:///dummy.jar"), + }, + } + v1beta2.SetSparkApplicationDefaults(app) + Expect(k8sClient.Create(ctx, app)).To(Succeed()) + + app.Status.AppState.State = v1beta2.ApplicationStateSubmitted + app.Status.DriverInfo.PodName = "non-existent-driver" + app.Status.LastSubmissionAttemptTime = metav1.NewTime(time.Now()) + Expect(k8sClient.Status().Update(ctx, app)).To(Succeed()) + } + }) + + AfterEach(func() { + app := &v1beta2.SparkApplication{} + Expect(k8sClient.Get(ctx, key, app)).To(Succeed()) + + By("Deleting the created test SparkApplication") + Expect(k8sClient.Delete(ctx, app)).To(Succeed()) + }) + + It("Should requeue submitted SparkApplication when driver pod not found inside the grace period", func() { + By("Reconciling the created test SparkApplication") + reconciler := sparkapplication.NewReconciler( + nil, + k8sClient.Scheme(), + k8sClient, + nil, + nil, + sparkapplication.Options{Namespaces: []string{appNamespace}, DriverPodCreationGracePeriod: 10 * time.Second}, + ) + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key}) + 
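// Descriptive note (not part of the upstream patch): while the app is still inside
// driverPodCreationGracePeriod, updateDriverState (see the controller.go hunk above)
// returns an error instead of marking the application as failing, so controller-runtime
// requeues the request with backoff and the application remains in the Submitted state.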
Expect(err).To(MatchError(ContainSubstring("driver pod not found, while inside the grace period. Grace period of"))) + app := &v1beta2.SparkApplication{} + Expect(k8sClient.Get(ctx, key, app)).To(Succeed()) + Expect(app.Status.AppState.State).To(Equal(v1beta2.ApplicationStateSubmitted)) + }) + + It("Should fail a SparkApplication when driver pod not found outside the grace period", func() { + By("Reconciling the created test SparkApplication") + reconciler := sparkapplication.NewReconciler( + nil, + k8sClient.Scheme(), + k8sClient, + nil, + nil, + sparkapplication.Options{Namespaces: []string{appNamespace}, DriverPodCreationGracePeriod: 0 * time.Second}, + ) + result, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key}) + Expect(err).NotTo(HaveOccurred()) + Expect(result.Requeue).To(BeFalse()) + + app := &v1beta2.SparkApplication{} + Expect(k8sClient.Get(ctx, key, app)).To(Succeed()) + Expect(app.Status.AppState.State).To(Equal(v1beta2.ApplicationStateFailing)) + }) + }) + + Context("When reconciling a SparkApplication with driver pod", func() { + ctx := context.Background() + appName := "test" + appNamespace := "default" + key := types.NamespacedName{ + Name: appName, + Namespace: appNamespace, + } + + BeforeEach(func() { + By("Creating a test SparkApplication") + app := &v1beta2.SparkApplication{} + if err := k8sClient.Get(ctx, key, app); err != nil && errors.IsNotFound(err) { + app = &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + Namespace: appNamespace, + }, + Spec: v1beta2.SparkApplicationSpec{ + MainApplicationFile: util.StringPtr("local:///dummy.jar"), + }, + } + v1beta2.SetSparkApplicationDefaults(app) + Expect(k8sClient.Create(ctx, app)).To(Succeed()) + + app.Status.AppState.State = v1beta2.ApplicationStateSubmitted + driverPod := createDriverPod(appName, appNamespace) + Expect(k8sClient.Create(ctx, driverPod)).To(Succeed()) + app.Status.DriverInfo.PodName = driverPod.Name + Expect(k8sClient.Status().Update(ctx, app)).To(Succeed()) + } + }) + + AfterEach(func() { + app := &v1beta2.SparkApplication{} + Expect(k8sClient.Get(ctx, key, app)).To(Succeed()) + + By("Deleting the created test SparkApplication") + Expect(k8sClient.Delete(ctx, app)).To(Succeed()) + + By("Deleting the driver pod") + driverPod := &corev1.Pod{} + Expect(k8sClient.Get(ctx, getDriverNamespacedName(appName, appNamespace), driverPod)).To(Succeed()) + Expect(k8sClient.Delete(ctx, driverPod)).To(Succeed()) + }) + + It("When reconciling a submitted SparkApplication when driver pod exists", func() { + By("Reconciling the created test SparkApplication") + reconciler := sparkapplication.NewReconciler( + nil, + k8sClient.Scheme(), + k8sClient, + nil, + nil, + sparkapplication.Options{Namespaces: []string{appNamespace}, DriverPodCreationGracePeriod: 0 * time.Second}, + ) + result, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key}) + Expect(err).NotTo(HaveOccurred()) + Expect(result.Requeue).To(BeFalse()) + + app := &v1beta2.SparkApplication{} + Expect(k8sClient.Get(ctx, key, app)).To(Succeed()) + Expect(app.Status.AppState.State).To(Equal(v1beta2.ApplicationStateSubmitted)) + }) + }) + Context("When reconciling a completed SparkApplication", func() { ctx := context.Background() appName := "test" diff --git a/internal/controller/sparkapplication/submission.go b/internal/controller/sparkapplication/submission.go index d0bb6f5781..3c2479591c 100644 --- a/internal/controller/sparkapplication/submission.go +++ 
diff --git a/internal/controller/sparkapplication/submission.go b/internal/controller/sparkapplication/submission.go
index d0bb6f5781..3c2479591c 100644
--- a/internal/controller/sparkapplication/submission.go
+++ b/internal/controller/sparkapplication/submission.go
@@ -502,20 +502,22 @@ func driverVolumeMountsOption(app *v1beta2.SparkApplication) ([]string, error) {
 
 		switch volumeType {
 		case common.VolumeTypeEmptyDir:
-			args = append(
-				args,
-				"--conf",
-				fmt.Sprintf(
-					"%s=%s",
+			if volume.EmptyDir.SizeLimit != nil {
+				args = append(
+					args,
+					"--conf",
 					fmt.Sprintf(
-						common.SparkKubernetesDriverVolumesOptionsTemplate,
-						common.VolumeTypeEmptyDir,
-						volume.Name,
-						"sizeLimit",
+						"%s=%s",
+						fmt.Sprintf(
+							common.SparkKubernetesDriverVolumesOptionsTemplate,
+							common.VolumeTypeEmptyDir,
+							volume.Name,
+							"sizeLimit",
+						),
+						volume.EmptyDir.SizeLimit.String(),
 					),
-					volume.EmptyDir.SizeLimit.String(),
-				),
-			)
+				)
+			}
 		case common.VolumeTypeHostPath:
 			args = append(
 				args,
@@ -838,20 +840,22 @@ func executorVolumeMountsOption(app *v1beta2.SparkApplication) ([]string, error) {
 		}
 		switch volumeType {
 		case common.VolumeTypeEmptyDir:
-			args = append(
-				args,
-				"--conf",
-				fmt.Sprintf(
-					"%s=%s",
+			if volume.EmptyDir.SizeLimit != nil {
+				args = append(
+					args,
+					"--conf",
 					fmt.Sprintf(
-						common.SparkKubernetesExecutorVolumesOptionsTemplate,
-						common.VolumeTypeEmptyDir,
-						volume.Name,
-						"sizeLimit",
+						"%s=%s",
+						fmt.Sprintf(
+							common.SparkKubernetesExecutorVolumesOptionsTemplate,
+							common.VolumeTypeEmptyDir,
+							volume.Name,
+							"sizeLimit",
+						),
+						volume.EmptyDir.SizeLimit.String(),
 					),
-					volume.EmptyDir.SizeLimit.String(),
-				),
-			)
+				)
+			}
 		case common.VolumeTypeHostPath:
 			args = append(
 				args,
diff --git a/internal/webhook/sparkpod_defaulter.go b/internal/webhook/sparkpod_defaulter.go
index add324b196..53e717ff32 100644
--- a/internal/webhook/sparkpod_defaulter.go
+++ b/internal/webhook/sparkpod_defaulter.go
@@ -537,7 +537,7 @@ func addContainerSecurityContext(pod *corev1.Pod, app *v1beta2.SparkApplication) error {
 	if i < 0 {
 		return fmt.Errorf("executor container not found in pod")
 	}
-	if app.Spec.Driver.SecurityContext == nil {
+	if app.Spec.Executor.SecurityContext == nil {
 		return nil
 	}
 	pod.Spec.Containers[i].SecurityContext = app.Spec.Executor.SecurityContext
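The one-line defaulter fix above makes the webhook read the executor's own security context instead of mistakenly gating on the driver's. A sketch of a spec that exercises the fixed path, assuming the v2 module layout (github.com/kubeflow/spark-operator/api/v1beta2) and that ExecutorSpec embeds SparkPodSpec as in this API version:

package main

import (
	corev1 "k8s.io/api/core/v1"

	"github.com/kubeflow/spark-operator/api/v1beta2"
)

func boolPtr(b bool) *bool { return &b }

func main() {
	// With the fix, this executor-level security context is applied to the
	// executor container even when the driver spec sets none of its own.
	app := &v1beta2.SparkApplication{
		Spec: v1beta2.SparkApplicationSpec{
			Executor: v1beta2.ExecutorSpec{
				SparkPodSpec: v1beta2.SparkPodSpec{
					SecurityContext: &corev1.SecurityContext{
						RunAsNonRoot: boolPtr(true),
					},
				},
			},
		},
	}
	_ = app
}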
diff --git a/pkg/util/sparkapplication.go b/pkg/util/sparkapplication.go
index a0aadd93f0..65bdb26922 100644
--- a/pkg/util/sparkapplication.go
+++ b/pkg/util/sparkapplication.go
@@ -17,6 +17,7 @@ limitations under the License.
 package util
 
 import (
+	"crypto/md5"
 	"fmt"
 	"reflect"
 	"strings"
@@ -155,12 +156,25 @@ func GetExecutorLocalVolumeMounts(app *v1beta2.SparkApplication) []corev1.VolumeMount {
 	return volumeMounts
 }
 
+func generateName(name, suffix string) string {
+	// Some resource names are used as DNS labels, so must be 63 characters or shorter
+	preferredName := fmt.Sprintf("%s-%s", name, suffix)
+	if len(preferredName) <= 63 {
+		return preferredName
+	}
+
+	// Truncate the name and append a hash to ensure uniqueness while staying below the limit
+	maxNameLength := 63 - len(suffix) - 10 // 8 for the hash, 2 for the dashes
+	hash := fmt.Sprintf("%x", md5.Sum([]byte(preferredName)))
+	return fmt.Sprintf("%s-%s-%s", name[:maxNameLength], hash[:8], suffix)
+}
+
 func GetDefaultUIServiceName(app *v1beta2.SparkApplication) string {
-	return fmt.Sprintf("%s-ui-svc", app.Name)
+	return generateName(app.Name, "ui-svc")
 }
 
 func GetDefaultUIIngressName(app *v1beta2.SparkApplication) string {
-	return fmt.Sprintf("%s-ui-ingress", app.Name)
+	return generateName(app.Name, "ui-ingress")
 }
 
 func GetResourceLabels(app *v1beta2.SparkApplication) map[string]string {
diff --git a/pkg/util/sparkapplication_test.go b/pkg/util/sparkapplication_test.go
index 7f0ab4a464..3aaa57e82f 100644
--- a/pkg/util/sparkapplication_test.go
+++ b/pkg/util/sparkapplication_test.go
@@ -284,6 +284,20 @@ var _ = Describe("GetDefaultUIServiceName", func() {
 	It("Should return the default UI service name", func() {
 		Expect(util.GetDefaultUIServiceName(app)).To(Equal("test-app-ui-svc"))
 	})
+
+	appWithLongName := &v1beta2.SparkApplication{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-app-with-a-long-name-that-would-be-over-63-characters",
+			Namespace: "test-namespace",
+		},
+	}
+
+	It("Should truncate the app name so the service name is at most 63 characters", func() {
+		serviceName := util.GetDefaultUIServiceName(appWithLongName)
+		Expect(len(serviceName)).To(BeNumerically("<=", 63))
+		Expect(serviceName).To(HavePrefix(appWithLongName.Name[:47]))
+		Expect(serviceName).To(HaveSuffix("-ui-svc"))
+	})
 })
 
 var _ = Describe("GetDefaultUIIngressName", func() {
@@ -297,6 +311,20 @@
 	It("Should return the default UI ingress name", func() {
 		Expect(util.GetDefaultUIIngressName(app)).To(Equal("test-app-ui-ingress"))
 	})
+
+	appWithLongName := &v1beta2.SparkApplication{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-app-with-a-long-name-that-would-be-over-63-characters",
+			Namespace: "test-namespace",
+		},
+	}
+
+	It("Should truncate the app name so the ingress name is at most 63 characters", func() {
+		ingressName := util.GetDefaultUIIngressName(appWithLongName)
+		Expect(len(ingressName)).To(BeNumerically("<=", 63))
+		Expect(ingressName).To(HavePrefix(appWithLongName.Name[:42]))
+		Expect(ingressName).To(HaveSuffix("-ui-ingress"))
+	})
 })
 
 var _ = Describe("IsDriverTerminated", func() {
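A standalone check of the truncation arithmetic in generateName above: for over-long names, the truncated name, two dashes, the 8-character hash prefix, and the suffix always total exactly 63. The helper is copied here only for illustration:

package main

import (
	"crypto/md5"
	"fmt"
)

// Copy of the truncation scheme above, for checking the length arithmetic:
// truncated name + "-" + 8-char hash + "-" + suffix == 63.
func generateName(name, suffix string) string {
	preferredName := fmt.Sprintf("%s-%s", name, suffix)
	if len(preferredName) <= 63 {
		return preferredName
	}
	maxNameLength := 63 - len(suffix) - 10 // 8 for the hash, 2 for the dashes
	hash := fmt.Sprintf("%x", md5.Sum([]byte(preferredName)))
	return fmt.Sprintf("%s-%s-%s", name[:maxNameLength], hash[:8], suffix)
}

func main() {
	long := "test-app-with-a-long-name-that-would-be-over-63-characters"
	fmt.Println(len(generateName(long, "ui-svc")))     // 63
	fmt.Println(len(generateName(long, "ui-ingress"))) // 63
}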