diff --git a/Dockerfile b/Dockerfile index 93dc26c90..dad00404f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,6 +22,8 @@ ARG TARGETOS ARG TARGETARCH +WORKDIR /root + RUN apt-get update \ && apt-get install -y tini \ && rm -rf /var/lib/apt/lists/* @@ -32,7 +34,7 @@ RUN set -eux && \ tar -zxvf arena-installer.tar.gz && \ mv arena-installer-*-${TARGETOS}-${TARGETARCH} arena-installer && \ arena-installer/install.sh --only-binary && \ - rm -rf arena-installer.tar.gz arena-installer + rm -rf arena-installer.tar.gz COPY entrypoint.sh /usr/local/bin/ diff --git a/VERSION b/VERSION index 78bc1abd1..71172b43a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.10.0 +0.10.1 \ No newline at end of file diff --git a/arena-artifacts/Chart.yaml b/arena-artifacts/Chart.yaml index 879c01aeb..c520cb7e4 100644 --- a/arena-artifacts/Chart.yaml +++ b/arena-artifacts/Chart.yaml @@ -15,13 +15,13 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.10.0 +version: 0.10.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: 0.10.0 +appVersion: 0.10.1 dependencies: - name: tf-operator diff --git a/arena-artifacts/all_crds/v1/tf-operator/kubeflow.org_tfjobs_v1.yaml b/arena-artifacts/all_crds/v1/tf-operator/kubeflow.org_tfjobs_v1.yaml index 87307d6ba..1d55199c1 100644 --- a/arena-artifacts/all_crds/v1/tf-operator/kubeflow.org_tfjobs_v1.yaml +++ b/arena-artifacts/all_crds/v1/tf-operator/kubeflow.org_tfjobs_v1.yaml @@ -6,7 +6,7 @@ metadata: controller-gen.kubebuilder.io/version: v0.15.0 git-repo: https://github.com/AliyunContainerService/tf-operator git-branch: v1.0-aliyun-branch - git-commit: a0bb318635fbf624d277e6981e62ba0b46e20ee2 + git-commit: c36c43433bccfa740b1dec5e0e7cd4091d08fd27 name: tfjobs.kubeflow.org spec: group: kubeflow.org @@ -17,7 +17,14 @@ spec: singular: tfjob scope: Namespaced versions: - - name: v1 + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: State + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1 schema: openAPIV3Schema: description: Represents a TFJob resource. @@ -4358,3 +4365,5 @@ spec: type: object served: true storage: true + subresources: + status: {} diff --git a/arena-artifacts/values.yaml b/arena-artifacts/values.yaml index 002fc67e0..67663c296 100644 --- a/arena-artifacts/values.yaml +++ b/arena-artifacts/values.yaml @@ -7,7 +7,7 @@ global: # pull image by aliyun vpc network pullImageByVPCNetwork: false # the prefix of all image - imagePrefix: registry.cn-zhangjiakou.aliyuncs.com + imagePrefix: registry-cn-zhangjiakou.ack.aliyuncs.com # the cluster type clusterProfile: "Default" # specfiy the nodeSelector for all operator pods @@ -29,7 +29,7 @@ binary: tf: enabled: true image: acs/tf_operator - tag: v1.0-aliyun-9b730a8 + tag: v1.0-aliyun-a0bb318 imagePullPolicy: IfNotPresent resources: limits: diff --git a/docs/requirements.txt b/docs/requirements.txt index 1d472e24e..388184106 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1 @@ -mkdocs-material == 9.5.38 +mkdocs-material == 9.5.40 diff --git a/go.mod b/go.mod index 9dcdf0296..0fa6ce3ca 100644 --- a/go.mod +++ b/go.mod @@ -9,14 +9,14 @@ require ( github.com/kserve/kserve v0.13.1 github.com/mitchellh/go-homedir v1.1.0 github.com/prometheus/client_golang v1.20.4 - github.com/prometheus/common v0.59.1 + github.com/prometheus/common v0.60.0 github.com/sirupsen/logrus v1.9.1 github.com/spf13/cobra v1.8.1 github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.10.0 github.com/stretchr/testify v1.9.0 - golang.org/x/crypto v0.27.0 - google.golang.org/protobuf v1.34.2 + golang.org/x/crypto v0.28.0 + google.golang.org/protobuf v1.35.1 gopkg.in/yaml.v2 v2.4.0 istio.io/api v1.19.4 k8s.io/api v0.28.4 @@ -103,12 +103,12 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa // indirect - golang.org/x/net v0.28.0 // indirect - golang.org/x/oauth2 v0.22.0 // indirect + golang.org/x/net v0.29.0 // indirect + golang.org/x/oauth2 v0.23.0 // indirect golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.25.0 // indirect - golang.org/x/term v0.24.0 // indirect - golang.org/x/text v0.18.0 // indirect + golang.org/x/sys v0.26.0 // indirect + golang.org/x/term v0.25.0 // indirect + golang.org/x/text v0.19.0 // indirect golang.org/x/time v0.6.0 // indirect golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect diff --git a/go.sum b/go.sum index 62a4ea281..aa044d04d 100644 --- a/go.sum +++ b/go.sum @@ -249,8 +249,8 @@ github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/j github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.59.1 h1:LXb1quJHWm1P6wq/U824uxYi4Sg0oGvNeUm1z5dJoX0= -github.com/prometheus/common v0.59.1/go.mod h1:GpWM7dewqmVYcd7SmRaiWVe9SSqjf0UrwnYnpEZNuT0= +github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= +github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/prometheus/statsd_exporter v0.25.0 h1:gpVF1TMf1UqMJmBDpzBYrEaGOFMpbMBYYYUDwM38Y/I= @@ -315,8 +315,8 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= -golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ= @@ -345,13 +345,13 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= +golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= -golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -373,17 +373,17 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= -golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= -golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= +golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= +golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= -golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= @@ -456,8 +456,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/pkg/argsbuilder/submit_tfjob.go b/pkg/argsbuilder/submit_tfjob.go index f3222e470..bd28ab9ed 100644 --- a/pkg/argsbuilder/submit_tfjob.go +++ b/pkg/argsbuilder/submit_tfjob.go @@ -11,6 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License + package argsbuilder import ( @@ -34,6 +35,7 @@ import ( const ( disableTFConfigAnnotation = "arena.kubeflow.org/disable-tf-config" + TFJobSuccessPolicyDefault = "" TFJobSuccessPolicyChiefWorker = "ChiefWorker" TFJobSuccessPolicyAllWorkers = "AllWorkers" ) @@ -268,7 +270,7 @@ func (s *SubmitTFJobArgsBuilder) setRunPolicy() error { func (s *SubmitTFJobArgsBuilder) check() error { switch s.args.SuccessPolicy { - case TFJobSuccessPolicyChiefWorker, TFJobSuccessPolicyAllWorkers: + case TFJobSuccessPolicyDefault, TFJobSuccessPolicyAllWorkers: log.Debugf("Supported successPolicy: %s", s.args.SuccessPolicy) default: return fmt.Errorf("unsupported successPolicy %s", s.args.SuccessPolicy) @@ -417,7 +419,7 @@ func (s *SubmitTFJobArgsBuilder) transform() error { if s.args.SuccessPolicy == TFJobSuccessPolicyChiefWorker { // The value of chief worker policy actually is empty string in training-operator. - s.args.SuccessPolicy = "" + s.args.SuccessPolicy = TFJobSuccessPolicyDefault } return nil