From e3f01050577d3e7a91d056dd185977e4bb79800f Mon Sep 17 00:00:00 2001 From: r2k1 Date: Tue, 21 Jan 2025 14:10:52 +1300 Subject: [PATCH] undo changes --- e2e/aks_model.go | 2 +- e2e/kube.go | 63 ++++-------------------------------------------- 2 files changed, 6 insertions(+), 59 deletions(-) diff --git a/e2e/aks_model.go b/e2e/aks_model.go index 12c43966e5b..4465e5fbd7d 100644 --- a/e2e/aks_model.go +++ b/e2e/aks_model.go @@ -44,7 +44,7 @@ func getBaseClusterModel(clusterName string) *armcontainerservice.ManagedCluster { Name: to.Ptr("nodepool1"), Count: to.Ptr[int32](1), - VMSize: to.Ptr("Standard_D2s_v3"), + VMSize: to.Ptr("Standard_D2ds_v5"), MaxPods: to.Ptr[int32](110), OSType: to.Ptr(armcontainerservice.OSTypeLinux), Type: to.Ptr(armcontainerservice.AgentPoolTypeVirtualMachineScaleSets), diff --git a/e2e/kube.go b/e2e/kube.go index 6007ed956a4..6498223934f 100644 --- a/e2e/kube.go +++ b/e2e/kube.go @@ -14,7 +14,6 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" "github.com/stretchr/testify/require" appsv1 "k8s.io/api/apps/v1" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -302,6 +301,11 @@ func (k *Kubeclient) EnsureDebugDaemonsets(ctx context.Context, t *testing.T, is return err } + err = k.CreateDaemonset(ctx, t, nvidiaDevicePluginDaemonSet()) + if err != nil { + return err + } + return nil } @@ -513,32 +517,6 @@ func podRunNvidiaWorkload(s *Scenario) *corev1.Pod { } } -func podAMDGPUWorkload(s *Scenario) *corev1.Pod { - return &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-gpu-validation-pod", s.Runtime.KubeNodeName), - Namespace: defaultNamespace, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "gpu-validation-container", - Image: "mcr.microsoft.com/azuredocs/samples-tf-mnist-demo:gpu", - Args: []string{ - "--max-steps", "1", - }, - Resources: corev1.ResourceRequirements{ - Limits: corev1.ResourceList{ - "amd.com/gpu": resource.MustParse("1"), - }, - }, - }, - }, - RestartPolicy: corev1.RestartPolicyNever, - }, - } -} - func nvidiaDevicePluginDaemonSet() *appsv1.DaemonSet { return &appsv1.DaemonSet{ ObjectMeta: metav1.ObjectMeta{ @@ -655,34 +633,3 @@ func podEnableAMDGPUResource(s *Scenario) *corev1.Pod { }, } } - -func jobAMDGPUWorkload(s *Scenario) *batchv1.Job { - return &batchv1.Job{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-gpu-validation-job", s.Runtime.KubeNodeName), - Namespace: defaultNamespace, - }, - Spec: batchv1.JobSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "gpu-validation-container", - Image: "mcr.microsoft.com/azuredocs/samples-tf-mnist-demo:gpu", - Args: []string{ - "--max-steps", "1", - }, - Resources: corev1.ResourceRequirements{ - Limits: corev1.ResourceList{ - "amd.com/gpu": resource.MustParse("1"), - }, - }, - }, - }, - RestartPolicy: corev1.RestartPolicyNever, - }, - }, - BackoffLimit: to.Ptr(int32(0)), // No retries, fail immediately if something goes wrong - }, - } -}