Skip to content

Commit

Permalink
Schedulable archs flag (#68)
Browse files Browse the repository at this point in the history
* Add `cluster-schedulable-archs` flag

 # Context

In some cases we have seen that the common architectures of the images
do not match the default preferred architecture. In those cases, one
of the common architectures is selected, which may not match the
available architectures in the cluster. In those cases, the Pod is
always pending scheduling waiting for a node that will never exist.

 # Approach

Allow passing a comma-separated list of architectures schedulable in
the cluster. As we detect image's architecture support, we ignore
those architectures that have not been specified as supported by the
cluster.

This new flag is optional and defaults to empty string (which disables
the feature and keeps the prior behaviour).

 ## Startup fail if preferred architecture is not part of the supported architectures

Startup will fail if the preferred architecture selected via
commandline is not part of the list of schedulable architectures to
prevent misconfigurations.

 ## Ignore preferred arch at Pod level if unsupported

If a preferred architecture is indicated at Pod level, but it does not
belong to the list of schedulable architectures, we ignore the preference
and treat it as not specified. A log line is generated in that case.

 # Non-goal

 ## Detect architectures present in the cluster

We do not want to prevent scheduling of Pods with architectures
supported but which have no nodes currently in the cluster
(downscaled) to avoid interfering with scale from 0 set ups.

Change-Id: I0b85ab61fb3a267b41d938bd90f2e2d2050e5447
Co-authored-by: Fabián Sellés Rosa <[email protected]>

* fixup! Add `cluster-schedulable-archs` flag

* fixup! Add `cluster-schedulable-archs` flag

---------

Co-authored-by: Fabián Sellés Rosa <[email protected]>
  • Loading branch information
miguelbernadi and Fsero authored Jan 10, 2024
1 parent 573948f commit f9a9737
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 13 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ spec:
targetRevision: HEAD
```
## Hinting preferred target architecture
## Hinting preferred and supported target architectures
By default, Noe will automatically select the appropriate architecture when only one is supported by all the containers in the Pod.
If more than one is available, Noe will select the system-defined preferred one if available. This preference can be chosen in the command line for Noe (defaults to `amd64` if unspecified):
Expand All @@ -49,3 +49,13 @@ labels:
```
Noe will always prioritize a running Pod, so if the preference is not supported by all the containers in the Pod, the common architecture will be selected.
You can restrict the acceptable common architectures in the command line for Noe:
```
./noe -cluster-schedulable-archs amd64,arm64
```
If you specify both a preferred architecture and a list of supported architectures in the command line, the default architecture must be part of the list. Otherwise Noe will fail to start.
If a preferred architecture is specified at the Pod level and is not compatible with the supported architectures listed in the command line, it will be ignored.
1 change: 1 addition & 0 deletions charts/noe/templates/webhook.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ spec:
workingDir: /workdir
args:
- registry-proxies={{ .Values.proxies | join "," }}
- cluster-schedulable-archs={{ .Values.schedulableArchitectures | join "," }}
ports:
- containerPort: 8443
name: webhook
Expand Down
1 change: 1 addition & 0 deletions charts/noe/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ image:
registry: ghcr.io
repository: adevinta/noe
tag: latest
schedulableArchitectures: []
proxies: []
# - docker.io=docker-proxy.company.corp
# - quay.io=quay-proxy.company.corp
Expand Down
22 changes: 19 additions & 3 deletions cmd/noe/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ package main
import (
"context"
"flag"
"fmt"
"os"
"slices"
"strings"
"time"

"github.com/adevinta/noe/pkg/httputils"
Expand All @@ -22,21 +25,33 @@ import (
)

func main() {
var preferredArch, systemOS string
var preferredArch, schedulableArchs, systemOS string
var metricsAddr string
var registryProxies, matchNodeLabels string

flag.StringVar(&preferredArch, "preferred-arch", "amd64", "Preferred architecture when placing pods")
flag.StringVar(&schedulableArchs, "cluster-schedulable-archs", "", "Comma separated list of architectures schedulable in the cluster")
flag.StringVar(&systemOS, "system-os", "linux", "Sole OS supported by the system")
flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&metricsAddr, "registry-proxies", "", "Proxies to substitute in the registry URL in the form of docker.io=docker-proxy.company.corp,quay.io=quay-proxy.company.corp")
flag.StringVar(&matchNodeLabels, "match-node-labels", "", "A set of pod label keys to match against node labels in the form of key1,key2")
flag.Parse()

Main(signals.SetupSignalHandler(), "./", preferredArch, systemOS, metricsAddr, registryProxies, matchNodeLabels)
Main(signals.SetupSignalHandler(), "./", preferredArch, schedulableArchs, systemOS, metricsAddr, registryProxies, matchNodeLabels)
}

func Main(ctx context.Context, certDir, preferredArch, systemOS, metricsAddr, registryProxies, matchNodeLabels string) {
func Main(ctx context.Context, certDir, preferredArch, schedulableArchs, systemOS, metricsAddr, registryProxies, matchNodeLabels string) {
var schedulableArchSlice []string

if schedulableArchs != "" {
schedulableArchSlice = strings.Split(schedulableArchs, ",")
}

if preferredArch != "" && schedulableArchs != "" && !slices.Contains(schedulableArchSlice, preferredArch) {
err := fmt.Errorf("preferred architecture is not schedulable in the cluster")
log.DefaultLogger.WithError(err).Error("refusing to continue")
os.Exit(1)
}

// Setup a Manager
log.DefaultLogger.WithContext(ctx).Println("setting up manager")
Expand Down Expand Up @@ -90,6 +105,7 @@ func Main(ctx context.Context, certDir, preferredArch, systemOS, metricsAddr, re
containerRegistry,
arch.WithMetricsRegistry(metrics.Registry),
arch.WithArchitecture(preferredArch),
arch.WithSchedulableArchitectures(schedulableArchSlice),
arch.WithOS(systemOS),
arch.WithDecoder(decoder),
arch.WithMatchNodeLabels(arch.ParseMatchNodeLabels(matchNodeLabels)),
Expand Down
2 changes: 1 addition & 1 deletion cmd/noe/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func TestNoeMain(t *testing.T) {
require.NoError(t, err)
os.Setenv("KUBECONFIG", env.KubeconfigFile())

go Main(ctx, "./integration-tests/", "amd64", "linux", ":8181", "", "")
go Main(ctx, "./integration-tests/", "amd64", "", "linux", ":8181", "", "")

client := http.Client{
Transport: &http.Transport{
Expand Down
39 changes: 31 additions & 8 deletions pkg/arch/hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"net/http"
"slices"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -110,13 +111,14 @@ func (w warning) Error() string {

type HandlerOption func(*Handler)
type Handler struct {
Client client.Client
Registry Registry
matchNodeLabels []string
metrics HandlerMetrics
decoder *admission.Decoder
preferredArchitecture string
systemOS string
Client client.Client
Registry Registry
matchNodeLabels []string
metrics HandlerMetrics
decoder *admission.Decoder
preferredArchitecture string
schedulableArchitectures []string
systemOS string
}

func NewHandler(client client.Client, registry Registry, opts ...HandlerOption) *Handler {
Expand All @@ -141,6 +143,12 @@ func WithArchitecture(arch string) HandlerOption {
}
}

func WithSchedulableArchitectures(archs []string) HandlerOption {
return func(h *Handler) {
h.schedulableArchitectures = archs
}
}

func WithOS(os string) HandlerOption {
return func(h *Handler) {
h.systemOS = os
Expand Down Expand Up @@ -295,12 +303,16 @@ func (h *Handler) updatePodSpec(ctx context.Context, namespace string, podLabels

var preferredArchIsDefault bool
preferredArch, preferredArchDefined := podLabels["arch.noe.adevinta.com/preferred"]
if preferredArch != "" && !h.isArchSupported(preferredArch) {
log.DefaultLogger.WithContext(ctx).WithField("preferredArch", preferredArch).Println("ignoring unsupported user preferred architecture")
preferredArch = ""
}
if preferredArch == "" && h.preferredArchitecture != "" {
preferredArch = h.preferredArchitecture
preferredArchDefined = true
preferredArchIsDefault = true
ctx = log.AddLogFieldsToContext(ctx, logrus.Fields{"preferredArch": preferredArch})
log.DefaultLogger.WithContext(ctx).Println("selecting preferred architecture")
log.DefaultLogger.WithContext(ctx).Println("selecting default preferred architecture")
}

commonArchitectures := map[string]struct{}{}
Expand All @@ -325,6 +337,10 @@ func (h *Handler) updatePodSpec(ctx context.Context, namespace string, podLabels
log.DefaultLogger.WithContext(ctx).WithField("os", platform.OS).Info("Skipped OS does not match system's")
continue
}
if !h.isArchSupported(platform.Architecture) {
log.DefaultLogger.WithContext(ctx).WithField("arch", platform.OS).Info("Skipped arch does not match system's")
continue
}
imageArchitectures[platform.Architecture] = struct{}{}
}

Expand Down Expand Up @@ -498,6 +514,13 @@ func (h *Handler) Handle(ctx context.Context, req admission.Request) admission.R
return resp
}

func (h *Handler) isArchSupported(arch string) bool {
if len(h.schedulableArchitectures) == 0 {
return true
}
return slices.Contains(h.schedulableArchitectures, arch)
}

// Handler implements admission.DecoderInjector.
// A decoder will be automatically injected.

Expand Down
87 changes: 87 additions & 0 deletions pkg/arch/hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,48 @@ func TestHookSucceedsWhenPreferredArchUnavailable(t *testing.T) {
)
}

func TestHookSucceedsWhenPreferredArchUnschedulable(t *testing.T) {
resp := runWebhookTest(
t,
NewHandler(
fake.NewClientBuilder().Build(),
RegistryFunc(func(ctx context.Context, imagePullSecret, image string) ([]registry.Platform, error) {
assert.Equal(t, "ubuntu", image)
return []registry.Platform{
{OS: "linux", Architecture: "amd64"},
{OS: "linux", Architecture: "arm64"},
{OS: "windows", Architecture: "amd64"},
}, nil
}),
WithOS("linux"),
WithSchedulableArchitectures([]string{"amd64"}),
),
&v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "test",
Name: "object",
Labels: map[string]string{"arch.noe.adevinta.com/preferred": "arm64"},
},

Spec: v1.PodSpec{
Containers: []v1.Container{
{
Image: "ubuntu",
},
},
},
},
)
assert.True(t, resp.Allowed)
assert.Equal(t, http.StatusOK, int(resp.Result.Code))
require.Len(t, resp.Patches, 1)
assert.Contains(
t,
resp.Patches,
archNodeSelectorPatchForArchs("amd64"),
)
}

func TestHookHonorsDefaultPreferredArch(t *testing.T) {
resp := runWebhookTest(
t,
Expand Down Expand Up @@ -567,6 +609,51 @@ func TestHookRejectsMultipleImagesWithNoCommonArch(t *testing.T) {
assert.Len(t, resp.Patch, 0)
}

func TestHookRejectsMultipleImagesWhenCommonArchIsUnschedulable(t *testing.T) {
resp := runWebhookTest(
t,
NewHandler(
fake.NewClientBuilder().Build(),
RegistryFunc(func(ctx context.Context, imagePullSecret, image string) ([]registry.Platform, error) {
switch image {
case "ubuntu":
return []registry.Platform{
{OS: "linux", Architecture: "arm64"},
{OS: "windows", Architecture: "arm64"},
}, nil
default:
return []registry.Platform{
{OS: "linux", Architecture: "arm64"},
{OS: "linux", Architecture: "amd64"},
}, nil
}
}),
WithOS("linux"),
WithSchedulableArchitectures([]string{"amd64"}),
),
&v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "test",
Name: "object",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Image: "ubuntu",
},
{
Image: "alpine",
},
},
},
},
)
assert.False(t, resp.Allowed)
assert.Equal(t, http.StatusForbidden, int(resp.Result.Code))
assert.Len(t, resp.Patches, 0)
assert.Len(t, resp.Patch, 0)
}

func TestUpdatePodSpecWithEmptyImage(t *testing.T) {
t.Run("When one container has missing images", func(t *testing.T) {
resp := runWebhookTest(
Expand Down

0 comments on commit f9a9737

Please sign in to comment.