diff --git a/Dockerfile.dapper b/Dockerfile.dapper index 1b3e6995..6b02c4c1 100644 --- a/Dockerfile.dapper +++ b/Dockerfile.dapper @@ -20,7 +20,7 @@ RUN go install k8s.io/code-generator/cmd/openapi-gen@v0.23.7 ENV DAPPER_ENV REPO TAG ENV DAPPER_SOURCE /go/src/github.com/harvester/node-manager/ -ENV DAPPER_OUTPUT ./bin ./manifests +ENV DAPPER_OUTPUT ./bin ./manifests ./pkg ENV DAPPER_DOCKER_SOCKET true ENV HOME ${DAPPER_SOURCE} WORKDIR ${DAPPER_SOURCE} diff --git a/manifests/crds/node.harvesterhci.io_nodeconfigs.yaml b/manifests/crds/node.harvesterhci.io_nodeconfigs.yaml index ec421130..e0777fd0 100644 --- a/manifests/crds/node.harvesterhci.io_nodeconfigs.yaml +++ b/manifests/crds/node.harvesterhci.io_nodeconfigs.yaml @@ -39,6 +39,11 @@ spec: type: object spec: properties: + longhornConfig: + properties: + enableV2DataEngine: + type: boolean + type: object ntpConfigs: properties: ntpServers: diff --git a/manifests/daemonset.yaml b/manifests/daemonset.yaml index 1238468e..ee8d5489 100644 --- a/manifests/daemonset.yaml +++ b/manifests/daemonset.yaml @@ -50,9 +50,12 @@ spec: cpu: 10m memory: 64Mi volumeMounts: - - mountPath: /sys/kernel/mm/ksm - name: ksm + - mountPath: /sys/kernel/mm + name: mm readOnly: false + - mountPath: /lib/modules + name: modules + readOnly: true - mountPath: /host/proc name: proc readOnly: true @@ -64,9 +67,12 @@ spec: - mountPath: /host/oem name: host-oem volumes: - - name: ksm + - name: mm + hostPath: + path: /sys/kernel/mm + - name: modules hostPath: - path: /sys/kernel/mm/ksm + path: /lib/modules - name: proc hostPath: path: /proc @@ -81,4 +87,4 @@ spec: - name: host-oem hostPath: path: /oem - type: "" \ No newline at end of file + type: "" diff --git a/package/Dockerfile b/package/Dockerfile index 6e3ba93d..1ca9af9e 100644 --- a/package/Dockerfile +++ b/package/Dockerfile @@ -2,7 +2,9 @@ FROM registry.suse.com/bci/bci-base:15.5 +# kmod -> for `modprobe` command RUN zypper -n rm container-suseconnect && \ + zypper -n install kmod && \ 
zypper -n clean -a && rm -rf /tmp/* /var/tmp/* /usr/share/doc/packages/* ARG TARGETPLATFORM diff --git a/pkg/apis/node.harvesterhci.io/v1beta1/nodeconfig.go b/pkg/apis/node.harvesterhci.io/v1beta1/nodeconfig.go index ab6bc3bc..67d9f92c 100644 --- a/pkg/apis/node.harvesterhci.io/v1beta1/nodeconfig.go +++ b/pkg/apis/node.harvesterhci.io/v1beta1/nodeconfig.go @@ -22,12 +22,18 @@ type NodeConfig struct { } type NodeConfigSpec struct { - NTPConfig *NTPConfig `json:"ntpConfigs,omitempty"` + NTPConfig *NTPConfig `json:"ntpConfigs,omitempty"` + LonghornConfig *LonghornConfig `json:"longhornConfig,omitempty"` } + type NTPConfig struct { NTPServers string `json:"ntpServers"` } +type LonghornConfig struct { + EnableV2DataEngine bool `json:"enableV2DataEngine,omitempty"` +} + type NodeConfigStatus struct { NTPConditions []ConfigStatus `json:"ntpConditions,omitempty"` } diff --git a/pkg/apis/node.harvesterhci.io/v1beta1/zz_generated_deepcopy.go b/pkg/apis/node.harvesterhci.io/v1beta1/zz_generated_deepcopy.go index 6ce53950..5175a6ce 100644 --- a/pkg/apis/node.harvesterhci.io/v1beta1/zz_generated_deepcopy.go +++ b/pkg/apis/node.harvesterhci.io/v1beta1/zz_generated_deepcopy.go @@ -277,6 +277,22 @@ func (in *KsmtunedStatus) DeepCopy() *KsmtunedStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LonghornConfig) DeepCopyInto(out *LonghornConfig) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LonghornConfig. +func (in *LonghornConfig) DeepCopy() *LonghornConfig { + if in == nil { + return nil + } + out := new(LonghornConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *NTPConfig) DeepCopyInto(out *NTPConfig) { *out = *in @@ -362,6 +378,11 @@ func (in *NodeConfigSpec) DeepCopyInto(out *NodeConfigSpec) { *out = new(NTPConfig) **out = **in } + if in.LonghornConfig != nil { + in, out := &in.LonghornConfig, &out.LonghornConfig + *out = new(LonghornConfig) + **out = **in + } return } diff --git a/pkg/controller/nodeconfig/config/common.go b/pkg/controller/nodeconfig/config/common.go index ba25a471..30af3e27 100644 --- a/pkg/controller/nodeconfig/config/common.go +++ b/pkg/controller/nodeconfig/config/common.go @@ -1,11 +1,28 @@ package config +import ( + "fmt" + "os" + "slices" + + "github.com/harvester/go-common/files" + "github.com/harvester/node-manager/pkg/utils" + "github.com/mudler/yip/pkg/schema" + "github.com/sirupsen/logrus" +) + const ( // we use `99_settings.yaml` because it needs to be run after `90_custom.yaml` // with elemental works, the later change would override the previous one + yipStageInitramfs = "initramfs" +) + +// The following would ordinarily be const, but we need to override them in unit tests + +var ( + oemPath = "/host/oem/" settingsOEMPath = "/host/oem/99_settings.yaml" settingsOEMPathBackupPath = "/host/oem/99_settings.yaml.bak" - yipStageInitramfs = "initramfs" ) type NTPConfigTemplate struct { @@ -20,3 +37,101 @@ func generateNTPConfigData() string { {{- end }} ` } + +// UpdatePersistentOEMSettings adds stage to the `initramfs` stages stored in +// settingsOEMPath, replacing any existing stage with the same name. The file +// is created if it does not exist; an existing file is backed up before it is +// rewritten. +func UpdatePersistentOEMSettings(stage schema.Stage) error { + _, err := os.Stat(settingsOEMPath) + if err != nil && !os.IsNotExist(err) { + return fmt.Errorf("stat %s failed: %v", settingsOEMPath, err) + } + + settings := utils.GenerateOEMTemplate() + doBackup := true + if os.IsNotExist(err) { + // New file, we can just set the stages to whatever was passed in. + settings.Stages = make(map[string][]schema.Stage) + settings.Stages[yipStageInitramfs] = []schema.Stage{stage} + doBackup = false + } else { + // Existing file, we need to load it... 
+ err = utils.LoadYipConfigToTarget(settingsOEMPath, settings) + if err != nil { + return fmt.Errorf("load %s to YIP format failed: %v", settingsOEMPath, err) + } + if settings.Stages == nil { + // The file parsed but defined no stages; assigning into a nil map would panic. + settings.Stages = make(map[string][]schema.Stage) + } + logrus.Debugf("Loaded settings from file %s, content: %+v", settingsOEMPath, settings) + // ...then merge the new stage into whatever stages are already present, + // either overwriting or appending as necessary. + existingStage := slices.IndexFunc(settings.Stages[yipStageInitramfs], func(s schema.Stage) bool { + return s.Name == stage.Name + }) + if existingStage == -1 { + settings.Stages[yipStageInitramfs] = append(settings.Stages[yipStageInitramfs], stage) + } else { + settings.Stages[yipStageInitramfs][existingStage] = stage + } + } + + return writePersistentOEMSettings(settings, doBackup) +} + +// RemovePersistentOEMSettings removes the stage named stageName from the +// `initramfs` stages in settingsOEMPath. It returns nil if the file does not +// exist. The file is deleted when no stages remain; otherwise it is backed up +// and rewritten. +func RemovePersistentOEMSettings(stageName string) error { + yipConfig, err := utils.LoadYipConfig(settingsOEMPath) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return fmt.Errorf("load %s failed: %v", settingsOEMPath, err) + } + logrus.Debugf("Loaded yipConfig: %+v, %p", yipConfig, yipConfig) + + if _, found := yipConfig.Stages[yipStageInitramfs]; !found { + // At the moment we only use the `initramfs` stage, so the OEM settings file can be removed. + logrus.Infof("No `initramfs` stage found, remove all OEM settings files.") + return files.RemoveFiles(settingsOEMPath) + } + + pos := slices.IndexFunc(yipConfig.Stages[yipStageInitramfs], func(s schema.Stage) bool { + return s.Name == stageName + }) + + if pos >= 0 { + stages := yipConfig.Stages[yipStageInitramfs] + stages = append(stages[:pos], stages[pos+1:]...) 
+ if len(stages) == 0 { + logrus.Infof("No other stages found, remove all OEM settings files.") + return files.RemoveFiles(settingsOEMPath) + } + yipConfig.Stages[yipStageInitramfs] = stages + } + + // we still have other stages, so we need to backup/update OEM settings files + return writePersistentOEMSettings(yipConfig, true) +} + +func writePersistentOEMSettings(yipConfig *schema.YipConfig, doBackup bool) error { + if doBackup { + if _, err := files.BackupFile(settingsOEMPath); err != nil { + return fmt.Errorf("backup %s failed: %v", settingsOEMPath, err) + } + } + logrus.Infof("Prepare to update new settings to persistent files: %+v", yipConfig) + tmpFileName, err := files.GenerateYAMLTempFileWithDir(yipConfig, "settings", oemPath) + if err != nil { + return fmt.Errorf("generate temp YAML file failed: %v", err) + } + if err = os.Rename(tmpFileName, settingsOEMPath); err != nil { + return fmt.Errorf("rename temp file to %s failed: %v", settingsOEMPath, err) + } + return nil +} diff --git a/pkg/controller/nodeconfig/config/config_test.go b/pkg/controller/nodeconfig/config/config_test.go new file mode 100644 index 00000000..b8bbf5e7 --- /dev/null +++ b/pkg/controller/nodeconfig/config/config_test.go @@ -0,0 +1,185 @@ +package config + +import ( + "os" + "testing" + + "github.com/harvester/node-manager/pkg/apis/node.harvesterhci.io/v1beta1" + "github.com/harvester/node-manager/pkg/utils" + "github.com/mudler/yip/pkg/schema" + + "github.com/stretchr/testify/assert" +) + +func TestNTPConfigPersistence(t *testing.T) { + tmpDir := t.TempDir() + oemPath = tmpDir + "/host/oem/" + settingsOEMPath = tmpDir + "/host/oem/99_settings.yaml" + settingsOEMPathBackupPath = tmpDir + "/host/oem/99_settings.yaml.bak" + if os.MkdirAll(oemPath, 0777) != nil { + t.Errorf("Unable to create %s", oemPath) + } + + ntpConfig := v1beta1.NTPConfig{ + NTPServers: "0.suse.pool.ntp.org 1.suse.pool.ntp.org", + } + + // Create config for the first time + ntpConfigHandler := 
NewNTPConfigHandler(nil, nil, "harvester-node-0", &ntpConfig, "") + err := ntpConfigHandler.UpdateNTPConfigPersistence() + assert.Nil(t, err) + + // Settings file should exist + _, err = os.Stat(settingsOEMPath) + assert.Nil(t, err) + + // Backup file should not exist + _, err = os.Stat(settingsOEMPathBackupPath) + assert.True(t, os.IsNotExist(err)) + + // Should be able to load config + yipConfig, err := utils.LoadYipConfig(settingsOEMPath) + assert.Nil(t, err) + + // Config should be valid + assert.Equal(t, "oem_settings", yipConfig.Name) + // ...one top level stage ("initramfs"): + assert.Equal(t, 1, len(yipConfig.Stages)) + assert.Contains(t, yipConfig.Stages, yipStageInitramfs) + // ...which in turn has one stage inside ("ntp"): + assert.Equal(t, 1, len(yipConfig.Stages[yipStageInitramfs])) + assert.Equal(t, "ntp", yipConfig.Stages[yipStageInitramfs][0].Name) + // ...and the NTP servers are set as we expect: + assert.Equal(t, map[string]string{"NTP": ntpConfig.NTPServers}, yipConfig.Stages[yipStageInitramfs][0].TimeSyncd) + + // Update config with new servers + newNtpConfig := v1beta1.NTPConfig{ + NTPServers: "something different set of servers", + } + ntpConfigHandler.NTPConfig = reGenerateNTPConfig(&newNtpConfig) + err = ntpConfigHandler.UpdateNTPConfigPersistence() + assert.Nil(t, err) + + // Backup file should exist + _, err = os.Stat(settingsOEMPathBackupPath) + assert.False(t, os.IsNotExist(err)) + + // Backup config should be the same as the previous config + backupConfig, err := utils.LoadYipConfig(settingsOEMPathBackupPath) + assert.Nil(t, err) + assert.Equal(t, yipConfig, backupConfig) + + // New config should have new NTP servers + newConfig, err := utils.LoadYipConfig(settingsOEMPath) + assert.Nil(t, err) + assert.Equal(t, map[string]string{"NTP": newNtpConfig.NTPServers}, newConfig.Stages[yipStageInitramfs][0].TimeSyncd) + + // Remove the NTP settings + err = RemovePersistentNTPConfig() + assert.Nil(t, err) + + // Settings file should be gone + _, 
err = os.Stat(settingsOEMPath) + assert.True(t, os.IsNotExist(err)) + + // Backup file should remain + _, err = os.Stat(settingsOEMPathBackupPath) + assert.False(t, os.IsNotExist(err)) +} + +func TestExtraConfigPersistence(t *testing.T) { + tmpDir := t.TempDir() + oemPath = tmpDir + "/host/oem/" + settingsOEMPath = tmpDir + "/host/oem/99_settings.yaml" + settingsOEMPathBackupPath = tmpDir + "/host/oem/99_settings.yaml.bak" + if os.MkdirAll(oemPath, 0777) != nil { + t.Errorf("Unable to create %s", oemPath) + } + + ntpConfig := v1beta1.NTPConfig{ + NTPServers: "0.suse.pool.ntp.org 1.suse.pool.ntp.org", + } + + // Create config for the first time, with NTP as in TestNTPConfigPersistence() + ntpConfigHandler := NewNTPConfigHandler(nil, nil, "harvester-node-0", &ntpConfig, "") + err := ntpConfigHandler.UpdateNTPConfigPersistence() + assert.Nil(t, err) + + // Settings file should exist + _, err = os.Stat(settingsOEMPath) + assert.Nil(t, err) + + // Backup file should not exist + _, err = os.Stat(settingsOEMPathBackupPath) + assert.True(t, os.IsNotExist(err)) + + // Add an extra stage + extraStage := schema.Stage{ + Name: "extra", + } + err = UpdatePersistentOEMSettings(extraStage) + assert.Nil(t, err) + + // Backup file should exist + _, err = os.Stat(settingsOEMPathBackupPath) + assert.False(t, os.IsNotExist(err)) + + // Should be able to load config + yipConfig, err := utils.LoadYipConfig(settingsOEMPath) + assert.Nil(t, err) + + // Config should be valid + assert.Equal(t, "oem_settings", yipConfig.Name) + // ...one top level stage ("initramfs"): + assert.Equal(t, 1, len(yipConfig.Stages)) + assert.Contains(t, yipConfig.Stages, yipStageInitramfs) + // ...which in turn has _two_ stages inside ("ntp" and "extra"): + assert.Equal(t, 2, len(yipConfig.Stages[yipStageInitramfs])) + assert.Equal(t, "ntp", yipConfig.Stages[yipStageInitramfs][0].Name) + assert.Equal(t, map[string]string{"NTP": ntpConfig.NTPServers}, yipConfig.Stages[yipStageInitramfs][0].TimeSyncd) + 
assert.Equal(t, "extra", yipConfig.Stages[yipStageInitramfs][1].Name) + + // Should be able to update the extra stage + newExtraStage := schema.Stage{ + Name: "extra", + Commands: []string{"/bin/true"}, + } + err = UpdatePersistentOEMSettings(newExtraStage) + assert.Nil(t, err) + + // Should be able to load config and see the updated commands in the extra stage, + // while the ntp stage should still be there unchanged + yipConfig, err = utils.LoadYipConfig(settingsOEMPath) + assert.Nil(t, err) + assert.Equal(t, 1, len(yipConfig.Stages)) + assert.Contains(t, yipConfig.Stages, yipStageInitramfs) + assert.Equal(t, 2, len(yipConfig.Stages[yipStageInitramfs])) + assert.Equal(t, "ntp", yipConfig.Stages[yipStageInitramfs][0].Name) + assert.Equal(t, map[string]string{"NTP": ntpConfig.NTPServers}, yipConfig.Stages[yipStageInitramfs][0].TimeSyncd) + assert.Equal(t, "extra", yipConfig.Stages[yipStageInitramfs][1].Name) + assert.Equal(t, newExtraStage.Commands, yipConfig.Stages[yipStageInitramfs][1].Commands) + + // Remove the NTP settings + err = RemovePersistentNTPConfig() + assert.Nil(t, err) + + // Read the config again and we should only have the extra stage now + yipConfig, err = utils.LoadYipConfig(settingsOEMPath) + assert.Nil(t, err) + assert.Equal(t, 1, len(yipConfig.Stages)) + assert.Contains(t, yipConfig.Stages, yipStageInitramfs) + assert.Equal(t, 1, len(yipConfig.Stages[yipStageInitramfs])) + assert.Equal(t, "extra", yipConfig.Stages[yipStageInitramfs][0].Name) + + // Remove the extra stage + err = RemovePersistentOEMSettings("extra") + assert.Nil(t, err) + + // Settings file should be gone + _, err = os.Stat(settingsOEMPath) + assert.True(t, os.IsNotExist(err)) + + // Backup file should remain + _, err = os.Stat(settingsOEMPathBackupPath) + assert.False(t, os.IsNotExist(err)) +} diff --git a/pkg/controller/nodeconfig/config/longhorn.go b/pkg/controller/nodeconfig/config/longhorn.go new file mode 100644 index 00000000..79124a0f --- /dev/null +++ 
b/pkg/controller/nodeconfig/config/longhorn.go @@ -0,0 +1,181 @@ +package config + +import ( + "fmt" + "os" + "os/exec" + "strconv" + "strings" + + "github.com/harvester/go-common/sys" + "github.com/mudler/yip/pkg/schema" + "github.com/sirupsen/logrus" +) + +const ( + spdkStageName = "Runtime SPDK Prerequisites" + hugepagesPath = "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" + hugepagesToAllocate = 1024 +) + +var ( + modulesToLoad = []string{"vfio_pci", "uio_pci_generic", "nvme_tcp"} +) + +// modprobe runs /usr/sbin/modprobe to load the given kernel modules, or to +// unload them when load is false. The command's output is included in the +// returned error on failure. +func modprobe(modules []string, load bool) error { + args := []string{"-a"} + if !load { + args = append(args, "-r") + } + args = append(args, modules...) + out, err := exec.Command("/usr/sbin/modprobe", args...).CombinedOutput() + if err != nil { + // This ensures we capture some helpful information if modules can't + // be loaded. For example, if /lib/modules isn't actually mounted in + // the container, we'll see something like this: + // modprobe failed: exit status 1 (output: 'modprobe: WARNING: Module + // vfio_pci not found in directory /lib/modules/5.14.21-150500.55.68-default[...]') + return fmt.Errorf("modprobe failed: %v (output: '%s')", err, out) + } + return nil +} + +// setNrHugepages writes n to hugepagesPath. +func setNrHugepages(n uint64) error { + if err := os.WriteFile(hugepagesPath, []byte(strconv.FormatUint(n, 10)), 0644); err != nil { + return fmt.Errorf("unable to write %d to %s: %v", n, hugepagesPath, err) + } + return nil +} + +// getNrHugepages returns the value currently stored in hugepagesPath. +func getNrHugepages() (uint64, error) { + data, err := os.ReadFile(hugepagesPath) + if err != nil { + return 0, fmt.Errorf("unable to read %s: %v", hugepagesPath, err) + } + n, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64) + if err != nil { + return 0, fmt.Errorf("unable to parse contents of %s: %v", hugepagesPath, err) + } + return n, nil +} + +func restartKubelet() error { + // This is safe because TryRestartService will only restart + // services that are already running, i.e. 
this will restart + // whichever of rke2-server or rke2-agent happens to be active + // on this host + for _, service := range []string{"rke2-server.service", "rke2-agent.service"} { + if err := sys.TryRestartService(service); err != nil { + return err + } + } + return nil +} + +func EnableV2DataEngine() error { + origHugepages, err := getNrHugepages() + if err != nil { + return err + } + + // Write the persistent config first, so we know it's saved... + if err := UpdatePersistentOEMSettings(schema.Stage{ + Name: spdkStageName, + Sysctl: map[string]string{ + "vm.nr_hugepages": fmt.Sprintf("%d", hugepagesToAllocate), + }, + Commands: []string{ + "modprobe vfio_pci", + "modprobe uio_pci_generic", + "modprobe nvme_tcp", + }, + }); err != nil { + return err + } + + // ...then try to do the runtime activation (which may not succeed) + if err := modprobe(modulesToLoad, true); err != nil { + return fmt.Errorf("unable to load kernel modules %v: %v", modulesToLoad, err) + } + + if origHugepages >= hugepagesToAllocate { + // We've already got enough hugepages, and don't want to unnecessarily + // restart the kubelet, so no further action required + return nil + } + + if err := setNrHugepages(hugepagesToAllocate); err != nil { + return err + } + + nrHugepages, err := getNrHugepages() + if err != nil { + return err + } + if nrHugepages == hugepagesToAllocate { + // We've successfully allocated the hugepages, but still need to restart + // the kubelet in order for Longhorn to see the allocation. + // TODO: handle possible corner case where setNrHugepages() succeeds but + // getNrHugepages() fails, in which case the kubelet is never restarted. + // One option we investigated was: + // - Add a NodeConfigStatus.KubeletNeedsRestart flag. + // - Set that flag to true if the kubelet needs restarting. + // - Make OnNodeConfigChange() restart the kubelet and clear the flag. 
+ // Unfortunately this results in a restart loop in the single master case + // (you can't clear the flag if the kubelet is currently restarting...) + // Another possible corner case is where kubelet restart just fails for + // some reason, but in this case the best (or least worst) choice + // so far is to let the admin figure out what is causing the kubelet + // restart to fail, fix that thing, and restart it manually. + logrus.Infof("Restarting kubelet to set nr_hugepages=%d", hugepagesToAllocate) + return restartKubelet() + } + + // We didn't get enough hugepages (not enough available unfragmented memory) + // but the system is now configured correctly so that if it's rebooted we should + // get the required allocation. + // TODO: record this somewhere (an event?) so that it can be picked up in the GUI + // Note that if there aren't enough hugepages, when harvester tries to enable the + // v2 data engine setting in Longhorn, the validator.longhorn.io admission webhook + // will pick up the failure and an error will be displayed on the harvester settings + // page, so we might not need to separately record this. + logrus.Errorf("Unable to allocate %d hugepages (only got %d)", hugepagesToAllocate, nrHugepages) + + return nil +} + +func DisableV2DataEngine() error { + origHugepages, err := getNrHugepages() + if err != nil { + return err + } + + // Write the persistent config first, so we know it's saved... 
+ if err := RemovePersistentOEMSettings(spdkStageName); err != nil { + return err + } + + // ...then try to do the runtime deactivation + if err := modprobe(modulesToLoad, false); err != nil { + return fmt.Errorf("unable to unload kernel modules %v: %v", modulesToLoad, err) + } + + if origHugepages == 0 { + // We already don't have any hugepages, and don't want to unnecessarily + // restart the kubelet, so no further action required + return nil + } + + if err := setNrHugepages(0); err != nil { + return err + } + + logrus.Info("Restarting kubelet to set nr_hugepages=0") + // TODO: see comment in EnableV2DataEngine() about possible kubelet restart failure corner case + return restartKubelet() +} diff --git a/pkg/controller/nodeconfig/config/ntp.go b/pkg/controller/nodeconfig/config/ntp.go index ac81646e..230975d1 100644 --- a/pkg/controller/nodeconfig/config/ntp.go +++ b/pkg/controller/nodeconfig/config/ntp.go @@ -33,7 +33,6 @@ const ( timesyncdConfigOriginPath = "/host/etc/systemd/timesyncd.conf.origin" timesyncdService = "systemd-timesyncd" timeWaitSyncService = "systemd-time-wait-sync" - oemPath = "/host/oem/" configNTPServer = "ntpServer" ) @@ -220,49 +219,8 @@ func (handler *NTPHandler) RestartService() error { func (handler *NTPHandler) UpdateNTPConfigPersistence() error { logrus.Infof("Prepare to make NTP configuration persistence ...") ntpServer := handler.NTPConfig.NTPServers - _, err := os.Stat(settingsOEMPath) - if err != nil && !os.IsNotExist(err) { - return fmt.Errorf("stat NTP OEM file failed. 
err: %v", err) - } - ntpStages := generateNTPStages(ntpServer) - settings := utils.GenerateOEMTemplate() - settings.Stages = make(map[string][]schema.Stage) - if os.IsNotExist(err) { - if _, found := settings.Stages[yipStageInitramfs]; !found { - settings.Stages[yipStageInitramfs] = []schema.Stage{ntpStages} - } else { - settings.Stages[yipStageInitramfs] = append(settings.Stages[yipStageInitramfs], ntpStages) - } - } else { - // backup current config - if _, err := files.BackupFile(settingsOEMPath); err != nil { - return fmt.Errorf("backup NTP OEM file failed. err: %v", err) - } - // load and overwrite - err = utils.LoadYipConfigToTarget(settingsOEMPath, settings) - if err != nil { - return fmt.Errorf("load OEM file failed to YIP format. err: %v", err) - } - logrus.Debugf("Loaded settings from file %s, content: %+v", settingsOEMPath, settings) - currentInitramfs := settings.Stages[yipStageInitramfs] - for id, stage := range currentInitramfs { - if stage.Name == NTPName { - currentInitramfs[id] = ntpStages - break - } - } - } - logrus.Infof("Prepare to update settings to persistent file: %+v", settings) - tmpFileName, err := files.GenerateYAMLTempFileWithDir(settings, "settings", oemPath) - if err != nil { - return fmt.Errorf("generate temp YAML file failed. err: %v", err) - } - if err = os.Rename(tmpFileName, settingsOEMPath); err != nil { - return fmt.Errorf("rename temp file to OEM path failed. err: %v", err) - } - - return nil + return UpdatePersistentOEMSettings(ntpStages) } func generateNTPStages(ntpserver string) schema.Stage { @@ -278,53 +236,7 @@ func generateNTPStages(ntpserver string) schema.Stage { } func RemovePersistentNTPConfig() error { - yipConfig, err := utils.LoadYipConfig(settingsOEMPath) - if err != nil { - if os.IsNotExist(err) { - return nil - } - return fmt.Errorf("load OEM settings failed. 
err: %v", err) - } - logrus.Debugf("Loaded yipConfig: %+v, %p", yipConfig, yipConfig) - - pos := -1 - if _, found := yipConfig.Stages[yipStageInitramfs]; !found { - // this moment, we only have `initramfs` stage, so we could remove all OEM settings files. - logrus.Infof("No `initramfs` stage found, remove all OEM settings files.") - return files.RemoveFiles(settingsOEMPath, settingsOEMPathBackupPath) - } - - for id, stage := range yipConfig.Stages[yipStageInitramfs] { - if stage.Name == NTPName { - pos = id - break - } - } - - if pos >= 0 { - stages := yipConfig.Stages[yipStageInitramfs] - stages = append(stages[:pos], stages[pos+1:]...) - if len(stages) == 0 { - logrus.Infof("No other stages found, remove all OEM settings files.") - return files.RemoveFiles(settingsOEMPath, settingsOEMPathBackupPath) - } - yipConfig.Stages[yipStageInitramfs] = stages - } - - // we still have other stages, so we need to backup/update OEM settings files - if _, err := files.BackupFile(settingsOEMPath); err != nil { - return fmt.Errorf("backup NTP OEM file failed. err: %v", err) - } - - logrus.Infof("Prepare to update new settings to persistent files: %+v", yipConfig) - tmpFileName, err := files.GenerateYAMLTempFileWithDir(yipConfig, "settings", oemPath) - if err != nil { - return fmt.Errorf("generate temp YAML file failed. err: %v", err) - } - if err = os.Rename(tmpFileName, settingsOEMPath); err != nil { - return fmt.Errorf("rename temp file to OEM path failed. 
err: %v", err) - } - return nil + return RemovePersistentOEMSettings(NTPName) } func CheckConfigApplied(configName string, status nodeconfigv1.NodeConfigStatus) bool { diff --git a/pkg/controller/nodeconfig/controller.go b/pkg/controller/nodeconfig/controller.go index ff0b9cb7..fdabd2f8 100644 --- a/pkg/controller/nodeconfig/controller.go +++ b/pkg/controller/nodeconfig/controller.go @@ -57,6 +57,32 @@ func (c *Controller) OnNodeConfigChange(key string, nodecfg *nodeconfigv1.NodeCo return nil, nil } + // V2 Data Engine related handling. This is intentionally not bothering + // to check whether the engine is already enabled or not, it runs on any + // change to the node config, even if that change wasn't related to the + // longhorn settings. This is mostly harmless, because if the engine is + // already in the state about to be applied, re-applying that state is + // effectively a no-op, and I'd rather keep the code simple than add + // annotations for whether or not we already enabled the engine. + // The one wrinkle is that when allocating (or deallocating) hugepages, + // the kubelet needs to be restarted to pick up the change and reflect + // that in node.status.capacity.hugepages-2Mi, so that Longhorn can + // query that value when lhs/v2-data-engine is set to true. This restart + // logic is handled inside EnableV2DataEngine() and DisableV2DataEngine(). 
+ if nodecfg.Spec.LonghornConfig != nil && nodecfg.Spec.LonghornConfig.EnableV2DataEngine { + if err := config.EnableV2DataEngine(); err != nil { + logrus.WithFields(logrus.Fields{ + "err": err.Error(), + }).Error("Failed to enable V2 Data Engine") + } + } else { + if err := config.DisableV2DataEngine(); err != nil { + logrus.WithFields(logrus.Fields{ + "err": err.Error(), + }).Error("Failed to disable V2 Data Engine") + } + } + // NTP related handling appliedConfig := nodecfg.ObjectMeta.Annotations[ConfigAppliedAnnotation] ntpConfigHandler := config.NewNTPConfigHandler(c.mtx, c.NodeClient, confName, nodecfg.Spec.NTPConfig, appliedConfig) @@ -129,6 +155,12 @@ func (c *Controller) OnNodeConfigRemove(key string, nodecfg *nodeconfigv1.NodeCo c.NodeConfigs.EnqueueAfter(nodecfg.Namespace, nodecfg.Name, enqueueJitter()) return nil, err } + if err := config.DisableV2DataEngine(); err != nil { + logrus.WithFields(logrus.Fields{ + "err": err.Error(), + }).Error("Failed to disable V2 Data Engine") + c.NodeConfigs.EnqueueAfter(nodecfg.Namespace, nodecfg.Name, enqueueJitter()) + } return nil, nil } diff --git a/scripts/generate b/scripts/generate new file mode 100755 index 00000000..863f090c --- /dev/null +++ b/scripts/generate @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +cd $(dirname $0)/.. + +go generate