Skip to content

Commit

Permalink
feat: Add ability to enable Longhorn V2 Data Engine
Browse files Browse the repository at this point in the history
Related issue: harvester/harvester#5274

Signed-off-by: Tim Serong <[email protected]>
  • Loading branch information
tserong committed Aug 27, 2024
1 parent 1e4a92c commit 49cf969
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 6 deletions.
5 changes: 5 additions & 0 deletions manifests/crds/node.harvesterhci.io_nodeconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ spec:
type: object
spec:
properties:
longhornConfig:
properties:
enableV2DataEngine:
type: boolean
type: object
ntpConfigs:
properties:
ntpServers:
Expand Down
16 changes: 11 additions & 5 deletions manifests/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,12 @@ spec:
cpu: 10m
memory: 64Mi
volumeMounts:
- mountPath: /sys/kernel/mm/ksm
name: ksm
- mountPath: /sys/kernel/mm
name: mm
readOnly: false
- mountPath: /lib/modules
name: modules
readOnly: true
- mountPath: /host/proc
name: proc
readOnly: true
Expand All @@ -64,9 +67,12 @@ spec:
- mountPath: /host/oem
name: host-oem
volumes:
- name: ksm
- name: mm
hostPath:
path: /sys/kernel/mm
- name: modules
hostPath:
path: /sys/kernel/mm/ksm
path: /lib/modules
- name: proc
hostPath:
path: /proc
Expand All @@ -81,4 +87,4 @@ spec:
- name: host-oem
hostPath:
path: /oem
type: ""
type: ""
2 changes: 2 additions & 0 deletions package/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

FROM registry.suse.com/bci/bci-base:15.5

# kmod -> for `modprobe` command
RUN zypper -n rm container-suseconnect && \
zypper -n install kmod && \
zypper -n clean -a && rm -rf /tmp/* /var/tmp/* /usr/share/doc/packages/*

ARG TARGETPLATFORM
Expand Down
8 changes: 7 additions & 1 deletion pkg/apis/node.harvesterhci.io/v1beta1/nodeconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,18 @@ type NodeConfig struct {
}

type NodeConfigSpec struct {
NTPConfig *NTPConfig `json:"ntpConfigs,omitempty"`
NTPConfig *NTPConfig `json:"ntpConfigs,omitempty"`
LonghornConfig *LonghornConfig `json:"longhornConfig,omitempty"`
}

type NTPConfig struct {
NTPServers string `json:"ntpServers"`
}

type LonghornConfig struct {
EnableV2DataEngine bool `json:"enableV2DataEngine,omitempty"`
}

type NodeConfigStatus struct {
NTPConditions []ConfigStatus `json:"ntpConditions,omitempty"`
}
Expand Down
21 changes: 21 additions & 0 deletions pkg/apis/node.harvesterhci.io/v1beta1/zz_generated_deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

163 changes: 163 additions & 0 deletions pkg/controller/nodeconfig/config/longhorn.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
package config

import (
"fmt"
"os"
"os/exec"
"strconv"
"strings"

"github.com/harvester/go-common/sys"
"github.com/mudler/yip/pkg/schema"
"github.com/sirupsen/logrus"
)

const (
spdkStageName = "Runtime SPDK Prerequisites"
hugepagesPath = "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
hugepagesToAllocate = 1024
)

var (
modulesToLoad = []string{"vfio_pci", "uio_pci_generic", "nvme_tcp"}
)

func modprobe(modules []string, load bool) error {
args := []string{"-a"}
if !load {
args = append(args, "-r")
}
args = append(args, modules...)
out, err := exec.Command("/usr/sbin/modprobe", args...).CombinedOutput()
if err != nil {
// This ensures we capture some helpful information if modules can't
// be loaded. For example, if /lib/modules isn't actually mounted in
// the container, we'll see something like this:
// modprobe failed: exit status 1 (output: 'modprobe: WARNING: Module
// vfio_pci not found in directory /lib/modules/5.14.21-150500.55.68-default[...]')
return fmt.Errorf("modprobe failed: %v (output: '%s')", err, out)
}
return nil
}

func setNrHugepages(n uint64) error {
if err := os.WriteFile(hugepagesPath, []byte(strconv.FormatUint(n, 10)), 0644); err != nil {
return fmt.Errorf("unable to write %d to %s: %v", n, hugepagesPath, err)
}
return nil
}

func getNrHugepages() (uint64, error) {
data, err := os.ReadFile(hugepagesPath)
if err != nil {
return 0, fmt.Errorf("unable to read %s: %v", hugepagesPath, err)
}
n, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
if err != nil {
return 0, err
}
return n, nil
}

func restartKubelet() error {
// This is safe because TryRestartService will only restart
// services that are already running, i.e. this will restart
// whichever of rke2-server or rke2-agent happens to be active
// on this host
for _, service := range []string{"rke2-server.service", "rke2-agent.service"} {
if err := sys.TryRestartService(service); err != nil {
return err
}
}
return nil
}

func EnableV2DataEngine() error {
origHugepages, err := getNrHugepages()
if err != nil {
return err
}

// Write the persistent config first, so we know it's saved...
if err := UpdatePersistentOEMSettings(schema.Stage{
Name: spdkStageName,
Sysctl: map[string]string{
"vm.nr_hugepages": fmt.Sprintf("%d", hugepagesToAllocate),
},
Commands: []string{
"modprobe vfio_pci",
"modprobe uio_pci_generic",
"modprobe nvme_tcp",
},
}); err != nil {
return err
}

// ...then try to do the runtime activation (which may not succeed)
if err := modprobe(modulesToLoad, true); err != nil {
return fmt.Errorf("unable to load kernel modules %v: %v", modulesToLoad, err)
}

if origHugepages >= hugepagesToAllocate {
// We've already got enough hugepages, and don't want to unnecessarily
// restart the kubelet, so no further action required
return nil
}

if err := setNrHugepages(hugepagesToAllocate); err != nil {
return err
}

nrHugepages, err := getNrHugepages()
if err != nil {
return err
}
if nrHugepages == hugepagesToAllocate {
// We've successfully allocated the hugepages, but still need to restart
// the kubelet in order for Longhorn to see the allocation.
logrus.Infof("Restarting kubelet to set nr_hugepages=%d", hugepagesToAllocate)
return restartKubelet()
}

// We didn't get enough hugepages (not enough available unfragmented memory)
// but the system is now configured correctly so that if it's rebooted we should
// get the required allocation.
// TODO: record this somewhere (an event?) so that it can be picked up in the GUI
// Note that if there aren't enough hugepages, when harvester tries to enable the
// v2 data engine setting in Longhorn, the validator.longhorn.io admission webhook
// will pick up the failure and an error will be displayed on the harvester settings
// page, so we might not need to separately record this.
logrus.Errorf("Unable to allocate %d hugepages (only got %d)", hugepagesToAllocate, nrHugepages)

return nil
}

func DisableV2DataEngine() error {
origHugepages, err := getNrHugepages()
if err != nil {
return err
}

// Write the persistent config first, so we know it's saved...
if err := RemovePersistentOEMSettings(spdkStageName); err != nil {
return err
}

// ...then try to do the runtime deactivation
if err := modprobe(modulesToLoad, false); err != nil {
return fmt.Errorf("unable to unload kernel modules %v: %v", modulesToLoad, err)
}

if origHugepages == 0 {
// We already don't have any hugepages, and don't want to unnecessarily
// restart the kubelet, so no further action required
return nil
}

if err := setNrHugepages(0); err != nil {
return err
}

logrus.Info("Restarting kubelet to set nr_hugepages=0")
return restartKubelet()
}
32 changes: 32 additions & 0 deletions pkg/controller/nodeconfig/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,32 @@ func (c *Controller) OnNodeConfigChange(key string, nodecfg *nodeconfigv1.NodeCo
return nil, nil
}

// V2 Data Engine related handling. This is intentionally not bothering
// to check whether the engine is already enabled or not, it runs on any
// change to the node config, even if that change wasn't related to the
// longhorn settings. This is mostly harmless, because if the engine is
// already in the state about to be applied, re-applying that state is
// effectively a no-op, and I'd rather keep the code simple than add
// annotations for whether or not we already enabled the engine.
// The one wrinkle is that when allocating (or deallocating) hugepages,
// the kubelet needs to be restarted to pick up the change and reflect
// that in node.status.capacity.hugepages-2Mi, so that Longhorn can
// query that value when lhs/v2-data-engine is set to true. This restart
// logic is handled inside EnableV2DataEngine() and DisableV2DataEngine().
if nodecfg.Spec.LonghornConfig != nil && nodecfg.Spec.LonghornConfig.EnableV2DataEngine {
if err := config.EnableV2DataEngine(); err != nil {
logrus.WithFields(logrus.Fields{
"err": err.Error(),
}).Error("Failed to enable V2 Data Engine")
}
} else {
if err := config.DisableV2DataEngine(); err != nil {
logrus.WithFields(logrus.Fields{
"err": err.Error(),
}).Error("Failed to disable V2 Data Engine")
}
}

// NTP related handling
appliedConfig := nodecfg.ObjectMeta.Annotations[ConfigAppliedAnnotation]
ntpConfigHandler := config.NewNTPConfigHandler(c.mtx, c.NodeClient, confName, nodecfg.Spec.NTPConfig, appliedConfig)
Expand Down Expand Up @@ -129,6 +155,12 @@ func (c *Controller) OnNodeConfigRemove(key string, nodecfg *nodeconfigv1.NodeCo
c.NodeConfigs.EnqueueAfter(nodecfg.Namespace, nodecfg.Name, enqueueJitter())
return nil, err
}
if err := config.DisableV2DataEngine(); err != nil {
logrus.WithFields(logrus.Fields{
"err": err.Error(),
}).Error("Failed to disable V2 Data Engine")
c.NodeConfigs.EnqueueAfter(nodecfg.Namespace, nodecfg.Name, enqueueJitter())
}
return nil, nil
}

Expand Down

0 comments on commit 49cf969

Please sign in to comment.