From 4c212efe147cb3d17d5de8ca5cf7b86a784dc6c1 Mon Sep 17 00:00:00 2001 From: Zack Zlotnik Date: Thu, 2 Feb 2023 16:32:12 -0500 Subject: [PATCH 1/8] ensures that RHCOS 9 SSH keys are in the right place --- pkg/daemon/constants/constants.go | 9 ++ pkg/daemon/update.go | 119 +++++++++++++++------------ pkg/daemon/update_test.go | 3 + test/e2e-single-node/sno_mcd_test.go | 42 ++++++++-- test/e2e/mcd_test.go | 55 ++++++++++--- test/helpers/utils.go | 98 ++++++++++++++++++++-- 6 files changed, 246 insertions(+), 80 deletions(-) diff --git a/pkg/daemon/constants/constants.go b/pkg/daemon/constants/constants.go index e51f77cffa..ab589dee26 100644 --- a/pkg/daemon/constants/constants.go +++ b/pkg/daemon/constants/constants.go @@ -84,4 +84,13 @@ const ( // changes to registries.conf will cause a crio reload and require extra logic about whether to drain ContainerRegistryConfPath = "/etc/containers/registries.conf" + + // SSH Keys for user "core" will only be written at /home/core/.ssh + CoreUserSSHPath = "/home/" + CoreUserName + "/.ssh" + + // SSH keys in RHCOS 8 will be written to /home/core/.ssh/authorized_keys + RHCOS8SSHKeyPath = CoreUserSSHPath + "/authorized_keys" + + // SSH keys in RHCOS 9 / FCOS / SCOS will be written to /home/core/.ssh/authorized_keys.d/ignition + RHCOS9SSHKeyPath = CoreUserSSHPath + "/authorized_keys.d/ignition" ) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 3a6c585186..01b539e2fb 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -5,6 +5,7 @@ import ( "context" "errors" "fmt" + "io/fs" "os" "os/exec" "os/user" @@ -36,8 +37,6 @@ const ( defaultDirectoryPermissions os.FileMode = 0o755 // defaultFilePermissions houses the default mode to use when no file permissions are provided defaultFilePermissions os.FileMode = 0o644 - // SSH Keys for user "core" will only be written at /home/core/.ssh - coreUserSSHPath = "/home/core/.ssh/" // fipsFile is the file to check if FIPS is enabled fipsFile = "/proc/sys/crypto/fips_enabled" extensionsRepo = "/etc/yum.repos.d/coreos-extensions.repo" @@ -1542,7 +1541,7 @@ func (dn *Daemon) writeFiles(files []ign3types.File) error { return writeFiles(files) } -func (dn *Daemon) atomicallyWriteSSHKey(keys string) error { +func (dn *Daemon) atomicallyWriteSSHKey(authKeyPath, keys string) error { uid, err := lookupUID(constants.CoreUserName) if err != nil { return err @@ -1553,23 +1552,16 @@ func (dn *Daemon) atomicallyWriteSSHKey(keys string) error { return err } - var authKeyPath string - if dn.os.IsEL9() || dn.os.IsFCOS() { - // In FCOS and EL9, ignition writes the SSH key to ~/.ssh/authorized_keys.d/ignition, - // and the ssh-key-dir sshd AuthorizedKeysCommand binary reads it from there. - authKeyPath = filepath.Join(coreUserSSHPath, "authorized_keys.d", "ignition") - } else { - authKeyPath = filepath.Join(coreUserSSHPath, "authorized_keys") - } - // Keys should only be written to "/home/core/.ssh" // Once Users are supported fully this should be writing to PasswdUser.HomeDir - glog.Infof("Writing SSHKeys at %q", authKeyPath) + glog.Infof("Writing SSH keys to %q", authKeyPath) - // Creating coreUserSSHPath in advance if it doesn't exist in order to ensure it is owned by core user + // Creating CoreUserSSHPath in advance if it doesn't exist in order to ensure it is owned by core user // See https://bugzilla.redhat.com/show_bug.cgi?id=2107113 - if _, err := os.Stat(coreUserSSHPath); os.IsNotExist(err) { - mkdirCoreCommand := exec.Command("runuser", "-u", constants.CoreUserName, "--", "mkdir", "-m", "0700", "-p", coreUserSSHPath) + authKeyDir := filepath.Dir(authKeyPath) + if _, err := os.Stat(authKeyDir); os.IsNotExist(err) { + glog.Infof("Creating missing SSH key dir at %s", authKeyDir) + mkdirCoreCommand := exec.Command("runuser", "-u", constants.CoreUserName, "--", "mkdir", "-m", "0700", "-p", authKeyDir) if err := mkdirCoreCommand.Run(); err != nil { return err } @@ -1579,7 +1571,7 @@ func (dn *Daemon) atomicallyWriteSSHKey(keys string) error { return err } - glog.V(2).Infof("Wrote SSHKeys at %s", authKeyPath) + glog.V(2).Infof("Wrote SSH keys to %s", authKeyPath) return nil } @@ -1642,50 +1634,69 @@ func (dn *Daemon) updateSSHKeys(newUsers []ign3types.PasswdUser) error { concatSSHKeys = concatSSHKeys + string(k) + "\n" } } - if !dn.mock { - authKeyPath := filepath.Join(coreUserSSHPath, "authorized_keys") - authKeyFragmentDirPath := filepath.Join(coreUserSSHPath, "authorized_keys.d") - - if dn.os.IsFCOS() { - // In older versions of OKD, the keys were written to `/home/core/.ssh/authorized_keys`. - // Newer versions of OKD will however expect the keys at `/home/core/.ssh/authorized_keys.d/ignition`. - // Check if the authorized_keys file at the legacy path exists. If it does, remove it. - // It will be recreated at the new fragment path by the atomicallyWriteSSHKey function - // that is called right after. - _, err := os.Stat(authKeyPath) - if err == nil { - err := os.RemoveAll(authKeyPath) - if err != nil { - return fmt.Errorf("failed to remove path '%s': %w", authKeyPath, err) - } - } else if !os.IsNotExist(err) { - // This shouldn't ever happen - return fmt.Errorf("unexpectedly failed to get info for path '%s': %w", authKeyPath, err) - } - // Ensure authorized_keys.d/ignition is the only fragment that exists - keyFragmentsDir, err := ctrlcommon.ReadDir(authKeyFragmentDirPath) - if err == nil { - for _, fragment := range keyFragmentsDir { - if fragment.Name() != "ignition" { - keyPath := filepath.Join(authKeyFragmentDirPath, fragment.Name()) - err := os.RemoveAll(keyPath) - if err != nil { - return fmt.Errorf("failed to remove path '%s': %w", keyPath, err) - } - } - } - } else if !os.IsNotExist(err) { - // This shouldn't ever happen - return fmt.Errorf("unexpectedly failed to get info for path '%s': %w", authKeyFragmentDirPath, err) + authKeyPath := constants.RHCOS8SSHKeyPath + + if !dn.mock { + // In RHCOS 8.6 or lower, the keys were written to `/home/core/.ssh/authorized_keys`. + // RHCOS 9.0+, FCOS, and SCOS will however expect the keys at `/home/core/.ssh/authorized_keys.d/ignition`. + // Check if the authorized_keys file at the legacy path exists. If it does, remove it. + // It will be recreated at the new fragment path by the atomicallyWriteSSHKey function + // that is called right after. + if dn.os.IsEL9() || dn.os.IsSCOS() || dn.os.IsFCOS() { + authKeyPath = constants.RHCOS9SSHKeyPath + + if err := cleanSSHKeyPaths(); err != nil { + return err } } // Note we write keys only for the core user and so this ignores the user list - if err := dn.atomicallyWriteSSHKey(concatSSHKeys); err != nil { - return err + return dn.atomicallyWriteSSHKey(authKeyPath, concatSSHKeys) + } + + return nil +} + +// Removes the old SSH key path (/home/core/.ssh/authorized_keys), if found. +func cleanSSHKeyPaths() error { + _, err := os.Stat(constants.RHCOS8SSHKeyPath) + if err == nil { + err := os.RemoveAll(constants.RHCOS8SSHKeyPath) + if err != nil { + return fmt.Errorf("failed to remove path '%s': %w", constants.RHCOS8SSHKeyPath, err) + } + } else if !errors.Is(err, fs.ErrNotExist) { + // This shouldn't ever happen + return fmt.Errorf("unexpectedly failed to get info for path '%s': %w", constants.RHCOS8SSHKeyPath, err) + } + + return removeNonIgnitionKeyPathFragments() +} + +// Ensures authorized_keys.d/ignition is the only fragment that exists within the /home/core/.ssh dir. +func removeNonIgnitionKeyPathFragments() error { + // /home/core/.ssh/authorized_keys.d + authKeyFragmentDirPath := filepath.Dir(constants.RHCOS9SSHKeyPath) + // ignition + authKeyFragmentBasename := filepath.Base(constants.RHCOS9SSHKeyPath) + + keyFragmentsDir, err := ctrlcommon.ReadDir(authKeyFragmentDirPath) + if err == nil { + for _, fragment := range keyFragmentsDir { + if fragment.Name() != authKeyFragmentBasename { + keyPath := filepath.Join(authKeyFragmentDirPath, fragment.Name()) + err := os.RemoveAll(keyPath) + if err != nil { + return fmt.Errorf("failed to remove path '%s': %w", keyPath, err) + } + } } + } else if !errors.Is(err, fs.ErrNotExist) { + // This shouldn't ever happen + return fmt.Errorf("unexpectedly failed to get info for path '%s': %w", constants.RHCOS9SSHKeyPath, err) } + return nil } diff --git a/pkg/daemon/update_test.go b/pkg/daemon/update_test.go index 1524127d70..3a88d52316 100644 --- a/pkg/daemon/update_test.go +++ b/pkg/daemon/update_test.go @@ -493,6 +493,9 @@ func TestWriteFiles(t *testing.T) { } } +// This test provides a false sense of security. Given the combination of the +// mock mode in the MCD coupled with the inputs into this test, it effectively +// no-ops and does not test what we think it tests. func TestUpdateSSHKeys(t *testing.T) { d := newMockDaemon() diff --git a/test/e2e-single-node/sno_mcd_test.go b/test/e2e-single-node/sno_mcd_test.go index f3e5fc12b7..a86ad1716d 100644 --- a/test/e2e-single-node/sno_mcd_test.go +++ b/test/e2e-single-node/sno_mcd_test.go @@ -3,6 +3,7 @@ package e2e_sno_test import ( "context" "fmt" + "path/filepath" "strconv" "strings" "testing" @@ -246,6 +247,7 @@ func TestExtensions(t *testing.T) { t.Logf("Node %s has successfully rolled back", node.Name) } +// TODO: Can this test be moved to e2e-shared since it is very similar to the one in e2e/mcd_test.go? func TestNoReboot(t *testing.T) { cs := framework.NewClientSet("") @@ -259,14 +261,34 @@ func TestNoReboot(t *testing.T) { oldTime := strings.Split(output, " ")[0] t.Logf("Node %s initial uptime: %s", node.Name, oldTime) + sshKeyContent := "test adding authorized key without node reboot" + + nodeOS := helpers.GetOSReleaseForNode(t, cs, node).OS + + sshPaths := helpers.GetSSHPaths(nodeOS) + + t.Logf("Expecting SSH keys to be in %s", sshPaths.Expected) + + if sshPaths.Expected == constants.RHCOS9SSHKeyPath { + // Write an SSH key to the old location on the node because the update process should remove this file. + t.Logf("Writing SSH key to %s to ensure that it will be removed later", sshPaths.NotExpected) + bashCmd := fmt.Sprintf("printf '%s' > %s", sshKeyContent, filepath.Join("/rootfs", sshPaths.NotExpected)) + helpers.ExecCmdOnNode(t, cs, node, "/bin/bash", "-c", bashCmd) + } + + // Delete the expected SSH keys directory to ensure that the directories are + // (re)created correctly by the MCD. This targets the upgrade case where that + // directory may not previously exist. + helpers.ExecCmdOnNode(t, cs, node, "rm", "-rf", filepath.Join("/rootfs", filepath.Dir(sshPaths.Expected))) + // Adding authorized key for user core testIgnConfig := ctrlcommon.NewIgnConfig() - testSSHKey := ign3types.PasswdUser{Name: "core", SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{"test adding authorized key without node reboot"}} + testSSHKey := ign3types.PasswdUser{Name: "core", SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{ign3types.SSHAuthorizedKey(sshKeyContent)}} testIgnConfig.Passwd.Users = append(testIgnConfig.Passwd.Users, testSSHKey) addAuthorizedKey := &mcfgv1.MachineConfig{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("authorzied-key-%s", uuid.NewUUID()), + Name: fmt.Sprintf("authorized-key-%s", uuid.NewUUID()), Labels: helpers.MCLabelForRole("master"), }, Spec: mcfgv1.MachineConfigSpec{ @@ -292,8 +314,13 @@ func TestNoReboot(t *testing.T) { assert.Equal(t, node.Annotations[constants.CurrentMachineConfigAnnotationKey], renderedConfig) assert.Equal(t, node.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) - foundSSHKey := helpers.ExecCmdOnNode(t, cs, node, "cat", "/rootfs/home/core/.ssh/authorized_keys") - if !strings.Contains(foundSSHKey, "test adding authorized key without node reboot") { + t.Logf("Expecting SSH keys to be in %s", sshPaths.Expected) + + helpers.AssertFileOnNode(t, cs, node, sshPaths.Expected) + helpers.AssertFileNotOnNode(t, cs, node, sshPaths.NotExpected) + + foundSSHKey := helpers.ExecCmdOnNode(t, cs, node, "cat", filepath.Join("/rootfs", sshPaths.Expected)) + if !strings.Contains(foundSSHKey, sshKeyContent) { t.Fatalf("updated ssh keys not found in authorized_keys, got %s", foundSSHKey) } t.Logf("Node %s has SSH key", node.Name) @@ -332,11 +359,14 @@ func TestNoReboot(t *testing.T) { assert.Equal(t, node.Annotations[constants.CurrentMachineConfigAnnotationKey], oldMasterRenderedConfig) assert.Equal(t, node.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) - foundSSHKey = helpers.ExecCmdOnNode(t, cs, node, "cat", "/rootfs/home/core/.ssh/authorized_keys") - if strings.Contains(foundSSHKey, "test adding authorized key without node reboot") { + foundSSHKey = helpers.ExecCmdOnNode(t, cs, node, "cat", filepath.Join("/rootfs", sshPaths.Expected)) + if strings.Contains(foundSSHKey, sshKeyContent) { t.Fatalf("Node %s did not rollback successfully", node.Name) } + helpers.AssertFileOnNode(t, cs, node, sshPaths.Expected) + helpers.AssertFileNotOnNode(t, cs, node, sshPaths.NotExpected) + t.Logf("Node %s has successfully rolled back", node.Name) // Ensure that node didn't reboot during rollback diff --git a/test/e2e/mcd_test.go b/test/e2e/mcd_test.go index 613f0b9243..4997d5c75d 100644 --- a/test/e2e/mcd_test.go +++ b/test/e2e/mcd_test.go @@ -3,6 +3,7 @@ package e2e_test import ( "context" "fmt" + "path/filepath" "strconv" "strings" "testing" @@ -382,6 +383,28 @@ func TestNoReboot(t *testing.T) { oldInfraRenderedConfig := helpers.GetMcName(t, cs, "infra") infraNode := helpers.GetSingleNodeByRole(t, cs, "infra") + + sshKeyContent := "test adding authorized key without node reboot" + + nodeOS := helpers.GetOSReleaseForNode(t, cs, infraNode).OS + + sshPaths := helpers.GetSSHPaths(nodeOS) + + t.Logf("Expecting SSH keys to be in %s", sshPaths.Expected) + + if sshPaths.Expected == constants.RHCOS9SSHKeyPath { + // Write an SSH key to the old location on the node because the update process should remove this file. + t.Logf("Writing SSH key to %s to ensure that it will be removed later", sshPaths.NotExpected) + bashCmd := fmt.Sprintf("printf '%s' > %s", sshKeyContent, filepath.Join("/rootfs", sshPaths.NotExpected)) + helpers.ExecCmdOnNode(t, cs, infraNode, "/bin/bash", "-c", bashCmd) + } + + // Delete the expected SSH keys directory to ensure that the directories are + // (re)created correctly by the MCD. This targets the upgrade case where that + // directory may not previously exist. Note: This will need to be revisited + // once Config Drift Monitor is aware of SSH keys. + helpers.ExecCmdOnNode(t, cs, infraNode, "rm", "-rf", filepath.Join("/rootfs", filepath.Dir(sshPaths.Expected))) + output := helpers.ExecCmdOnNode(t, cs, infraNode, "cat", "/rootfs/proc/uptime") oldTime := strings.Split(output, " ")[0] t.Logf("Node %s initial uptime: %s", infraNode.Name, oldTime) @@ -390,17 +413,18 @@ func TestNoReboot(t *testing.T) { // Adding authorized key for user core testIgnConfig := ctrlcommon.NewIgnConfig() testPasswdHash := "testpass" - testSSHKey := ign3types.PasswdUser{ - Name: "core", - SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{"test adding authorized key without node reboot"}, - PasswordHash: &testPasswdHash, - } - testIgnConfig.Passwd.Users = append(testIgnConfig.Passwd.Users, testSSHKey) + testIgnConfig.Passwd.Users = []ign3types.PasswdUser{ + { + Name: "core", + SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{ign3types.SSHAuthorizedKey(sshKeyContent)}, + PasswordHash: &testPasswdHash, + }, + } addAuthorizedKey := &mcfgv1.MachineConfig{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("authorzied-key-infra-%s", uuid.NewUUID()), + Name: fmt.Sprintf("authorized-key-infra-%s", uuid.NewUUID()), Labels: helpers.MCLabelForRole("infra"), }, Spec: mcfgv1.MachineConfigSpec{ @@ -426,8 +450,11 @@ func TestNoReboot(t *testing.T) { assert.Equal(t, infraNode.Annotations[constants.CurrentMachineConfigAnnotationKey], renderedConfig) assert.Equal(t, infraNode.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) - foundSSHKey := helpers.ExecCmdOnNode(t, cs, infraNode, "cat", "/rootfs/home/core/.ssh/authorized_keys") - if !strings.Contains(foundSSHKey, "test adding authorized key without node reboot") { + helpers.AssertFileOnNode(t, cs, infraNode, sshPaths.Expected) + helpers.AssertFileNotOnNode(t, cs, infraNode, sshPaths.NotExpected) + + foundSSHKey := helpers.ExecCmdOnNode(t, cs, infraNode, "cat", filepath.Join("/rootfs", sshPaths.Expected)) + if !strings.Contains(foundSSHKey, sshKeyContent) { t.Fatalf("updated ssh keys not found in authorized_keys, got %s", foundSSHKey) } t.Logf("Node %s has SSH key", infraNode.Name) @@ -437,9 +464,10 @@ func TestNoReboot(t *testing.T) { if currentEtcShadowContents == initialEtcShadowContents { t.Fatalf("updated password hash not found in etc/shadow, got %s", currentEtcShadowContents) } + t.Logf("Node %s has Password Hash", infraNode.Name) - usernameAndGroup := strings.Split(strings.TrimSuffix(helpers.ExecCmdOnNode(t, cs, infraNode, "chroot", "/rootfs", "stat", "--format=%U %G", "/home/core/.ssh/authorized_keys"), "\n"), " ") + usernameAndGroup := strings.Split(strings.TrimSuffix(helpers.ExecCmdOnNode(t, cs, infraNode, "chroot", "/rootfs", "stat", "--format=%U %G", sshPaths.Expected), "\n"), " ") assert.Equal(t, usernameAndGroup, []string{constants.CoreUserName, constants.CoreGroupName}) output = helpers.ExecCmdOnNode(t, cs, infraNode, "cat", "/rootfs/proc/uptime") @@ -476,11 +504,14 @@ func TestNoReboot(t *testing.T) { assert.Equal(t, infraNode.Annotations[constants.CurrentMachineConfigAnnotationKey], oldInfraRenderedConfig) assert.Equal(t, infraNode.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) - foundSSHKey = helpers.ExecCmdOnNode(t, cs, infraNode, "cat", "/rootfs/home/core/.ssh/authorized_keys") - if strings.Contains(foundSSHKey, "test adding authorized key without node reboot") { + foundSSHKey = helpers.ExecCmdOnNode(t, cs, infraNode, "cat", filepath.Join("/rootfs", sshPaths.Expected)) + if strings.Contains(foundSSHKey, sshKeyContent) { t.Fatalf("Node %s did not rollback successfully", infraNode.Name) } + helpers.AssertFileOnNode(t, cs, infraNode, sshPaths.Expected) + helpers.AssertFileNotOnNode(t, cs, infraNode, sshPaths.NotExpected) + t.Logf("Node %s has successfully rolled back", infraNode.Name) // Ensure that node didn't reboot during rollback diff --git a/test/helpers/utils.go b/test/helpers/utils.go index f749cf4ec7..d07708e4f7 100644 --- a/test/helpers/utils.go +++ b/test/helpers/utils.go @@ -382,6 +382,28 @@ func CreateMCP(t *testing.T, cs *framework.ClientSet, mcpName string) func() { } } +type SSHPaths struct { + // The path where SSH keys are expected to be found. + Expected string + // The path where SSH keys are *not* expected to be found. + NotExpected string +} + +// Determines where to expect SSH keys for the core user on a given node based upon the node's OS. +func GetSSHPaths(os osrelease.OperatingSystem) SSHPaths { + if os.IsEL9() || os.IsSCOS() || os.IsFCOS() { + return SSHPaths{ + Expected: constants.RHCOS9SSHKeyPath, + NotExpected: constants.RHCOS8SSHKeyPath, + } + } + + return SSHPaths{ + Expected: constants.RHCOS8SSHKeyPath, + NotExpected: constants.RHCOS9SSHKeyPath, + } +} + // MCPNameToRole converts a mcpName to a node role label func MCPNameToRole(mcpName string) string { return fmt.Sprintf("node-role.kubernetes.io/%s", mcpName) @@ -392,23 +414,87 @@ func CreateMC(name, role string) *mcfgv1.MachineConfig { return NewMachineConfig(name, MCLabelForRole(role), "", nil) } +// Asserts that a given file is present on the underlying node. +func AssertFileOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, path string) bool { + t.Helper() + + path = canonicalizeNodeFilePath(path) + + out, err := ExecCmdOnNodeWithError(cs, node, "stat", path) + + return assert.NoError(t, err, "expected to find file %s on %s, got:\n%s\nError: %s", path, node.Name, out, err) +} + +// Asserts that a given file is *not* present on the underlying node. +func AssertFileNotOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, path string) bool { + t.Helper() + + path = canonicalizeNodeFilePath(path) + + out, err := ExecCmdOnNodeWithError(cs, node, "stat", path) + + return assert.Error(t, err, "expected not to find file %s on %s, got:\n%s", path, node.Name, out) && + assert.Contains(t, out, "No such file or directory", "expected command output to contain 'No such file or directory', got: %s", out) +} + +// Adds the /rootfs onto a given file path, if not already present. +func canonicalizeNodeFilePath(path string) string { + rootfs := "/rootfs" + + if !strings.HasPrefix(path, rootfs) { + return filepath.Join(rootfs, path) + } + + return path +} + // ExecCmdOnNode finds a node's mcd, and oc rsh's into it to execute a command on the node // all commands should use /rootfs as root func ExecCmdOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, subArgs ...string) string { + t.Helper() + + cmd, err := execCmdOnNode(cs, node, subArgs...) + require.Nil(t, err, "could not prepare to exec cmd %v on node %s: %s", subArgs, node.Name, err) + cmd.Stderr = os.Stderr + + out, err := cmd.Output() + require.Nil(t, err, "failed to exec cmd %v on node %s: %s", subArgs, node.Name, string(out)) + return string(out) +} + +// ExecCmdOnNodeWithError behaves like ExecCmdOnNode, with the exception that +// any errors are returned to the caller for inspection. This allows one to +// execute a command that is expected to fail; e.g., stat /nonexistant/file. +func ExecCmdOnNodeWithError(cs *framework.ClientSet, node corev1.Node, subArgs ...string) (string, error) { + cmd, err := execCmdOnNode(cs, node, subArgs...) + if err != nil { + return "", err + } + + out, err := cmd.CombinedOutput() + return string(out), err +} + +// ExecCmdOnNode finds a node's mcd, and oc rsh's into it to execute a command on the node +// all commands should use /rootfs as root +func execCmdOnNode(cs *framework.ClientSet, node corev1.Node, subArgs ...string) (*exec.Cmd, error) { // Check for an oc binary in $PATH. path, err := exec.LookPath("oc") if err != nil { - t.Fatalf("could not locate oc command: %s", err) + return nil, fmt.Errorf("could not locate oc command: %w", err) } // Get the kubeconfig file path kubeconfig, err := cs.GetKubeconfig() if err != nil { - t.Fatalf("could not get kubeconfig: %s", err) + return nil, fmt.Errorf("could not get kubeconfig: %w", err) } mcd, err := mcdForNode(cs, &node) - require.Nil(t, err) + if err != nil { + return nil, fmt.Errorf("could not get MCD for node %s: %w", node.Name, err) + } + mcdName := mcd.ObjectMeta.Name entryPoint := path @@ -423,11 +509,7 @@ func ExecCmdOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, subA // $KUBECONFIG, oc will be unaware of it. To remedy, we explicitly set // KUBECONFIG to the value held by the clientset. cmd.Env = append(cmd.Env, "KUBECONFIG="+kubeconfig) - cmd.Stderr = os.Stderr - - out, err := cmd.Output() - require.Nil(t, err, "failed to exec cmd %v on node %s: %s", subArgs, node.Name, string(out)) - return string(out) + return cmd, nil } // IsOKDCluster checks whether the Upstream field on the CV spec references OKD's update server From fc98d02a577c9e9f52e9f9fce3f9a195ab7664c7 Mon Sep 17 00:00:00 2001 From: Zack Zlotnik Date: Thu, 2 Feb 2023 17:21:56 -0500 Subject: [PATCH 2/8] OKD release controller is out-of-date --- test/helpers/utils.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/helpers/utils.go b/test/helpers/utils.go index d07708e4f7..0525bc46fe 100644 --- a/test/helpers/utils.go +++ b/test/helpers/utils.go @@ -31,6 +31,7 @@ import ( "k8s.io/client-go/util/retry" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" ) @@ -512,16 +513,16 @@ func execCmdOnNode(cs *framework.ClientSet, node corev1.Node, subArgs ...string) return cmd, nil } -// IsOKDCluster checks whether the Upstream field on the CV spec references OKD's update server +// IsOKDCluster checks whether the Upstream field on the CV spec references an OKD release controller func IsOKDCluster(cs *framework.ClientSet) (bool, error) { cv, err := cs.ClusterVersions().Get(context.TODO(), "version", metav1.GetOptions{}) if err != nil { return false, err } - if cv.Spec.Upstream == "https://origin-release.svc.ci.openshift.org/graph" { - return true, nil - } - return false, nil + + // TODO: Adjust this as OKD becomes available for different platforms, e.g., arm64. + okdReleaseControllers := sets.NewString("https://amd64.origin.releases.ci.openshift.org/graph") + return okdReleaseControllers.Has(string(cv.Spec.Upstream)), nil } func MCLabelForRole(role string) map[string]string { From bfb9d7757737b88900fa9cafe6efa23fa68dd861 Mon Sep 17 00:00:00 2001 From: Zack Zlotnik Date: Fri, 17 Feb 2023 17:28:43 -0500 Subject: [PATCH 3/8] ensures SSH keys get moved to the correct location When we move from RHCOS 8 -> RHCOS 9, the SSH keys are not being written to the new location because: 1. When the upgrade configs are written to the node, it is still running RHCOS 8, so the keys are not being written to the new location. 2. The node reboots into RHCOS 9 to complete the upgrade. 3. The "are we on the latest config" functions detect that we are indeed on the latest config and so it does not attempt to perform an update. --- pkg/daemon/daemon.go | 72 ++++++++++++++++++++++++++++++++++++++ pkg/daemon/update.go | 82 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 139 insertions(+), 15 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 7cc194eb22..4d49217f9f 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -1499,6 +1499,71 @@ func removeIgnitionArtifacts() error { return nil } +// When we move from RHCOS 8 -> RHCOS 9, the SSH keys do not get written to the +// new location before the node reboots into RHCOS 9 because: +// +// 1. When the upgrade configs are written to the node, it is still running +// RHCOS 8, so the keys are not being written to the new location since the +// location is inferred from the currently booted OS. +// 2. The node reboots into RHCOS 9 to complete the upgrade. +// 3. The "are we on the latest config" functions detect that we are indeed on +// the latest config and so it does not attempt to perform an update. +// +// To work around that check on bootup if the we should use the new SSH key +// path and if the old SSH key path exists, we know that we need to migrate tot +// he new key path by calling dn.updateSSHKeyLocation(). +func (dn *Daemon) isSSHKeyLocationUpdateRequired() (bool, error) { + if !dn.useNewSSHKeyPath() { + // Return early because we're not using the new SSH key path. + return false, nil + } + + oldKeyExists, err := fileExists(constants.RHCOS8SSHKeyPath) + if err != nil { + return false, err + } + + newKeyExists, err := fileExists(constants.RHCOS9SSHKeyPath) + if err != nil { + return false, err + } + + // If the old key exists and the new key does not, we need to update. + return oldKeyExists && !newKeyExists, nil +} + +// Decode the Ignition config and perform the SSH key update. +func (dn *Daemon) updateSSHKeyLocation(cfg *mcfgv1.MachineConfig) error { + glog.Infof("SSH key location update required. Moving SSH keys from %q to %q.", constants.RHCOS8SSHKeyPath, constants.RHCOS9SSHKeyPath) + + ignConfig, err := ctrlcommon.ParseAndConvertConfig(cfg.Spec.Config.Raw) + if err != nil { + return fmt.Errorf("ignition failure when updating SSH key location: %w", err) + } + + if err := dn.updateSSHKeys(ignConfig.Passwd.Users); err != nil { + return fmt.Errorf("could not write SSH keys to new location: %w", err) + } + + return nil +} + +// Determines if we need to update the SSH key location and performs the +// necessary update if so. +func (dn *Daemon) updateSSHKeyLocationIfNeeded(cfg *mcfgv1.MachineConfig) error { + sshKeyLocationUpdateRequired, err := dn.isSSHKeyLocationUpdateRequired() + if err != nil { + return fmt.Errorf("unable to determine if SSH key location update is required: %w", err) + } + + if !sshKeyLocationUpdateRequired { + glog.Infof("SSH key location (%q) up-to-date!", constants.RHCOS9SSHKeyPath) + return nil + } + + return dn.updateSSHKeyLocation(cfg) +} + // checkStateOnFirstRun is a core entrypoint for our state machine. // It determines whether we're in our desired state, or if we're // transitioning between states, and whether or not we need to update @@ -1651,6 +1716,13 @@ func (dn *Daemon) checkStateOnFirstRun() error { return dn.triggerUpdateWithMachineConfig(state.currentConfig, state.desiredConfig) } + // When upgrading the OS, it is possible that the SSH key location will + // change. We should detect whether that is the case and update before we + // check for any config drift. + if err := dn.updateSSHKeyLocationIfNeeded(expectedConfig); err != nil { + return err + } + if err := dn.validateOnDiskState(expectedConfig); err != nil { wErr := fmt.Errorf("unexpected on-disk state validating against %s: %w", expectedConfig.GetName(), err) dn.nodeWriter.Eventf(corev1.EventTypeWarning, "OnDiskStateValidationFailed", wErr.Error()) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 01b539e2fb..1e99da96e0 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -1541,6 +1541,29 @@ func (dn *Daemon) writeFiles(files []ign3types.File) error { return writeFiles(files) } +// Ensures that both the SSH root directory (/home/core/.ssh) as well as any +// subdirectories are created with the correct (0700) permissions. +func createSSHKeyDir(authKeyDir string) error { + glog.Infof("Creating missing SSH key dir at %s", authKeyDir) + + mkdir := func(dir string) error { + return exec.Command("runuser", "-u", constants.CoreUserName, "--", "mkdir", "-m", "0700", "-p", dir).Run() + } + + // Create the root SSH key directory (/home/core/.ssh) first. + if err := mkdir(filepath.Dir(constants.RHCOS8SSHKeyPath)); err != nil { + return err + } + + // For RHCOS 8, creating /home/core/.ssh is all that is needed. + if authKeyDir == constants.RHCOS8SSHKeyPath { + return nil + } + + // Create the next level of the SSH key directory (/home/core/.ssh/authorized_keys.d) for RHCOS 9 cases. + return mkdir(filepath.Dir(constants.RHCOS9SSHKeyPath)) +} + func (dn *Daemon) atomicallyWriteSSHKey(authKeyPath, keys string) error { uid, err := lookupUID(constants.CoreUserName) if err != nil { @@ -1560,9 +1583,7 @@ func (dn *Daemon) atomicallyWriteSSHKey(authKeyPath, keys string) error { // See https://bugzilla.redhat.com/show_bug.cgi?id=2107113 authKeyDir := filepath.Dir(authKeyPath) if _, err := os.Stat(authKeyDir); os.IsNotExist(err) { - glog.Infof("Creating missing SSH key dir at %s", authKeyDir) - mkdirCoreCommand := exec.Command("runuser", "-u", constants.CoreUserName, "--", "mkdir", "-m", "0700", "-p", authKeyDir) - if err := mkdirCoreCommand.Run(); err != nil { + if err := createSSHKeyDir(authKeyDir); err != nil { return err } } @@ -1571,7 +1592,7 @@ func (dn *Daemon) atomicallyWriteSSHKey(authKeyPath, keys string) error { return err } - glog.V(2).Infof("Wrote SSH keys to %s", authKeyPath) + glog.V(2).Infof("Wrote SSH keys to %q", authKeyPath) return nil } @@ -1609,6 +1630,13 @@ func (dn *Daemon) SetPasswordHash(newUsers []ign3types.PasswdUser) error { return nil } +// Determines if we should use the new SSH key path +// (/home/core/.ssh/authorized_keys.d/ignition) or the old SSH key path +// (/home/core/.ssh/authorized_keys) +func (dn *Daemon) useNewSSHKeyPath() bool { + return dn.os.IsEL9() || dn.os.IsFCOS() || dn.os.IsSCOS() +} + // Update a given PasswdUser's SSHKey func (dn *Daemon) updateSSHKeys(newUsers []ign3types.PasswdUser) error { if len(newUsers) == 0 { @@ -1643,12 +1671,16 @@ func (dn *Daemon) updateSSHKeys(newUsers []ign3types.PasswdUser) error { // Check if the authorized_keys file at the legacy path exists. If it does, remove it. // It will be recreated at the new fragment path by the atomicallyWriteSSHKey function // that is called right after. - if dn.os.IsEL9() || dn.os.IsSCOS() || dn.os.IsFCOS() { + if dn.useNewSSHKeyPath() { authKeyPath = constants.RHCOS9SSHKeyPath if err := cleanSSHKeyPaths(); err != nil { return err } + + if err := removeNonIgnitionKeyPathFragments(); err != nil { + return err + } } // Note we write keys only for the core user and so this ignores the user list @@ -1658,20 +1690,40 @@ func (dn *Daemon) updateSSHKeys(newUsers []ign3types.PasswdUser) error { return nil } +// Determines if a file exists by checking for the presence or lack thereof of +// an error when stat'ing the file. Returns any other error. +func fileExists(path string) (bool, error) { + _, err := os.Stat(path) + // If there is no error, the file definitely exists. + if err == nil { + return true, nil + } + + // If the error matches fs.ErrNotExist, the file definitely does not exist. + if errors.Is(err, fs.ErrNotExist) { + return false, nil + } + + // An unexpected error occurred. + return false, fmt.Errorf("cannot stat file: %w", err) +} + // Removes the old SSH key path (/home/core/.ssh/authorized_keys), if found. func cleanSSHKeyPaths() error { - _, err := os.Stat(constants.RHCOS8SSHKeyPath) - if err == nil { - err := os.RemoveAll(constants.RHCOS8SSHKeyPath) - if err != nil { - return fmt.Errorf("failed to remove path '%s': %w", constants.RHCOS8SSHKeyPath, err) - } - } else if !errors.Is(err, fs.ErrNotExist) { - // This shouldn't ever happen - return fmt.Errorf("unexpectedly failed to get info for path '%s': %w", constants.RHCOS8SSHKeyPath, err) + oldKeyExists, err := fileExists(constants.RHCOS8SSHKeyPath) + if err != nil { + return err } - return removeNonIgnitionKeyPathFragments() + if !oldKeyExists { + return nil + } + + if err := os.RemoveAll(constants.RHCOS8SSHKeyPath); err != nil { + return fmt.Errorf("failed to remove path '%s': %w", constants.RHCOS8SSHKeyPath, err) + } + + return nil } // Ensures authorized_keys.d/ignition is the only fragment that exists within the /home/core/.ssh dir. From 9412f1dd94c2cb40e78e8b736a25a08072021979 Mon Sep 17 00:00:00 2001 From: Zack Zlotnik Date: Thu, 2 Mar 2023 18:48:03 -0500 Subject: [PATCH 4/8] teaches TestIgn3Cfg about the new RHCOS 9 key path --- test/e2e/mcd_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/e2e/mcd_test.go b/test/e2e/mcd_test.go index 4997d5c75d..3798fe4347 100644 --- a/test/e2e/mcd_test.go +++ b/test/e2e/mcd_test.go @@ -796,7 +796,9 @@ func TestIgn3Cfg(t *testing.T) { assert.Equal(t, infraNode.Annotations[constants.CurrentMachineConfigAnnotationKey], renderedConfig) assert.Equal(t, infraNode.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) - foundSSH := helpers.ExecCmdOnNode(t, cs, infraNode, "grep", "1234_test_ign3", "/rootfs/home/core/.ssh/authorized_keys") + sshPaths := helpers.GetSSHPaths(helpers.GetOSReleaseForNode(t, cs, infraNode).OS) + + foundSSH := helpers.ExecCmdOnNode(t, cs, infraNode, "grep", "1234_test_ign3", filepath.Join("/rootfs", sshPaths.Expected)) if !strings.Contains(foundSSH, "1234_test_ign3") { t.Fatalf("updated ssh keys not found in authorized_keys, got %s", foundSSH) } From 14d30fe570e2cd4e10b58491a38f5cd483860cc2 Mon Sep 17 00:00:00 2001 From: Zack Zlotnik Date: Mon, 6 Mar 2023 14:40:54 -0500 Subject: [PATCH 5/8] checks perms for SSH key path dirs as well --- test/e2e/mcd_test.go | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/test/e2e/mcd_test.go b/test/e2e/mcd_test.go index 3798fe4347..b54ce39be2 100644 --- a/test/e2e/mcd_test.go +++ b/test/e2e/mcd_test.go @@ -459,6 +459,15 @@ func TestNoReboot(t *testing.T) { } t.Logf("Node %s has SSH key", infraNode.Name) + assertExpectedPerms(t, cs, infraNode, "/home/core/.ssh", []string{constants.CoreUserName, constants.CoreGroupName, "700"}) + + if sshPaths.Expected == constants.RHCOS9SSHKeyPath { + // /home/core/.ssh/authorized_keys.d + assertExpectedPerms(t, cs, infraNode, filepath.Dir(constants.RHCOS9SSHKeyPath), []string{constants.CoreUserName, constants.CoreGroupName, "700"}) + } + + assertExpectedPerms(t, cs, infraNode, sshPaths.Expected, []string{constants.CoreUserName, constants.CoreGroupName, "600"}) + currentEtcShadowContents := helpers.ExecCmdOnNode(t, cs, infraNode, "grep", "^core:", "/rootfs/etc/shadow") if currentEtcShadowContents == initialEtcShadowContents { @@ -467,9 +476,6 @@ func TestNoReboot(t *testing.T) { t.Logf("Node %s has Password Hash", infraNode.Name) - usernameAndGroup := strings.Split(strings.TrimSuffix(helpers.ExecCmdOnNode(t, cs, infraNode, "chroot", "/rootfs", "stat", "--format=%U %G", sshPaths.Expected), "\n"), " ") - assert.Equal(t, usernameAndGroup, []string{constants.CoreUserName, constants.CoreGroupName}) - output = helpers.ExecCmdOnNode(t, cs, infraNode, "cat", "/rootfs/proc/uptime") newTime := strings.Split(output, " ")[0] @@ -842,3 +848,12 @@ func createMCToAddFileForRole(name, role, filename, data string) *mcfgv1.Machine func createMCToAddFile(name, filename, data string) *mcfgv1.MachineConfig { return createMCToAddFileForRole(name, "worker", filename, data) } + +// Checks that a file or directory on a given node matches the expected +// permissions in the form of [username, groupname, octal file permissions]. +func assertExpectedPerms(t *testing.T, cs *framework.ClientSet, node corev1.Node, path string, expectedPerms []string) { + t.Helper() + + actualPerms := strings.Split(strings.TrimSuffix(helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "stat", "--format=%U %G %a", path), "\n"), " ") + assert.Equal(t, expectedPerms, actualPerms, "expected %s to have perms %v, got: %v", path, expectedPerms, actualPerms) +} From 3d7f3c1ce026aac7808d53e05a027f5240e2607f Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 12 Jan 2023 15:35:40 -0500 Subject: [PATCH 6/8] Switch to rhel-coreos-9 ref: https://issues.redhat.com/browse/COS-1983 We introduced a new `rhel-coreos-9` to aid having a switch be an atomic operation. --- Dockerfile | 6 +++--- cmd/machine-config-operator/bootstrap.go | 6 +++--- docs/UsingLayering.md | 8 ++++---- .../0000_80_machine-config-operator_05_osimageurl.yaml | 4 ++-- install/image-references | 8 ++++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index 90dfbab482..85d50f88a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,12 +14,12 @@ COPY install /manifests RUN if [[ "${TAGS}" == "fcos" ]] || [[ "${TAGS}" == "scos" ]]; then \ # comment out non-base/extensions image-references entirely for fcos/scos - sed -i '/- name: rhel-coreos-8-/,+3 s/^/#/' /manifests/image-references && \ + sed -i '/- name: rhel-coreos-[89]-/,+3 s/^/#/' /manifests/image-references && \ # also remove extensions from the osimageurl configmap (if we don't, oc won't rewrite it, and the placeholder value will survive and get used) sed -i '/baseOSExtensionsContainerImage:/ s/^/#/' /manifests/0000_80_machine-config-operator_05_osimageurl.yaml; fi && \ # rewrite image names for fcos/scos - if [[ "${TAGS}" == "fcos" ]]; then sed -i 's/rhel-coreos-8/fedora-coreos/g' /manifests/*; \ - elif [[ "${TAGS}" == "scos" ]]; then sed -i 's/rhel-coreos-8/centos-stream-coreos-9/g' /manifests/*; fi && \ + if [[ "${TAGS}" == "fcos" ]]; then sed -i 's/rhel-coreos-9/fedora-coreos/g' /manifests/*; \ + elif [[ "${TAGS}" == "scos" ]]; then sed -i 's/rhel-coreos-9/centos-stream-coreos-9/g' /manifests/*; fi && \ if ! rpm -q util-linux; then yum install -y util-linux && yum clean all && rm -rf /var/cache/yum/*; fi COPY templates /etc/mcc/templates ENTRYPOINT ["/usr/bin/machine-config-operator"] diff --git a/cmd/machine-config-operator/bootstrap.go b/cmd/machine-config-operator/bootstrap.go index a9f7c0aa27..2612681fda 100644 --- a/cmd/machine-config-operator/bootstrap.go +++ b/cmd/machine-config-operator/bootstrap.go @@ -79,8 +79,8 @@ func init() { bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.haproxyImage, "haproxy-image", "", "Image for haproxy.") bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.baremetalRuntimeCfgImage, "baremetal-runtimecfg-image", "", "Image for baremetal-runtimecfg.") bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.oauthProxyImage, "oauth-proxy-image", "", "Image for origin oauth proxy.") - bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.baseOSContainerImage, "baseos-image", "", "Image for rhel-coreos-8") - bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.baseOSExtensionsContainerImage, "baseos-extensions-image", "", "Image for rhel-coreos-8-extensions") + bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.baseOSContainerImage, "baseos-image", "", "ostree-bootable container image reference") + bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.baseOSExtensionsContainerImage, "baseos-extensions-image", "", "Image with extensions") bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.imageReferences, "image-references", "", "File containing imagestreams (from cluster-version-operator)") bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.cloudProviderCAFile, "cloud-provider-ca-file", "", "path to cloud provider CA certificate") @@ -114,7 +114,7 @@ func runBootstrapCmd(cmd *cobra.Command, args []string) { // To help debugging, immediately log version glog.Infof("Version: %+v (%s)", version.Raw, version.Hash) - baseOSContainerImageTag := "rhel-coreos-8" + baseOSContainerImageTag := "rhel-coreos-9" if version.IsFCOS() { baseOSContainerImageTag = "fedora-coreos" } else if version.IsSCOS() { diff --git a/docs/UsingLayering.md b/docs/UsingLayering.md index 3118a3ca9e..0b19c4c512 100644 --- a/docs/UsingLayering.md +++ b/docs/UsingLayering.md @@ -8,8 +8,8 @@ Layering lets you "layer" additional content on top of a Base OS Image using "co As of 4.12: -- The MCO uses the `rhel-coreos-8` [native format](https://coreos.github.io/rpm-ostree/container/) base OS image by default instead of `machine-os-content` -- You can "layer" user content on top of that `rhel-coreos-8` image using a container build, and that content will be applied during a rebase +- The MCO uses the `rhel-coreos-8` [native format](https://coreos.github.io/rpm-ostree/container/) base OS image by default instead of `machine-os-content` (and in 4.13+, the image is `rhel-coreos-9`) +- You can "layer" user content on top of that `rhel-coreos-9` image using a container build, and that content will be applied during a rebase - The MCO will allow `OSImageURL` to be overridden *on a per-pool basis* with such an layered image While layering is powerful, it's also an "advanced" use of the MCO, and it comes with some trade-offs. @@ -36,13 +36,13 @@ Nothing will stop you at this point from using a completely arbitrary image, but #### On an existing cluster ```bash -oc adm release info --image-for rhel-coreos-8 +oc adm release info --image-for rhel-coreos-9 ``` #### Or before you build your cluster ```bash -oc adm release info --image-for rhel-coreos-8 quay.io/openshift-release-dev/ocp-release:your_release_here +oc adm release info --image-for rhel-coreos-9 quay.io/openshift-release-dev/ocp-release:your_release_here ``` ### 2. "Layer" Some Content On Top Of It diff --git a/install/0000_80_machine-config-operator_05_osimageurl.yaml b/install/0000_80_machine-config-operator_05_osimageurl.yaml index bf49ba613b..dc0f402703 100644 --- a/install/0000_80_machine-config-operator_05_osimageurl.yaml +++ b/install/0000_80_machine-config-operator_05_osimageurl.yaml @@ -11,8 +11,8 @@ data: releaseVersion: 0.0.1-snapshot # This (will eventually) replace the below when https://github.com/openshift/enhancements/pull/1032 # progresses towards the default. - baseOSContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8" - baseOSExtensionsContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8-extensions" + baseOSContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-9" + baseOSExtensionsContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-9-extensions" # The OS payload used for 4.10 and below; more information in # https://github.com/openshift/machine-config-operator/blob/master/docs/OSUpgrades.md # (The original issue was https://github.com/openshift/machine-config-operator/issues/183 ) diff --git a/install/image-references b/install/image-references index 4170492251..a71a6f952d 100644 --- a/install/image-references +++ b/install/image-references @@ -23,14 +23,14 @@ spec: from: kind: DockerImage name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:machine-os-content - - name: rhel-coreos-8 + - name: rhel-coreos-9 from: kind: DockerImage - name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8 - - name: rhel-coreos-8-extensions + name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-9 + - name: rhel-coreos-9-extensions from: kind: DockerImage - name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8-extensions + name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-9-extensions - name: keepalived-ipfailover from: kind: DockerImage From 8728c7cb12a076d3e414e0bf36f8acac265a73d0 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 2 Mar 2023 08:38:35 -0500 Subject: [PATCH 7/8] daemon: Also override `kernel-modules-core` Unfortunately rpm-ostree requires this right now; we have an issue and code to provide a better API in https://github.com/coreos/rpm-ostree/issues/2542 But using that will require shipping the updated rpm-ostree in RHEL 8.6.z or at least OCP 4.12.z, which is problematic. Because we know the new MCD will always be upgrading to RHEL9, for now let's update this hardcoded list. In the future we can detect when the running host has `--remove-installed-kernel` and use it instead. --- pkg/daemon/update.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 1e99da96e0..6e65ccaee3 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -1167,7 +1167,7 @@ func (dn *CoreOSDaemon) switchKernel(oldConfig, newConfig *mcfgv1.MachineConfig) } // TODO: Drop this code and use https://github.com/coreos/rpm-ostree/issues/2542 instead - defaultKernel := []string{"kernel", "kernel-core", "kernel-modules", "kernel-modules-extra"} + defaultKernel := []string{"kernel", "kernel-core", "kernel-modules", "kernel-modules-core", "kernel-modules-extra"} // Note this list explicitly does *not* include kernel-rt as that is a meta-package that tries to pull in a lot // of other dependencies we don't want for historical reasons. // kernel-rt also has a split off kernel-rt-kvm subpackage because it's in a separate subscription in RHEL. From d47101b08c13ee71bb1399fc51bfc4268d91f54b Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 2 Mar 2023 15:18:15 -0500 Subject: [PATCH 8/8] openshift-azure-routes: Avoid synchronizing too quickly Rapid file changes triggering the path unit can start the service here frequently, and then this can cause the start limit to be hit, and then systemd will refuse further activations (unless we bumped the limit). I don't think we need to synchronize the iptables rules more than once every 3 seconds. --- .../azure/files/opt-libexec-openshift-azure-routes-sh.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/templates/master/00-master/azure/files/opt-libexec-openshift-azure-routes-sh.yaml b/templates/master/00-master/azure/files/opt-libexec-openshift-azure-routes-sh.yaml index 75ec6a8925..c204690069 100644 --- a/templates/master/00-master/azure/files/opt-libexec-openshift-azure-routes-sh.yaml +++ b/templates/master/00-master/azure/files/opt-libexec-openshift-azure-routes-sh.yaml @@ -174,6 +174,12 @@ contents: remove_stale add_rules echo "done applying vip rules" + # Arbitrary delay to avoid synchronizing too quickly on file changes; the VIP + # file could change multiple times quickly, but we don't need to react instantly + # to every change. Most crucially we want to be sure we don't trip over the + # default systemd StartLimitBurst/StartLimitIterval settings which are 5 and 10s + # respectively. + sleep 3 ;; cleanup) clear_rules