diff --git a/Dockerfile b/Dockerfile index 90dfbab482..85d50f88a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,12 +14,12 @@ COPY install /manifests RUN if [[ "${TAGS}" == "fcos" ]] || [[ "${TAGS}" == "scos" ]]; then \ # comment out non-base/extensions image-references entirely for fcos/scos - sed -i '/- name: rhel-coreos-8-/,+3 s/^/#/' /manifests/image-references && \ + sed -i '/- name: rhel-coreos-[89]-/,+3 s/^/#/' /manifests/image-references && \ # also remove extensions from the osimageurl configmap (if we don't, oc won't rewrite it, and the placeholder value will survive and get used) sed -i '/baseOSExtensionsContainerImage:/ s/^/#/' /manifests/0000_80_machine-config-operator_05_osimageurl.yaml; fi && \ # rewrite image names for fcos/scos - if [[ "${TAGS}" == "fcos" ]]; then sed -i 's/rhel-coreos-8/fedora-coreos/g' /manifests/*; \ - elif [[ "${TAGS}" == "scos" ]]; then sed -i 's/rhel-coreos-8/centos-stream-coreos-9/g' /manifests/*; fi && \ + if [[ "${TAGS}" == "fcos" ]]; then sed -i 's/rhel-coreos-9/fedora-coreos/g' /manifests/*; \ + elif [[ "${TAGS}" == "scos" ]]; then sed -i 's/rhel-coreos-9/centos-stream-coreos-9/g' /manifests/*; fi && \ if ! rpm -q util-linux; then yum install -y util-linux && yum clean all && rm -rf /var/cache/yum/*; fi COPY templates /etc/mcc/templates ENTRYPOINT ["/usr/bin/machine-config-operator"] diff --git a/cmd/machine-config-operator/bootstrap.go b/cmd/machine-config-operator/bootstrap.go index a9f7c0aa27..2612681fda 100644 --- a/cmd/machine-config-operator/bootstrap.go +++ b/cmd/machine-config-operator/bootstrap.go @@ -79,8 +79,8 @@ func init() { bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.haproxyImage, "haproxy-image", "", "Image for haproxy.") bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.baremetalRuntimeCfgImage, "baremetal-runtimecfg-image", "", "Image for baremetal-runtimecfg.") bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.oauthProxyImage, "oauth-proxy-image", "", "Image for origin oauth proxy.") - bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.baseOSContainerImage, "baseos-image", "", "Image for rhel-coreos-8") - bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.baseOSExtensionsContainerImage, "baseos-extensions-image", "", "Image for rhel-coreos-8-extensions") + bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.baseOSContainerImage, "baseos-image", "", "ostree-bootable container image reference") + bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.baseOSExtensionsContainerImage, "baseos-extensions-image", "", "Image with extensions") bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.imageReferences, "image-references", "", "File containing imagestreams (from cluster-version-operator)") bootstrapCmd.PersistentFlags().StringVar(&bootstrapOpts.cloudProviderCAFile, "cloud-provider-ca-file", "", "path to cloud provider CA certificate") @@ -114,7 +114,7 @@ func runBootstrapCmd(cmd *cobra.Command, args []string) { // To help debugging, immediately log version glog.Infof("Version: %+v (%s)", version.Raw, version.Hash) - baseOSContainerImageTag := "rhel-coreos-8" + baseOSContainerImageTag := "rhel-coreos-9" if version.IsFCOS() { baseOSContainerImageTag = "fedora-coreos" } else if version.IsSCOS() { diff --git a/docs/UsingLayering.md b/docs/UsingLayering.md index 3118a3ca9e..0b19c4c512 100644 --- a/docs/UsingLayering.md +++ b/docs/UsingLayering.md @@ -8,8 +8,8 @@ Layering lets you "layer" additional content on top of a Base OS Image using "co As of 4.12: -- The MCO uses the `rhel-coreos-8` [native format](https://coreos.github.io/rpm-ostree/container/) base OS image by default instead of `machine-os-content` -- You can "layer" user content on top of that `rhel-coreos-8` image using a container build, and that content will be applied during a rebase +- The MCO uses the `rhel-coreos-8` [native format](https://coreos.github.io/rpm-ostree/container/) base OS image by default instead of `machine-os-content` (and in 4.13+, the image is `rhel-coreos-9`) +- You can "layer" user content on top of that `rhel-coreos-9` image using a container build, and that content will be applied during a rebase - The MCO will allow `OSImageURL` to be overridden *on a per-pool basis* with such an layered image While layering is powerful, it's also an "advanced" use of the MCO, and it comes with some trade-offs. @@ -36,13 +36,13 @@ Nothing will stop you at this point from using a completely arbitrary image, but #### On an existing cluster ```bash -oc adm release info --image-for rhel-coreos-8 +oc adm release info --image-for rhel-coreos-9 ``` #### Or before you build your cluster ```bash -oc adm release info --image-for rhel-coreos-8 quay.io/openshift-release-dev/ocp-release:your_release_here +oc adm release info --image-for rhel-coreos-9 quay.io/openshift-release-dev/ocp-release:your_release_here ``` ### 2. "Layer" Some Content On Top Of It diff --git a/install/0000_80_machine-config-operator_05_osimageurl.yaml b/install/0000_80_machine-config-operator_05_osimageurl.yaml index bf49ba613b..dc0f402703 100644 --- a/install/0000_80_machine-config-operator_05_osimageurl.yaml +++ b/install/0000_80_machine-config-operator_05_osimageurl.yaml @@ -11,8 +11,8 @@ data: releaseVersion: 0.0.1-snapshot # This (will eventually) replace the below when https://github.com/openshift/enhancements/pull/1032 # progresses towards the default. - baseOSContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8" - baseOSExtensionsContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8-extensions" + baseOSContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-9" + baseOSExtensionsContainerImage: "placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-9-extensions" # The OS payload used for 4.10 and below; more information in # https://github.com/openshift/machine-config-operator/blob/master/docs/OSUpgrades.md # (The original issue was https://github.com/openshift/machine-config-operator/issues/183 ) diff --git a/install/image-references b/install/image-references index 4170492251..a71a6f952d 100644 --- a/install/image-references +++ b/install/image-references @@ -23,14 +23,14 @@ spec: from: kind: DockerImage name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:machine-os-content - - name: rhel-coreos-8 + - name: rhel-coreos-9 from: kind: DockerImage - name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8 - - name: rhel-coreos-8-extensions + name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-9 + - name: rhel-coreos-9-extensions from: kind: DockerImage - name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-8-extensions + name: placeholder.url.oc.will.replace.this.org/placeholdernamespace:rhel-coreos-9-extensions - name: keepalived-ipfailover from: kind: DockerImage diff --git a/pkg/daemon/constants/constants.go b/pkg/daemon/constants/constants.go index e51f77cffa..ab589dee26 100644 --- a/pkg/daemon/constants/constants.go +++ b/pkg/daemon/constants/constants.go @@ -84,4 +84,13 @@ const ( // changes to registries.conf will cause a crio reload and require extra logic about whether to drain ContainerRegistryConfPath = "/etc/containers/registries.conf" + + // SSH Keys for user "core" will only be written at /home/core/.ssh + CoreUserSSHPath = "/home/" + CoreUserName + "/.ssh" + + // SSH keys in RHCOS 8 will be written to /home/core/.ssh/authorized_keys + RHCOS8SSHKeyPath = CoreUserSSHPath + "/authorized_keys" + + // SSH keys in RHCOS 9 / FCOS / SCOS will be written to /home/core/.ssh/authorized_keys.d/ignition + RHCOS9SSHKeyPath = CoreUserSSHPath + "/authorized_keys.d/ignition" ) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 7cc194eb22..4d49217f9f 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -1499,6 +1499,71 @@ func removeIgnitionArtifacts() error { return nil } +// When we move from RHCOS 8 -> RHCOS 9, the SSH keys do not get written to the +// new location before the node reboots into RHCOS 9 because: +// +// 1. When the upgrade configs are written to the node, it is still running +// RHCOS 8, so the keys are not being written to the new location since the +// location is inferred from the currently booted OS. +// 2. The node reboots into RHCOS 9 to complete the upgrade. +// 3. The "are we on the latest config" functions detect that we are indeed on +// the latest config and so it does not attempt to perform an update. +// +// To work around that check on bootup if the we should use the new SSH key +// path and if the old SSH key path exists, we know that we need to migrate tot +// he new key path by calling dn.updateSSHKeyLocation(). +func (dn *Daemon) isSSHKeyLocationUpdateRequired() (bool, error) { + if !dn.useNewSSHKeyPath() { + // Return early because we're not using the new SSH key path. + return false, nil + } + + oldKeyExists, err := fileExists(constants.RHCOS8SSHKeyPath) + if err != nil { + return false, err + } + + newKeyExists, err := fileExists(constants.RHCOS9SSHKeyPath) + if err != nil { + return false, err + } + + // If the old key exists and the new key does not, we need to update. + return oldKeyExists && !newKeyExists, nil +} + +// Decode the Ignition config and perform the SSH key update. +func (dn *Daemon) updateSSHKeyLocation(cfg *mcfgv1.MachineConfig) error { + glog.Infof("SSH key location update required. Moving SSH keys from %q to %q.", constants.RHCOS8SSHKeyPath, constants.RHCOS9SSHKeyPath) + + ignConfig, err := ctrlcommon.ParseAndConvertConfig(cfg.Spec.Config.Raw) + if err != nil { + return fmt.Errorf("ignition failure when updating SSH key location: %w", err) + } + + if err := dn.updateSSHKeys(ignConfig.Passwd.Users); err != nil { + return fmt.Errorf("could not write SSH keys to new location: %w", err) + } + + return nil +} + +// Determines if we need to update the SSH key location and performs the +// necessary update if so. +func (dn *Daemon) updateSSHKeyLocationIfNeeded(cfg *mcfgv1.MachineConfig) error { + sshKeyLocationUpdateRequired, err := dn.isSSHKeyLocationUpdateRequired() + if err != nil { + return fmt.Errorf("unable to determine if SSH key location update is required: %w", err) + } + + if !sshKeyLocationUpdateRequired { + glog.Infof("SSH key location (%q) up-to-date!", constants.RHCOS9SSHKeyPath) + return nil + } + + return dn.updateSSHKeyLocation(cfg) +} + // checkStateOnFirstRun is a core entrypoint for our state machine. // It determines whether we're in our desired state, or if we're // transitioning between states, and whether or not we need to update @@ -1651,6 +1716,13 @@ func (dn *Daemon) checkStateOnFirstRun() error { return dn.triggerUpdateWithMachineConfig(state.currentConfig, state.desiredConfig) } + // When upgrading the OS, it is possible that the SSH key location will + // change. We should detect whether that is the case and update before we + // check for any config drift. + if err := dn.updateSSHKeyLocationIfNeeded(expectedConfig); err != nil { + return err + } + if err := dn.validateOnDiskState(expectedConfig); err != nil { wErr := fmt.Errorf("unexpected on-disk state validating against %s: %w", expectedConfig.GetName(), err) dn.nodeWriter.Eventf(corev1.EventTypeWarning, "OnDiskStateValidationFailed", wErr.Error()) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 3a6c585186..6e65ccaee3 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -5,6 +5,7 @@ import ( "context" "errors" "fmt" + "io/fs" "os" "os/exec" "os/user" @@ -36,8 +37,6 @@ const ( defaultDirectoryPermissions os.FileMode = 0o755 // defaultFilePermissions houses the default mode to use when no file permissions are provided defaultFilePermissions os.FileMode = 0o644 - // SSH Keys for user "core" will only be written at /home/core/.ssh - coreUserSSHPath = "/home/core/.ssh/" // fipsFile is the file to check if FIPS is enabled fipsFile = "/proc/sys/crypto/fips_enabled" extensionsRepo = "/etc/yum.repos.d/coreos-extensions.repo" @@ -1168,7 +1167,7 @@ func (dn *CoreOSDaemon) switchKernel(oldConfig, newConfig *mcfgv1.MachineConfig) } // TODO: Drop this code and use https://github.com/coreos/rpm-ostree/issues/2542 instead - defaultKernel := []string{"kernel", "kernel-core", "kernel-modules", "kernel-modules-extra"} + defaultKernel := []string{"kernel", "kernel-core", "kernel-modules", "kernel-modules-core", "kernel-modules-extra"} // Note this list explicitly does *not* include kernel-rt as that is a meta-package that tries to pull in a lot // of other dependencies we don't want for historical reasons. // kernel-rt also has a split off kernel-rt-kvm subpackage because it's in a separate subscription in RHEL. @@ -1542,7 +1541,30 @@ func (dn *Daemon) writeFiles(files []ign3types.File) error { return writeFiles(files) } -func (dn *Daemon) atomicallyWriteSSHKey(keys string) error { +// Ensures that both the SSH root directory (/home/core/.ssh) as well as any +// subdirectories are created with the correct (0700) permissions. +func createSSHKeyDir(authKeyDir string) error { + glog.Infof("Creating missing SSH key dir at %s", authKeyDir) + + mkdir := func(dir string) error { + return exec.Command("runuser", "-u", constants.CoreUserName, "--", "mkdir", "-m", "0700", "-p", dir).Run() + } + + // Create the root SSH key directory (/home/core/.ssh) first. + if err := mkdir(filepath.Dir(constants.RHCOS8SSHKeyPath)); err != nil { + return err + } + + // For RHCOS 8, creating /home/core/.ssh is all that is needed. + if authKeyDir == constants.RHCOS8SSHKeyPath { + return nil + } + + // Create the next level of the SSH key directory (/home/core/.ssh/authorized_keys.d) for RHCOS 9 cases. + return mkdir(filepath.Dir(constants.RHCOS9SSHKeyPath)) +} + +func (dn *Daemon) atomicallyWriteSSHKey(authKeyPath, keys string) error { uid, err := lookupUID(constants.CoreUserName) if err != nil { return err @@ -1553,24 +1575,15 @@ func (dn *Daemon) atomicallyWriteSSHKey(keys string) error { return err } - var authKeyPath string - if dn.os.IsEL9() || dn.os.IsFCOS() { - // In FCOS and EL9, ignition writes the SSH key to ~/.ssh/authorized_keys.d/ignition, - // and the ssh-key-dir sshd AuthorizedKeysCommand binary reads it from there. - authKeyPath = filepath.Join(coreUserSSHPath, "authorized_keys.d", "ignition") - } else { - authKeyPath = filepath.Join(coreUserSSHPath, "authorized_keys") - } - // Keys should only be written to "/home/core/.ssh" // Once Users are supported fully this should be writing to PasswdUser.HomeDir - glog.Infof("Writing SSHKeys at %q", authKeyPath) + glog.Infof("Writing SSH keys to %q", authKeyPath) - // Creating coreUserSSHPath in advance if it doesn't exist in order to ensure it is owned by core user + // Creating CoreUserSSHPath in advance if it doesn't exist in order to ensure it is owned by core user // See https://bugzilla.redhat.com/show_bug.cgi?id=2107113 - if _, err := os.Stat(coreUserSSHPath); os.IsNotExist(err) { - mkdirCoreCommand := exec.Command("runuser", "-u", constants.CoreUserName, "--", "mkdir", "-m", "0700", "-p", coreUserSSHPath) - if err := mkdirCoreCommand.Run(); err != nil { + authKeyDir := filepath.Dir(authKeyPath) + if _, err := os.Stat(authKeyDir); os.IsNotExist(err) { + if err := createSSHKeyDir(authKeyDir); err != nil { return err } } @@ -1579,7 +1592,7 @@ func (dn *Daemon) atomicallyWriteSSHKey(keys string) error { return err } - glog.V(2).Infof("Wrote SSHKeys at %s", authKeyPath) + glog.V(2).Infof("Wrote SSH keys to %q", authKeyPath) return nil } @@ -1617,6 +1630,13 @@ func (dn *Daemon) SetPasswordHash(newUsers []ign3types.PasswdUser) error { return nil } +// Determines if we should use the new SSH key path +// (/home/core/.ssh/authorized_keys.d/ignition) or the old SSH key path +// (/home/core/.ssh/authorized_keys) +func (dn *Daemon) useNewSSHKeyPath() bool { + return dn.os.IsEL9() || dn.os.IsFCOS() || dn.os.IsSCOS() +} + // Update a given PasswdUser's SSHKey func (dn *Daemon) updateSSHKeys(newUsers []ign3types.PasswdUser) error { if len(newUsers) == 0 { @@ -1642,50 +1662,93 @@ func (dn *Daemon) updateSSHKeys(newUsers []ign3types.PasswdUser) error { concatSSHKeys = concatSSHKeys + string(k) + "\n" } } + + authKeyPath := constants.RHCOS8SSHKeyPath + if !dn.mock { - authKeyPath := filepath.Join(coreUserSSHPath, "authorized_keys") - authKeyFragmentDirPath := filepath.Join(coreUserSSHPath, "authorized_keys.d") - - if dn.os.IsFCOS() { - // In older versions of OKD, the keys were written to `/home/core/.ssh/authorized_keys`. - // Newer versions of OKD will however expect the keys at `/home/core/.ssh/authorized_keys.d/ignition`. - // Check if the authorized_keys file at the legacy path exists. If it does, remove it. - // It will be recreated at the new fragment path by the atomicallyWriteSSHKey function - // that is called right after. - _, err := os.Stat(authKeyPath) - if err == nil { - err := os.RemoveAll(authKeyPath) - if err != nil { - return fmt.Errorf("failed to remove path '%s': %w", authKeyPath, err) - } - } else if !os.IsNotExist(err) { - // This shouldn't ever happen - return fmt.Errorf("unexpectedly failed to get info for path '%s': %w", authKeyPath, err) + // In RHCOS 8.6 or lower, the keys were written to `/home/core/.ssh/authorized_keys`. + // RHCOS 9.0+, FCOS, and SCOS will however expect the keys at `/home/core/.ssh/authorized_keys.d/ignition`. + // Check if the authorized_keys file at the legacy path exists. If it does, remove it. + // It will be recreated at the new fragment path by the atomicallyWriteSSHKey function + // that is called right after. + if dn.useNewSSHKeyPath() { + authKeyPath = constants.RHCOS9SSHKeyPath + + if err := cleanSSHKeyPaths(); err != nil { + return err } - // Ensure authorized_keys.d/ignition is the only fragment that exists - keyFragmentsDir, err := ctrlcommon.ReadDir(authKeyFragmentDirPath) - if err == nil { - for _, fragment := range keyFragmentsDir { - if fragment.Name() != "ignition" { - keyPath := filepath.Join(authKeyFragmentDirPath, fragment.Name()) - err := os.RemoveAll(keyPath) - if err != nil { - return fmt.Errorf("failed to remove path '%s': %w", keyPath, err) - } - } - } - } else if !os.IsNotExist(err) { - // This shouldn't ever happen - return fmt.Errorf("unexpectedly failed to get info for path '%s': %w", authKeyFragmentDirPath, err) + if err := removeNonIgnitionKeyPathFragments(); err != nil { + return err } } // Note we write keys only for the core user and so this ignores the user list - if err := dn.atomicallyWriteSSHKey(concatSSHKeys); err != nil { - return err + return dn.atomicallyWriteSSHKey(authKeyPath, concatSSHKeys) + } + + return nil +} + +// Determines if a file exists by checking for the presence or lack thereof of +// an error when stat'ing the file. Returns any other error. +func fileExists(path string) (bool, error) { + _, err := os.Stat(path) + // If there is no error, the file definitely exists. + if err == nil { + return true, nil + } + + // If the error matches fs.ErrNotExist, the file definitely does not exist. + if errors.Is(err, fs.ErrNotExist) { + return false, nil + } + + // An unexpected error occurred. + return false, fmt.Errorf("cannot stat file: %w", err) +} + +// Removes the old SSH key path (/home/core/.ssh/authorized_keys), if found. +func cleanSSHKeyPaths() error { + oldKeyExists, err := fileExists(constants.RHCOS8SSHKeyPath) + if err != nil { + return err + } + + if !oldKeyExists { + return nil + } + + if err := os.RemoveAll(constants.RHCOS8SSHKeyPath); err != nil { + return fmt.Errorf("failed to remove path '%s': %w", constants.RHCOS8SSHKeyPath, err) + } + + return nil +} + +// Ensures authorized_keys.d/ignition is the only fragment that exists within the /home/core/.ssh dir. +func removeNonIgnitionKeyPathFragments() error { + // /home/core/.ssh/authorized_keys.d + authKeyFragmentDirPath := filepath.Dir(constants.RHCOS9SSHKeyPath) + // ignition + authKeyFragmentBasename := filepath.Base(constants.RHCOS9SSHKeyPath) + + keyFragmentsDir, err := ctrlcommon.ReadDir(authKeyFragmentDirPath) + if err == nil { + for _, fragment := range keyFragmentsDir { + if fragment.Name() != authKeyFragmentBasename { + keyPath := filepath.Join(authKeyFragmentDirPath, fragment.Name()) + err := os.RemoveAll(keyPath) + if err != nil { + return fmt.Errorf("failed to remove path '%s': %w", keyPath, err) + } + } } + } else if !errors.Is(err, fs.ErrNotExist) { + // This shouldn't ever happen + return fmt.Errorf("unexpectedly failed to get info for path '%s': %w", constants.RHCOS9SSHKeyPath, err) } + return nil } diff --git a/pkg/daemon/update_test.go b/pkg/daemon/update_test.go index 1524127d70..3a88d52316 100644 --- a/pkg/daemon/update_test.go +++ b/pkg/daemon/update_test.go @@ -493,6 +493,9 @@ func TestWriteFiles(t *testing.T) { } } +// This test provides a false sense of security. Given the combination of the +// mock mode in the MCD coupled with the inputs into this test, it effectively +// no-ops and does not test what we think it tests. func TestUpdateSSHKeys(t *testing.T) { d := newMockDaemon() diff --git a/templates/master/00-master/azure/files/opt-libexec-openshift-azure-routes-sh.yaml b/templates/master/00-master/azure/files/opt-libexec-openshift-azure-routes-sh.yaml index 75ec6a8925..c204690069 100644 --- a/templates/master/00-master/azure/files/opt-libexec-openshift-azure-routes-sh.yaml +++ b/templates/master/00-master/azure/files/opt-libexec-openshift-azure-routes-sh.yaml @@ -174,6 +174,12 @@ contents: remove_stale add_rules echo "done applying vip rules" + # Arbitrary delay to avoid synchronizing too quickly on file changes; the VIP + # file could change multiple times quickly, but we don't need to react instantly + # to every change. Most crucially we want to be sure we don't trip over the + # default systemd StartLimitBurst/StartLimitIterval settings which are 5 and 10s + # respectively. + sleep 3 ;; cleanup) clear_rules diff --git a/test/e2e-single-node/sno_mcd_test.go b/test/e2e-single-node/sno_mcd_test.go index f3e5fc12b7..a86ad1716d 100644 --- a/test/e2e-single-node/sno_mcd_test.go +++ b/test/e2e-single-node/sno_mcd_test.go @@ -3,6 +3,7 @@ package e2e_sno_test import ( "context" "fmt" + "path/filepath" "strconv" "strings" "testing" @@ -246,6 +247,7 @@ func TestExtensions(t *testing.T) { t.Logf("Node %s has successfully rolled back", node.Name) } +// TODO: Can this test be moved to e2e-shared since it is very similar to the one in e2e/mcd_test.go? func TestNoReboot(t *testing.T) { cs := framework.NewClientSet("") @@ -259,14 +261,34 @@ func TestNoReboot(t *testing.T) { oldTime := strings.Split(output, " ")[0] t.Logf("Node %s initial uptime: %s", node.Name, oldTime) + sshKeyContent := "test adding authorized key without node reboot" + + nodeOS := helpers.GetOSReleaseForNode(t, cs, node).OS + + sshPaths := helpers.GetSSHPaths(nodeOS) + + t.Logf("Expecting SSH keys to be in %s", sshPaths.Expected) + + if sshPaths.Expected == constants.RHCOS9SSHKeyPath { + // Write an SSH key to the old location on the node because the update process should remove this file. + t.Logf("Writing SSH key to %s to ensure that it will be removed later", sshPaths.NotExpected) + bashCmd := fmt.Sprintf("printf '%s' > %s", sshKeyContent, filepath.Join("/rootfs", sshPaths.NotExpected)) + helpers.ExecCmdOnNode(t, cs, node, "/bin/bash", "-c", bashCmd) + } + + // Delete the expected SSH keys directory to ensure that the directories are + // (re)created correctly by the MCD. This targets the upgrade case where that + // directory may not previously exist. + helpers.ExecCmdOnNode(t, cs, node, "rm", "-rf", filepath.Join("/rootfs", filepath.Dir(sshPaths.Expected))) + // Adding authorized key for user core testIgnConfig := ctrlcommon.NewIgnConfig() - testSSHKey := ign3types.PasswdUser{Name: "core", SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{"test adding authorized key without node reboot"}} + testSSHKey := ign3types.PasswdUser{Name: "core", SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{ign3types.SSHAuthorizedKey(sshKeyContent)}} testIgnConfig.Passwd.Users = append(testIgnConfig.Passwd.Users, testSSHKey) addAuthorizedKey := &mcfgv1.MachineConfig{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("authorzied-key-%s", uuid.NewUUID()), + Name: fmt.Sprintf("authorized-key-%s", uuid.NewUUID()), Labels: helpers.MCLabelForRole("master"), }, Spec: mcfgv1.MachineConfigSpec{ @@ -292,8 +314,13 @@ func TestNoReboot(t *testing.T) { assert.Equal(t, node.Annotations[constants.CurrentMachineConfigAnnotationKey], renderedConfig) assert.Equal(t, node.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) - foundSSHKey := helpers.ExecCmdOnNode(t, cs, node, "cat", "/rootfs/home/core/.ssh/authorized_keys") - if !strings.Contains(foundSSHKey, "test adding authorized key without node reboot") { + t.Logf("Expecting SSH keys to be in %s", sshPaths.Expected) + + helpers.AssertFileOnNode(t, cs, node, sshPaths.Expected) + helpers.AssertFileNotOnNode(t, cs, node, sshPaths.NotExpected) + + foundSSHKey := helpers.ExecCmdOnNode(t, cs, node, "cat", filepath.Join("/rootfs", sshPaths.Expected)) + if !strings.Contains(foundSSHKey, sshKeyContent) { t.Fatalf("updated ssh keys not found in authorized_keys, got %s", foundSSHKey) } t.Logf("Node %s has SSH key", node.Name) @@ -332,11 +359,14 @@ func TestNoReboot(t *testing.T) { assert.Equal(t, node.Annotations[constants.CurrentMachineConfigAnnotationKey], oldMasterRenderedConfig) assert.Equal(t, node.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) - foundSSHKey = helpers.ExecCmdOnNode(t, cs, node, "cat", "/rootfs/home/core/.ssh/authorized_keys") - if strings.Contains(foundSSHKey, "test adding authorized key without node reboot") { + foundSSHKey = helpers.ExecCmdOnNode(t, cs, node, "cat", filepath.Join("/rootfs", sshPaths.Expected)) + if strings.Contains(foundSSHKey, sshKeyContent) { t.Fatalf("Node %s did not rollback successfully", node.Name) } + helpers.AssertFileOnNode(t, cs, node, sshPaths.Expected) + helpers.AssertFileNotOnNode(t, cs, node, sshPaths.NotExpected) + t.Logf("Node %s has successfully rolled back", node.Name) // Ensure that node didn't reboot during rollback diff --git a/test/e2e/mcd_test.go b/test/e2e/mcd_test.go index 613f0b9243..b54ce39be2 100644 --- a/test/e2e/mcd_test.go +++ b/test/e2e/mcd_test.go @@ -3,6 +3,7 @@ package e2e_test import ( "context" "fmt" + "path/filepath" "strconv" "strings" "testing" @@ -382,6 +383,28 @@ func TestNoReboot(t *testing.T) { oldInfraRenderedConfig := helpers.GetMcName(t, cs, "infra") infraNode := helpers.GetSingleNodeByRole(t, cs, "infra") + + sshKeyContent := "test adding authorized key without node reboot" + + nodeOS := helpers.GetOSReleaseForNode(t, cs, infraNode).OS + + sshPaths := helpers.GetSSHPaths(nodeOS) + + t.Logf("Expecting SSH keys to be in %s", sshPaths.Expected) + + if sshPaths.Expected == constants.RHCOS9SSHKeyPath { + // Write an SSH key to the old location on the node because the update process should remove this file. + t.Logf("Writing SSH key to %s to ensure that it will be removed later", sshPaths.NotExpected) + bashCmd := fmt.Sprintf("printf '%s' > %s", sshKeyContent, filepath.Join("/rootfs", sshPaths.NotExpected)) + helpers.ExecCmdOnNode(t, cs, infraNode, "/bin/bash", "-c", bashCmd) + } + + // Delete the expected SSH keys directory to ensure that the directories are + // (re)created correctly by the MCD. This targets the upgrade case where that + // directory may not previously exist. Note: This will need to be revisited + // once Config Drift Monitor is aware of SSH keys. + helpers.ExecCmdOnNode(t, cs, infraNode, "rm", "-rf", filepath.Join("/rootfs", filepath.Dir(sshPaths.Expected))) + output := helpers.ExecCmdOnNode(t, cs, infraNode, "cat", "/rootfs/proc/uptime") oldTime := strings.Split(output, " ")[0] t.Logf("Node %s initial uptime: %s", infraNode.Name, oldTime) @@ -390,17 +413,18 @@ func TestNoReboot(t *testing.T) { // Adding authorized key for user core testIgnConfig := ctrlcommon.NewIgnConfig() testPasswdHash := "testpass" - testSSHKey := ign3types.PasswdUser{ - Name: "core", - SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{"test adding authorized key without node reboot"}, - PasswordHash: &testPasswdHash, - } - testIgnConfig.Passwd.Users = append(testIgnConfig.Passwd.Users, testSSHKey) + testIgnConfig.Passwd.Users = []ign3types.PasswdUser{ + { + Name: "core", + SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{ign3types.SSHAuthorizedKey(sshKeyContent)}, + PasswordHash: &testPasswdHash, + }, + } addAuthorizedKey := &mcfgv1.MachineConfig{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("authorzied-key-infra-%s", uuid.NewUUID()), + Name: fmt.Sprintf("authorized-key-infra-%s", uuid.NewUUID()), Labels: helpers.MCLabelForRole("infra"), }, Spec: mcfgv1.MachineConfigSpec{ @@ -426,21 +450,31 @@ func TestNoReboot(t *testing.T) { assert.Equal(t, infraNode.Annotations[constants.CurrentMachineConfigAnnotationKey], renderedConfig) assert.Equal(t, infraNode.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) - foundSSHKey := helpers.ExecCmdOnNode(t, cs, infraNode, "cat", "/rootfs/home/core/.ssh/authorized_keys") - if !strings.Contains(foundSSHKey, "test adding authorized key without node reboot") { + helpers.AssertFileOnNode(t, cs, infraNode, sshPaths.Expected) + helpers.AssertFileNotOnNode(t, cs, infraNode, sshPaths.NotExpected) + + foundSSHKey := helpers.ExecCmdOnNode(t, cs, infraNode, "cat", filepath.Join("/rootfs", sshPaths.Expected)) + if !strings.Contains(foundSSHKey, sshKeyContent) { t.Fatalf("updated ssh keys not found in authorized_keys, got %s", foundSSHKey) } t.Logf("Node %s has SSH key", infraNode.Name) + assertExpectedPerms(t, cs, infraNode, "/home/core/.ssh", []string{constants.CoreUserName, constants.CoreGroupName, "700"}) + + if sshPaths.Expected == constants.RHCOS9SSHKeyPath { + // /home/core/.ssh/authorized_keys.d + assertExpectedPerms(t, cs, infraNode, filepath.Dir(constants.RHCOS9SSHKeyPath), []string{constants.CoreUserName, constants.CoreGroupName, "700"}) + } + + assertExpectedPerms(t, cs, infraNode, sshPaths.Expected, []string{constants.CoreUserName, constants.CoreGroupName, "600"}) + currentEtcShadowContents := helpers.ExecCmdOnNode(t, cs, infraNode, "grep", "^core:", "/rootfs/etc/shadow") if currentEtcShadowContents == initialEtcShadowContents { t.Fatalf("updated password hash not found in etc/shadow, got %s", currentEtcShadowContents) } - t.Logf("Node %s has Password Hash", infraNode.Name) - usernameAndGroup := strings.Split(strings.TrimSuffix(helpers.ExecCmdOnNode(t, cs, infraNode, "chroot", "/rootfs", "stat", "--format=%U %G", "/home/core/.ssh/authorized_keys"), "\n"), " ") - assert.Equal(t, usernameAndGroup, []string{constants.CoreUserName, constants.CoreGroupName}) + t.Logf("Node %s has Password Hash", infraNode.Name) output = helpers.ExecCmdOnNode(t, cs, infraNode, "cat", "/rootfs/proc/uptime") newTime := strings.Split(output, " ")[0] @@ -476,11 +510,14 @@ func TestNoReboot(t *testing.T) { assert.Equal(t, infraNode.Annotations[constants.CurrentMachineConfigAnnotationKey], oldInfraRenderedConfig) assert.Equal(t, infraNode.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) - foundSSHKey = helpers.ExecCmdOnNode(t, cs, infraNode, "cat", "/rootfs/home/core/.ssh/authorized_keys") - if strings.Contains(foundSSHKey, "test adding authorized key without node reboot") { + foundSSHKey = helpers.ExecCmdOnNode(t, cs, infraNode, "cat", filepath.Join("/rootfs", sshPaths.Expected)) + if strings.Contains(foundSSHKey, sshKeyContent) { t.Fatalf("Node %s did not rollback successfully", infraNode.Name) } + helpers.AssertFileOnNode(t, cs, infraNode, sshPaths.Expected) + helpers.AssertFileNotOnNode(t, cs, infraNode, sshPaths.NotExpected) + t.Logf("Node %s has successfully rolled back", infraNode.Name) // Ensure that node didn't reboot during rollback @@ -765,7 +802,9 @@ func TestIgn3Cfg(t *testing.T) { assert.Equal(t, infraNode.Annotations[constants.CurrentMachineConfigAnnotationKey], renderedConfig) assert.Equal(t, infraNode.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) - foundSSH := helpers.ExecCmdOnNode(t, cs, infraNode, "grep", "1234_test_ign3", "/rootfs/home/core/.ssh/authorized_keys") + sshPaths := helpers.GetSSHPaths(helpers.GetOSReleaseForNode(t, cs, infraNode).OS) + + foundSSH := helpers.ExecCmdOnNode(t, cs, infraNode, "grep", "1234_test_ign3", filepath.Join("/rootfs", sshPaths.Expected)) if !strings.Contains(foundSSH, "1234_test_ign3") { t.Fatalf("updated ssh keys not found in authorized_keys, got %s", foundSSH) } @@ -809,3 +848,12 @@ func createMCToAddFileForRole(name, role, filename, data string) *mcfgv1.Machine func createMCToAddFile(name, filename, data string) *mcfgv1.MachineConfig { return createMCToAddFileForRole(name, "worker", filename, data) } + +// Checks that a file or directory on a given node matches the expected +// permissions in the form of [username, groupname, octal file permissions]. +func assertExpectedPerms(t *testing.T, cs *framework.ClientSet, node corev1.Node, path string, expectedPerms []string) { + t.Helper() + + actualPerms := strings.Split(strings.TrimSuffix(helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "stat", "--format=%U %G %a", path), "\n"), " ") + assert.Equal(t, expectedPerms, actualPerms, "expected %s to have perms %v, got: %v", path, expectedPerms, actualPerms) +} diff --git a/test/helpers/utils.go b/test/helpers/utils.go index f749cf4ec7..0525bc46fe 100644 --- a/test/helpers/utils.go +++ b/test/helpers/utils.go @@ -31,6 +31,7 @@ import ( "k8s.io/client-go/util/retry" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" ) @@ -382,6 +383,28 @@ func CreateMCP(t *testing.T, cs *framework.ClientSet, mcpName string) func() { } } +type SSHPaths struct { + // The path where SSH keys are expected to be found. + Expected string + // The path where SSH keys are *not* expected to be found. + NotExpected string +} + +// Determines where to expect SSH keys for the core user on a given node based upon the node's OS. +func GetSSHPaths(os osrelease.OperatingSystem) SSHPaths { + if os.IsEL9() || os.IsSCOS() || os.IsFCOS() { + return SSHPaths{ + Expected: constants.RHCOS9SSHKeyPath, + NotExpected: constants.RHCOS8SSHKeyPath, + } + } + + return SSHPaths{ + Expected: constants.RHCOS8SSHKeyPath, + NotExpected: constants.RHCOS9SSHKeyPath, + } +} + // MCPNameToRole converts a mcpName to a node role label func MCPNameToRole(mcpName string) string { return fmt.Sprintf("node-role.kubernetes.io/%s", mcpName) @@ -392,23 +415,87 @@ func CreateMC(name, role string) *mcfgv1.MachineConfig { return NewMachineConfig(name, MCLabelForRole(role), "", nil) } +// Asserts that a given file is present on the underlying node. +func AssertFileOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, path string) bool { + t.Helper() + + path = canonicalizeNodeFilePath(path) + + out, err := ExecCmdOnNodeWithError(cs, node, "stat", path) + + return assert.NoError(t, err, "expected to find file %s on %s, got:\n%s\nError: %s", path, node.Name, out, err) +} + +// Asserts that a given file is *not* present on the underlying node. +func AssertFileNotOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, path string) bool { + t.Helper() + + path = canonicalizeNodeFilePath(path) + + out, err := ExecCmdOnNodeWithError(cs, node, "stat", path) + + return assert.Error(t, err, "expected not to find file %s on %s, got:\n%s", path, node.Name, out) && + assert.Contains(t, out, "No such file or directory", "expected command output to contain 'No such file or directory', got: %s", out) +} + +// Adds the /rootfs onto a given file path, if not already present. +func canonicalizeNodeFilePath(path string) string { + rootfs := "/rootfs" + + if !strings.HasPrefix(path, rootfs) { + return filepath.Join(rootfs, path) + } + + return path +} + // ExecCmdOnNode finds a node's mcd, and oc rsh's into it to execute a command on the node // all commands should use /rootfs as root func ExecCmdOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, subArgs ...string) string { + t.Helper() + + cmd, err := execCmdOnNode(cs, node, subArgs...) + require.Nil(t, err, "could not prepare to exec cmd %v on node %s: %s", subArgs, node.Name, err) + cmd.Stderr = os.Stderr + + out, err := cmd.Output() + require.Nil(t, err, "failed to exec cmd %v on node %s: %s", subArgs, node.Name, string(out)) + return string(out) +} + +// ExecCmdOnNodeWithError behaves like ExecCmdOnNode, with the exception that +// any errors are returned to the caller for inspection. This allows one to +// execute a command that is expected to fail; e.g., stat /nonexistant/file. +func ExecCmdOnNodeWithError(cs *framework.ClientSet, node corev1.Node, subArgs ...string) (string, error) { + cmd, err := execCmdOnNode(cs, node, subArgs...) + if err != nil { + return "", err + } + + out, err := cmd.CombinedOutput() + return string(out), err +} + +// ExecCmdOnNode finds a node's mcd, and oc rsh's into it to execute a command on the node +// all commands should use /rootfs as root +func execCmdOnNode(cs *framework.ClientSet, node corev1.Node, subArgs ...string) (*exec.Cmd, error) { // Check for an oc binary in $PATH. path, err := exec.LookPath("oc") if err != nil { - t.Fatalf("could not locate oc command: %s", err) + return nil, fmt.Errorf("could not locate oc command: %w", err) } // Get the kubeconfig file path kubeconfig, err := cs.GetKubeconfig() if err != nil { - t.Fatalf("could not get kubeconfig: %s", err) + return nil, fmt.Errorf("could not get kubeconfig: %w", err) } mcd, err := mcdForNode(cs, &node) - require.Nil(t, err) + if err != nil { + return nil, fmt.Errorf("could not get MCD for node %s: %w", node.Name, err) + } + mcdName := mcd.ObjectMeta.Name entryPoint := path @@ -423,23 +510,19 @@ func ExecCmdOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, subA // $KUBECONFIG, oc will be unaware of it. To remedy, we explicitly set // KUBECONFIG to the value held by the clientset. cmd.Env = append(cmd.Env, "KUBECONFIG="+kubeconfig) - cmd.Stderr = os.Stderr - - out, err := cmd.Output() - require.Nil(t, err, "failed to exec cmd %v on node %s: %s", subArgs, node.Name, string(out)) - return string(out) + return cmd, nil } -// IsOKDCluster checks whether the Upstream field on the CV spec references OKD's update server +// IsOKDCluster checks whether the Upstream field on the CV spec references an OKD release controller func IsOKDCluster(cs *framework.ClientSet) (bool, error) { cv, err := cs.ClusterVersions().Get(context.TODO(), "version", metav1.GetOptions{}) if err != nil { return false, err } - if cv.Spec.Upstream == "https://origin-release.svc.ci.openshift.org/graph" { - return true, nil - } - return false, nil + + // TODO: Adjust this as OKD becomes available for different platforms, e.g., arm64. + okdReleaseControllers := sets.NewString("https://amd64.origin.releases.ci.openshift.org/graph") + return okdReleaseControllers.Has(string(cv.Spec.Upstream)), nil } func MCLabelForRole(role string) map[string]string {