diff --git a/microcloud/test/includes/check.sh b/microcloud/test/includes/check.sh index e70bd9c9..1529e060 100644 --- a/microcloud/test/includes/check.sh +++ b/microcloud/test/includes/check.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Miscellaneous test checks. check_dependencies() { @@ -5,6 +6,9 @@ check_dependencies() { local dep missing missing="" + # XXX: make sure we don't detect lxd-installer wrapper by accident + [ -x /usr/sbin/lxc ] && chmod -x /usr/sbin/lxc /usr/sbin/lxd + for dep in "$@"; do if ! command -v "$dep" >/dev/null 2>&1; then [ "$missing" ] && missing="$missing $dep" || missing="$dep" @@ -15,6 +19,12 @@ check_dependencies() { echo "Missing dependencies: $missing" >&2 exit 1 fi + + # Instances need to be able to self-report on their state + if ! lxc info | sed -ne '/^api_extensions:/,/^[^-]/ s/^- //p' | grep -qxF "instance_ready_state"; then + echo "Missing LXD instance_ready_state extension" >&2 + exit 1 + fi } check_empty() { @@ -24,3 +34,24 @@ check_empty() { false fi } + +check_snap_channels() { + if [ "${LXD_SNAP_CHANNEL}" != "5.21/edge" ]; then + echo "::warning::lxd channel not set to 5.21/edge, continuing anyway" + fi + + non_edge="" + if [ "${MICROCEPH_SNAP_CHANNEL}" != "latest/edge" ]; then + non_edge="${non_edge} microceph" + fi + if [ "${MICROCLOUD_SNAP_CHANNEL}" != "latest/edge" ]; then + non_edge="${non_edge} microcloud" + fi + if [ "${MICROOVN_SNAP_CHANNEL}" != "latest/edge" ]; then + non_edge="${non_edge} microovn" + fi + + for snap in ${non_edge}; do + echo "::warning::${snap} channel not set to latest/edge, continuing anyway" + done +} diff --git a/microcloud/test/includes/microcloud.sh b/microcloud/test/includes/microcloud.sh index 13bf02cd..f2e2c46d 100644 --- a/microcloud/test/includes/microcloud.sh +++ b/microcloud/test/includes/microcloud.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # unset_interactive_vars: Unsets all variables related to the test console. 
unset_interactive_vars() { unset LOOKUP_IFACE LIMIT_SUBNET SKIP_SERVICE EXPECT_PEERS \ @@ -93,24 +95,58 @@ fi echo "${setup}" | sed -e '/^\s*#/d' -e '/^\s*$/d' } +set_debug_binaries() { + name="${1}" + + if [ -n "${MICROOVN_SNAP_PATH}" ]; then + echo "==> Add local build of MicroOVN snap." + lxc file push "${MICROOVN_SNAP_PATH}" "${name}/root/microovn.snap" + lxc exec "${name}" -- snap install --dangerous "/root/microovn.snap" + fi + + if [ -n "${MICROCEPH_SNAP_PATH}" ]; then + echo "==> Add local build of MicroCeph snap." + lxc file push "${MICROCEPH_SNAP_PATH}" "${name}/root/microceph.snap" + lxc exec "${name}" -- snap install --dangerous "/root/microceph.snap" + fi + + if [ -n "${MICROCLOUD_DEBUG_PATH}" ] && [ -n "${MICROCLOUDD_DEBUG_PATH}" ]; then + echo "==> Add debug binaries for MicroCloud." + lxc exec "${name}" -- rm -f /var/snap/microcloud/common/microcloudd.debug + lxc exec "${name}" -- rm -f /var/snap/microcloud/common/microcloud.debug + + lxc file push --quiet "${MICROCLOUDD_DEBUG_PATH}" "${name}"/var/snap/microcloud/common/microcloudd.debug --mode 0755 + lxc file push --quiet "${MICROCLOUD_DEBUG_PATH}" "${name}"/var/snap/microcloud/common/microcloud.debug --mode 0755 + + lxc exec "${name}" -- systemctl restart snap.microcloud.daemon || true + fi + + if [ -n "${LXD_DEBUG_PATH}" ]; then + echo "==> Add a debug binary for LXD." + lxc exec "${name}" -- rm -f /var/snap/lxd/common/lxd.debug + lxc file push --quiet "${LXD_DEBUG_PATH}" "${name}"/var/snap/lxd/common/lxd.debug + lxc exec "${name}" -- systemctl reload snap.lxd.daemon || true + lxc exec "${name}" -- lxd waitready + fi +} + # set_remote: Adds and switches to the remote for the MicroCloud node with the given name. 
set_remote() { remote="${1}" name="${2}" + client="${3}" lxc remote switch local - addr="$(lxc exec "${name}" -- lxc config get cluster.https_address)" - if lxc remote list -f csv | cut -d',' -f1 | grep -qwF "${remote}" ; then lxc remote remove "${remote}" fi - lxc exec "${name}" -- lxc config set core.trust_password test + token="$(lxc exec "${name}" -- lxc config trust add --name "${client}" --quiet)" # Suppress the confirmation as it's noisy. - lxc remote add "${remote}" "https://${addr}" --password "test" --accept-certificate > /dev/null 2>&1 + lxc remote add "${remote}" "${token}" > /dev/null 2>&1 lxc remote switch "${remote}" } @@ -144,54 +180,54 @@ validate_system_microceph() { # validate_system_microovn: Ensures the node with the given name has correctly set up MicroOVN with the given resources. validate_system_microovn() { name=${1} + shift 1 echo "==> ${name} Validating MicroOVN" lxc remote switch local - lxc exec "${name}" -- sh -ceu "microovn cluster list | grep -q ${name}" -} + lxc exec "${name}" -- microovn cluster list | grep -q "${name}" +} # validate_system_lxd_zfs: Ensures the node with the given name has the given disk set up for ZFS storage. 
validate_system_lxd_zfs() { name=${1} local_disk=${2:-} echo " ${name} Validating ZFS storage" - lxc config get "storage.backups_volume" --target "${name}" | grep -q '^local/backups$' - lxc config get "storage.images_volume" --target "${name}" | grep -q '^local/images$' - - cfg=$(lxc storage show local) - echo "${cfg}" | grep -q "config: {}" - echo "${cfg}" | grep -q "status: Created" - - cfg=$(lxc storage show local --target "${name}") - echo "${cfg}" | grep -q "source: local" - echo "${cfg}" | grep -q "volatile.initial_source: .*${local_disk}" - echo "${cfg}" | grep -q "zfs.pool_name: local" - echo "${cfg}" | grep -q "driver: zfs" - echo "${cfg}" | grep -q "status: Created" - echo "${cfg}" | grep -q "/1.0/storage-pools/local/volumes/custom/backups?target=${name}" - echo "${cfg}" | grep -q "/1.0/storage-pools/local/volumes/custom/images?target=${name}" + [ "$(lxc config get storage.backups_volume --target "${name}")" = "local/backups" ] + [ "$(lxc config get storage.images_volume --target "${name}")" = "local/images" ] + + cfg="$(lxc storage show local)" + grep -q "config: {}" <<< "${cfg}" + grep -q "status: Created" <<< "${cfg}" + + cfg="$(lxc storage show local --target "${name}")" + grep -q "source: local" <<< "${cfg}" + grep -q "volatile.initial_source: .*${local_disk}" <<< "${cfg}" + grep -q "zfs.pool_name: local" <<< "${cfg}" + grep -q "driver: zfs" <<< "${cfg}" + grep -q "status: Created" <<< "${cfg}" + grep -q "/1.0/storage-pools/local/volumes/custom/backups?target=${name}" <<< "${cfg}" + grep -q "/1.0/storage-pools/local/volumes/custom/images?target=${name}" <<< "${cfg}" } # validate_system_lxd_ceph: Ensures the node with the given name has ceph storage set up. 
validate_system_lxd_ceph() { name=${1} - remote_disks=${2:-0} echo " ${name} Validating Ceph storage" - cfg=$(lxc storage show remote) - echo "${cfg}" | grep -q "ceph.cluster_name: ceph" - echo "${cfg}" | grep -q "ceph.osd.pg_num: \"32\"" - echo "${cfg}" | grep -q "ceph.osd.pool_name: lxd_remote" - echo "${cfg}" | grep -q "ceph.rbd.du: \"false\"" - echo "${cfg}" | grep -q "ceph.rbd.features: layering,striping,exclusive-lock,object-map,fast-diff,deep-flatten" - echo "${cfg}" | grep -q "ceph.user.name: admin" - echo "${cfg}" | grep -q "volatile.pool.pristine: \"true\"" - echo "${cfg}" | grep -q "status: Created" - echo "${cfg}" | grep -q "driver: ceph" - - cfg=$(lxc storage show remote --target "${name}") - echo "${cfg}" | grep -q "source: lxd_remote" - echo "${cfg}" | grep -q "status: Created" + cfg="$(lxc storage show remote)" + grep -q "ceph.cluster_name: ceph" <<< "${cfg}" + grep -q "ceph.osd.pg_num: \"32\"" <<< "${cfg}" + grep -q "ceph.osd.pool_name: lxd_remote" <<< "${cfg}" + grep -q "ceph.rbd.du: \"false\"" <<< "${cfg}" + grep -q "ceph.rbd.features: layering,striping,exclusive-lock,object-map,fast-diff,deep-flatten" <<< "${cfg}" + grep -q "ceph.user.name: admin" <<< "${cfg}" + grep -q "volatile.pool.pristine: \"true\"" <<< "${cfg}" + grep -q "status: Created" <<< "${cfg}" + grep -q "driver: ceph" <<< "${cfg}" + + cfg="$(lxc storage show remote --target "${name}")" + grep -q "source: lxd_remote" <<< "${cfg}" + grep -q "status: Created" <<< "${cfg}" } # validate_system_lxd_ovn: Ensures the node with the given name and config has ovn network set up correctly. 
@@ -204,51 +240,50 @@ validate_system_lxd_ovn() { ipv6_gateway=${6:-} echo " ${name} Validating OVN network" - addr=$(lxc exec local:"${name}" -- lxc config get cluster.https_address) num_conns=3 if [ "${num_peers}" -lt "${num_conns}" ]; then num_conns="${num_peers}" fi - lxc config get "network.ovn.northbound_connection" --target "${name}" | sed -e 's/,/\n/g' | wc -l | grep -q "${num_conns}" + [ "$(lxc config get network.ovn.northbound_connection --target "${name}" | sed -e 's/,/\n/g' | wc -l)" = "${num_conns}" ] # Make sure there's no empty addresses. - ! lxc config get "network.ovn.northbound_connection" --target "${name}" | sed -e 's/,/\n/g' | grep -q '^ssl:$' || false - ! lxc config get "network.ovn.northbound_connection" --target "${name}" | sed -e 's/,/\n/g' | grep -q '^ssl::' || false + ! lxc config get network.ovn.northbound_connection --target "${name}" | sed -e 's/,/\n/g' | grep -q '^ssl:$' || false + ! lxc config get network.ovn.northbound_connection --target "${name}" | sed -e 's/,/\n/g' | grep -q '^ssl::' || false - cfg=$(lxc network show UPLINK) - echo "${cfg}" | grep -q "status: Created" - echo "${cfg}" | grep -q "type: physical" + cfg="$(lxc network show UPLINK)" + grep -q "status: Created" <<< "${cfg}" + grep -q "type: physical" <<< "${cfg}" if [ -n "${ipv4_gateway}" ] ; then - echo "${cfg}" | grep -q "ipv4.gateway: ${ipv4_gateway}" + grep -qF "ipv4.gateway: ${ipv4_gateway}" <<< "${cfg}" fi if [ -n "${ipv4_ranges}" ] ; then - echo "${cfg}" | grep -q "ipv4.ovn.ranges: ${ipv4_ranges}" + grep -qF "ipv4.ovn.ranges: ${ipv4_ranges}" <<< "${cfg}" fi if [ -n "${ipv6_gateway}" ] ; then - echo "${cfg}" | grep -q "ipv6.gateway: ${ipv6_gateway}" + grep -qF "ipv6.gateway: ${ipv6_gateway}" <<< "${cfg}" fi - lxc network show UPLINK --target "${name}" | grep -q "parent: ${ovn_interface}" + lxc network show UPLINK --target "${name}" | grep -qF "parent: ${ovn_interface}" - cfg=$(lxc network show default) - echo "${cfg}" | grep -q "status: Created" - echo "${cfg}" | 
grep -q "type: ovn" - echo "${cfg}" | grep -q 'network: UPLINK' + cfg="$(lxc network show default)" + grep -q "status: Created" <<< "${cfg}" + grep -q "type: ovn" <<< "${cfg}" + grep -q "network: UPLINK" <<< "${cfg}" } # validate_system_lxd_fan: Ensures the node with the given name has the Ubuntu FAN network set up correctly. validate_system_lxd_fan() { name=${1} echo " ${name} Validating FAN network" - cfg=$(lxc network show lxdfan0) - echo "${cfg}" | grep -q "status: Created" - echo "${cfg}" | grep -q "type: bridge" - echo "${cfg}" | grep -q 'bridge.mode: fan' + cfg="$(lxc network show lxdfan0)" + grep -q "status: Created" <<< "${cfg}" + grep -q "type: bridge" <<< "${cfg}" + grep -q "bridge.mode: fan" <<< "${cfg}" } # validate_system_lxd: Ensures the node with the given name has correctly set up LXD with the given resources. @@ -269,15 +304,19 @@ validate_system_lxd() { lxc remote switch local - # Call lxc list once to supress the welcome message. - lxc exec "${name}" -- lxc list > /dev/null 2>&1 - - # Add the peer as a remote. - set_remote microcloud-test "${name}" + # Add the peer as a remote using the name test for the trust. + set_remote microcloud-test "${name}" test # Ensure we are clustered and online. - lxc cluster list -f csv | sed -e 's/,\?database-leader,\?//' | cut -d',' -f1,7 | grep -q "${name}" - lxc cluster list -f csv | wc -l | grep -q "${num_peers}" + lxc cluster list -f csv | sed -e 's/,\?database-leader,\?//' | cut -d',' -f1,7 | grep -qxF "${name},ONLINE" + [ "$(lxc cluster list -f csv | wc -l)" = "${num_peers}" ] + + # Check core config options + lxd_address="$(lxc config get core.https_address)" + # There was a bug in MicroCloud 1 that set different addresses. 
+ # See https://github.com/canonical/microcloud/issues/214 + system_address="$(lxc ls local:"${name}" -f json -c4 | jq -r '.[0].state.network.enp5s0.addresses[] | select(.family == "inet") | .address')" + [ "${lxd_address}" = "${system_address}:8443" ] || [ "${lxd_address}" = "[::]:8443" ] has_microovn=0 has_microceph=0 @@ -299,7 +338,7 @@ validate_system_lxd() { fi if [ "${has_microceph}" = 1 ] && [ "${remote_disks}" -gt 0 ] ; then - validate_system_lxd_ceph "${name}" "${remote_disks}" + validate_system_lxd_ceph "${name}" fi echo " ${name} Validating Profiles" @@ -308,7 +347,7 @@ validate_system_lxd() { elif [ -n "${local_disk}" ] ; then lxc profile device get default root pool | grep -q "local" else - ! lxc profile device list default | grep -q root || false + ! lxc profile device list default | grep -q "root" || false fi if [ "${has_microovn}" = 1 ] && [ -n "${ovn_interface}" ] ; then @@ -318,6 +357,12 @@ validate_system_lxd() { fi lxc remote switch local + + # Remove the trust on the remote which was added when adding the remote. + fingerprint="$(lxc query microcloud-test:/1.0/certificates?recursion=1 | jq -r '.[] | select(.name=="test") | .fingerprint')" + lxc config trust remove "microcloud-test:${fingerprint}" + + # Remove the remote. lxc remote remove microcloud-test echo "==> ${name} Validated LXD" @@ -337,16 +382,17 @@ reset_snaps() { # These are set to always pass in case the snaps are already disabled. 
echo "Disabling LXD and MicroCloud for ${name}" lxc exec "${name}" -- sh -c " - if pidof -q lxd ; then - kill -9 \$(pidof lxd) - fi + rm -f /var/snap/lxd/common/lxd.debug + rm -f /var/snap/microcloud/common/microcloudd.debug + rm -f /var/snap/microcloud/common/microcloud.debug - snap disable lxd > /dev/null 2>&1 || true - - if pidof -q microcloud ; then - kill -9 \$(pidof microcloud) - fi + for app in lxd lxd.debug microcloud microcloud.debug microcloudd microcloudd.debug ; do + if pidof -q \${app} > /dev/null; then + kill -9 \$(pidof \${app}) + fi + done + snap disable lxd > /dev/null 2>&1 || true snap disable microcloud > /dev/null 2>&1 || true systemctl stop snap.lxd.daemon snap.lxd.daemon.unix.socket > /dev/null 2>&1 || true @@ -359,13 +405,14 @@ reset_snaps() { " echo "Resetting MicroCeph for ${name}" - lxc exec "${name}" -- sh -c " - if snap list | grep -q microceph ; then + if lxc exec "${name}" -- snap list microceph > /dev/null 2>&1; then + lxc exec "${name}" -- sh -c " snap disable microceph > /dev/null 2>&1 || true # Kill any remaining processes. - if ps -e -o '%p %a' | grep -v grep | grep -qe 'ceph-' -qe 'microceph' ; then - kill -9 \$(ps -e -o '%p %a' | grep -e 'ceph-' -e 'microceph' | grep -v grep | awk '{print \$1}') || true + # Filter out the subshell too to not kill our own invocation as it shows as 'sh -c ...microceph...' in the process list. + if ps -e -o '%p %a' | grep -Ev '(grep|sh)' | grep -qe 'ceph-' -qe 'microceph' ; then + kill -9 \$(ps -e -o '%p %a' | grep -Ev '(grep|sh)' | grep -e 'ceph-' -e 'microceph' | awk '{print \$1}') || true fi # Remove modules to get rid of any kernel owned processes. @@ -379,38 +426,37 @@ reset_snaps() { # OSDs won't show up and ceph will freeze creating volumes without it, so make it here. 
mkdir -p /var/snap/microceph/current/run snap run --shell microceph -c 'snapctl restart microceph.osd' || true - fi - " + " + fi echo "Resetting MicroOVN for ${name}" - lxc exec "${name}" -- sh -c " - if snap list | grep -q microovn ; then + if lxc exec "${name}" -- snap list microovn > /dev/null 2>&1; then + lxc exec "${name}" -- sh -c " microovn.ovn-appctl exit || true microovn.ovs-appctl exit --cleanup || true microovn.ovs-dpctl del-dp system@ovs-system || true snap disable microovn > /dev/null 2>&1 || true # Kill any remaining processes. - if ps -e -o '%p %a' | grep -v grep | grep -qe 'ovs-' -qe 'ovn-' -qe 'microovn' ; then - kill -9 \$(ps -e -o '%p %a' | grep -e 'ovs-' -e 'ovn-' -e 'microovn' | grep -v grep | awk '{print \$1}') || true + # Filter out the subshell too to not kill our own invocation as it shows as 'sh -c ...microovn...' in the process list. + if ps -e -o '%p %a' | grep -Ev '(grep|sh)' | grep -qe 'ovs-' -qe 'ovn-' -qe 'microovn' ; then + kill -9 \$(ps -e -o '%p %a' | grep -Ev '(grep|sh)' | grep -e 'ovs-' -e 'ovn-' -e 'microovn' | awk '{print \$1}') || true fi # Wipe the snap state so we can start fresh. 
rm -rf /var/snap/microovn/*/* snap enable microovn > /dev/null 2>&1 || true - fi - " + " + fi echo "Enabling LXD and MicroCloud for ${name}" - lxc exec "${name}" -- sh -c " - snap enable lxd > /dev/null 2>&1 || true - snap enable microcloud > /dev/null 2>&1 || true - snap start lxd > /dev/null 2>&1 || true - snap start microcloud > /dev/null 2>&1 || true - snap refresh lxd --channel latest/stable --cohort=+ + lxc exec "${name}" -- snap enable lxd > /dev/null 2>&1 || true + lxc exec "${name}" -- snap enable microcloud > /dev/null 2>&1 || true + lxc exec "${name}" -- snap start lxd > /dev/null 2>&1 || true + lxc exec "${name}" -- snap start microcloud > /dev/null 2>&1 || true + lxc exec "${name}" -- lxd waitready - lxd waitready - " + set_debug_binaries "${name}" ) } @@ -418,7 +464,8 @@ reset_snaps() { # Makes only `num_disks` and `num_ifaces` disks and interfaces available for the next test. reset_system() { if [ "${SNAPSHOT_RESTORE}" = 1 ]; then - restore_system "${*}" + # shellcheck disable=SC2048,SC2086 + restore_system ${*} return fi @@ -435,10 +482,16 @@ reset_system() { lxc start "${name}" || true - lxc file push "${MICROCLOUD_SNAP_PATH}" "${name}"/root/microcloud.snap + if [ -n "${MICROCLOUD_SNAP_PATH}" ]; then + lxc file push --quiet "${MICROCLOUD_SNAP_PATH}" "${name}"/root/microcloud.snap + fi lxc exec "${name}" -- ip link del lxdfan0 || true + # Resync the time in case it got out of sync with the other VMs. + lxc exec "${name}" -- timedatectl set-ntp off + lxc exec "${name}" -- timedatectl set-ntp on + # Rescan for any disks we hid from the previous run. lxc exec "${name}" -- sh -c " for h in /sys/class/scsi_host/host*; do @@ -448,6 +501,8 @@ reset_system() { reset_snaps "${name}" + # Attempt to destroy the zpool as we may have left dangling volumes when we wiped the LXD database earlier. + # This is slightly faster than deleting the volumes by hand on each system. lxc exec "${name}" -- zpool destroy -f local || true # Hide any extra disks for this run. 
@@ -455,11 +510,13 @@ reset_system() { disks=\$(lsblk -o NAME,SERIAL | grep \"lxd_disk[0-9]\" | cut -d\" \" -f1 | tac) count_disks=\$(echo \"\${disks}\" | wc -l) for d in \${disks} ; do - if [ \${count_disks} -gt \${num_disks} ]; then + if [ \${count_disks} -gt ${num_disks} ]; then + echo \"Deleting /dev/\${d}\" echo 1 > /sys/block/\${d}/device/delete else - wipefs -af /dev/\${d} - dd if=/dev/zero of=/dev/\${d} bs=4096 count=100 + echo \"Wiping /dev/\${d}\" + wipefs --quiet -af /dev/\${d} + dd if=/dev/zero of=/dev/\${d} bs=4096 count=100 status=none fi count_disks=\$((count_disks - 1)) @@ -476,6 +533,7 @@ reset_system() { # Re-enable as many interfaces as we want for this run. for i in $(seq 1 "${num_ifaces}") ; do iface="enp$((i + 5))s0" + lxc exec "${name}" -- ip addr flush dev "${iface}" lxc exec "${name}" -- ip link set "${iface}" up lxc exec "${name}" -- sh -c "echo 1 > /proc/sys/net/ipv6/conf/${iface}/disable_ipv6" > /dev/null done @@ -503,11 +561,10 @@ cluster_reset() { done echo 'config: {}' | lxc profile edit default || true - lxc storage rm local || true " - lxc exec "${name}" -- sh -c " - if snap list | grep -q microceph ; then + if lxc exec "${name}" -- snap list microceph > /dev/null 2>&1; then + lxc exec "${name}" -- sh -c " # Ceph might not be responsive if we haven't set it up yet. 
microceph_setup=0 if timeout -k 3 3 microceph cluster list ; then @@ -518,53 +575,67 @@ cluster_reset() { microceph.ceph tell mon.\* injectargs '--mon-allow-pool-delete=true' lxc storage rm remote || true microceph.rados purge lxd_remote --yes-i-really-really-mean-it --force + microceph.ceph fs fail lxd_cephfs || true + microceph.ceph fs rm lxd_cephfs --yes-i-really-mean-it || true + microceph.rados purge lxd_cephfs_meta --yes-i-really-really-mean-it --force || true + microceph.rados purge lxd_cephfs_data --yes-i-really-really-mean-it --force || true microceph.rados purge .mgr --yes-i-really-really-mean-it --force for pool in \$(microceph.ceph osd pool ls) ; do microceph.ceph osd pool rm \${pool} \${pool} --yes-i-really-really-mean-it done - for pool in \$(microceph.ceph osd ls) ; do - microceph.ceph osd out \${pool} - microceph.ceph osd down \${pool} --definitely-dead - microceph.ceph osd purge \${pool} --yes-i-really-mean-it --force - microceph.ceph osd destroy \${pool} --yes-i-really-mean-it --force + microceph.ceph osd set noup + for osd in \$(microceph.ceph osd ls) ; do + microceph.ceph config set osd.\${osd} osd_pool_default_crush_rule \$(microceph.ceph osd crush rule dump microceph_auto_osd | jq '.rule_id') + microceph.ceph osd crush reweight osd.\${osd} 0 + microceph.ceph osd out \${osd} + microceph.ceph osd down \${osd} --definitely-dead + pkill -f \"ceph-osd .* --id \${osd}\" + microceph.ceph osd purge \${osd} --yes-i-really-mean-it --force + microceph.ceph osd destroy \${osd} --yes-i-really-mean-it --force + rm -rf /var/snap/microceph/common/data/osd/ceph-\${osd} done fi - fi - " + " + fi ) } # reset_systems: Concurrently or sequentially resets the specified number of systems. 
reset_systems() { + collect_go_cover_files + if [ "${SNAPSHOT_RESTORE}" = 1 ]; then - restore_systems "${*}" + # shellcheck disable=SC2048,SC2086 + restore_systems ${*} return fi + echo "::group::reset_systems" + num_vms=3 num_disks=3 num_ifaces=1 - if echo "${1}" | grep -Pq '\d+'; then + if [[ "${1:-}" =~ ^[0-9]+$ ]]; then num_vms="${1}" shift 1 fi - if echo "${1}" | grep -Pq '\d+'; then + if [[ "${1:-}" =~ ^[0-9]+$ ]]; then num_disks="${1}" shift 1 fi - if echo "${1}" | grep -Pq '\d+'; then + if [[ "${1:-}" =~ ^[0-9]+$ ]]; then num_ifaces="${1}" shift 1 fi - for i in $(seq 1 "${num_vms}") ; do - name=$(printf "micro%02d" "$i") - if [ "$i" = 1 ]; then + for i in $(seq -f "%02g" 1 "${num_vms}") ; do + name="micro${i}" + if [ "${name}" = "micro01" ]; then cluster_reset "${name}" fi @@ -577,33 +648,37 @@ reset_systems() { # Pause any extra systems. total_machines="$(lxc list -f csv -c n micro | wc -l)" - for i in $(seq "$((1 + num_vms))" "${total_machines}"); do - name=$(printf "micro%02d" "$i") + for i in $(seq -f "%02g" "$((1 + num_vms))" "${total_machines}"); do + name="micro${i}" lxc pause "${name}" || true done - if [ "${CONCURRENT_SETUP}" = 1 ]; then - wait - fi + wait + + echo "::endgroup::" } # restore_systems: Restores the systems from a snapshot at snap0. 
restore_systems() { + echo "::group::restore_systems" + + collect_go_cover_files + num_vms=3 num_disks=3 num_extra_ifaces=1 - if echo "${1}" | grep -Pq '\d+'; then + if [[ "${1:-}" =~ ^[0-9]+$ ]]; then num_vms=${1} shift 1 fi - if echo "${1}" | grep -Pq '\d+'; then + if [[ "${1:-}" =~ ^[0-9]+$ ]]; then num_disks=${1} shift 1 fi - if echo "${1}" | grep -Pq '\d+'; then + if [[ "${1:-}" =~ ^[0-9]+$ ]]; then num_extra_ifaces=${1} shift 1 fi @@ -628,8 +703,8 @@ restore_systems() { done ) - for n in $(seq 1 "${num_vms}") ; do - name="$(printf "micro%02d" "${n}")" + for n in $(seq -f "%02g" 1 "${num_vms}") ; do + name="micro${n}" if [ "${CONCURRENT_SETUP}" = 1 ]; then restore_system "${name}" "${num_disks}" "${num_extra_ifaces}" & else @@ -637,9 +712,9 @@ restore_systems() { fi done - if [ "${CONCURRENT_SETUP}" = 1 ]; then - wait - fi + wait + + echo "::endgroup::" } restore_system() { @@ -647,13 +722,13 @@ restore_system() { shift 1 num_disks="0" - if echo "${1}" | grep -Pq '\d+'; then + if [[ "${1:-}" =~ ^[0-9]+$ ]]; then num_disks="${1}" shift 1 fi num_extra_ifaces="0" - if echo "${1}" | grep -Pq '\d+'; then + if [[ "${1:-}" =~ ^[0-9]+$ ]]; then num_extra_ifaces="${1}" shift 1 fi @@ -670,7 +745,7 @@ restore_system() { lxc remote switch local lxc project switch microcloud-test - if lxc list "${name}" -f csv -c s | grep -qxF "RUNNING"; then + if [ "$(lxc list "${name}" -f csv -c s)" = "RUNNING" ]; then lxc stop "${name}" --force fi @@ -690,7 +765,7 @@ restore_system() { for n in $(seq 1 "${num_disks}") ; do disk="${name}-disk${n}" - lxc storage volume create zpool "${disk}" size=5GiB --type=block + lxc storage volume create zpool "${disk}" size=10GiB --type=block lxc config device add "${name}" "disk${n}" disk pool=zpool source="${disk}" done @@ -708,6 +783,7 @@ restore_system() { done ) + set_debug_binaries "${name}" echo "==> Restored ${name}" } @@ -721,7 +797,6 @@ cleanup_systems() { lxc project switch microcloud-test echo "==> Removing systems" lxc list -c n -f csv | 
xargs --no-run-if-empty lxc delete --force - lxc image list -c f -f csv | xargs --no-run-if-empty lxc image delete for profile in $(lxc profile list -f csv | cut -d, -f1 | grep -vxF default); do lxc profile delete "${profile}" @@ -757,13 +832,17 @@ setup_lxd_project() { fi lxc remote switch local - lxc project create microcloud-test || true - lxc project switch microcloud-test + lxc project create microcloud-test -c features.images=false || true + lxc project switch microcloud-test # Create a zfs pool so we can use fast snapshots. - lxc storage create zpool zfs volume.size=5GiB - - lxc remote list -f csv | cut -d',' -f1 | grep -qxF "ubuntu-minimal" || lxc remote add ubuntu-minimal https://cloud-images.ubuntu.com/minimal/releases/ --protocol simplestreams --auth-type none + if [ -z "${TEST_STORAGE_SOURCE:-}" ]; then + lxc storage create zpool dir volume.size=5GiB + else + sudo wipefs --all --quiet "${TEST_STORAGE_SOURCE}" + sudo blkdiscard "${TEST_STORAGE_SOURCE}" || true + lxc storage create zpool dir source="${TEST_STORAGE_SOURCE}" + fi # Setup default profile cat << EOF | lxc profile edit default @@ -784,6 +863,7 @@ EOF lxc profile set default environment.TEST_CONSOLE=1 lxc profile set default environment.DEBIAN_FRONTEND=noninteractive + lxc profile set default environment.PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin" ) } @@ -792,6 +872,8 @@ create_system() { num_disks="${2:-0}" shift 2 + os="${BASE_OS:-24.04}" + echo "==> ${name} Creating VM with ${num_disks} disks" ( set -eu @@ -800,11 +882,22 @@ create_system() { exec > /dev/null fi - lxc init ubuntu-minimal:22.04 "${name}" --vm -c limits.cpu=2 -c limits.memory=4GiB + # Pre fetch additional images to be used by the VM through security.devlxd.images=true + lxc image copy ubuntu-minimal-daily:24.04 local: + lxc image copy ubuntu-minimal-daily:22.04 local: + if [ "${SKIP_VM_LAUNCH}" != "1" ]; then + lxc image copy ubuntu-minimal-daily:24.04 local: --vm + lxc image copy 
ubuntu-minimal-daily:22.04 local: --vm + fi + + lxc init "ubuntu-minimal-daily:${os}" "${name}" --vm -c limits.cpu=4 -c limits.memory=4GiB -c security.devlxd.images=true + + # Disable vGPU to save RAM + lxc config set "${name}" raw.qemu.conf='[device "qemu_gpu"]' for n in $(seq 1 "${num_disks}") ; do disk="${name}-disk${n}" - lxc storage volume create zpool "${disk}" size=5GiB --type=block + lxc storage volume create zpool "${disk}" size=10GiB --type=block lxc config device add "${name}" "disk${n}" disk pool=zpool source="${disk}" done @@ -818,10 +911,6 @@ setup_system() { echo "==> ${name} Setting up" - # Bring enp6s0 up but disable IPv6 (should do through netplan). - lxc exec "${name}" -- ip link set enp6s0 up - lxc exec "${name}" -- sh -c "echo 1 > /proc/sys/net/ipv6/conf/enp6s0/disable_ipv6" > /dev/null - ( set -eu @@ -829,22 +918,52 @@ setup_system() { exec > /dev/null fi + # Disable unneeded services/timers/sockets/mounts (source of noise/slowdown) + lxc exec "${name}" -- systemctl mask --now apport.service cron.service e2scrub_reap.service esm-cache.service grub-common.service grub-initrd-fallback.service networkd-dispatcher.service polkit.service secureboot-db.service serial-getty@ttyS0.service ssh.service systemd-journal-flush.service unattended-upgrades.service + lxc exec "${name}" -- systemctl mask --now apt-daily-upgrade.timer apt-daily.timer dpkg-db-backup.timer e2scrub_all.timer fstrim.timer motd-news.timer update-notifier-download.timer update-notifier-motd.timer + lxc exec "${name}" -- systemctl mask --now iscsid.socket + lxc exec "${name}" -- systemctl mask --now dev-hugepages.mount sys-kernel-debug.mount sys-kernel-tracing.mount + + # Turn off debugfs and mitigations + echo 'GRUB_CMDLINE_LINUX_DEFAULT="quiet debugfs=off mitigations=off"' | lxc exec "${name}" -- tee /etc/default/grub.d/zz-lxd-speed.cfg + lxc exec "${name}" -- update-grub + + # Faster apt + echo "force-unsafe-io" | lxc exec "${name}" -- tee /etc/dpkg/dpkg.cfg.d/force-unsafe-io + + # 
Remove unneeded/unwanted packages + lxc exec "${name}" -- apt-get autopurge -y lxd-installer + # Install the snaps. lxc exec "${name}" -- apt-get update - lxc exec "${name}" -- apt-get install --no-install-recommends -y snapd curl jq zfsutils-linux htop + if [ -n "${CLOUD_INSPECT:-}" ] || [ "${SNAPSHOT_RESTORE}" = 0 ]; then + lxc exec "${name}" -- apt-get install --no-install-recommends -y jq zfsutils-linux htop + else + lxc exec "${name}" -- apt-get install --no-install-recommends -y jq + fi - lxc exec "${name}" -- sh -c "PATH=\$PATH:/snap/bin snap install snapd" + lxc exec "${name}" -- snap install snapd + lxc exec "${name}" -- snap install yq + + # Free disk blocks + lxc exec "${name}" -- apt-get clean + lxc exec "${name}" -- systemctl start fstrim.service # Snaps can occasionally fail to install properly, so repeatedly try. lxc exec "${name}" -- sh -c " - export PATH=\$PATH:/snap/bin while ! test -e /snap/bin/microceph ; do - snap install microceph || true + snap install microceph --channel=\"${MICROCEPH_SNAP_CHANNEL}\" --cohort='+' || true sleep 1 done + if [ ! \"${BASE_OS}\" = \"22.04\" ]; then + # dm-crypt needs to be manually connected for microceph full disk encyption. + snap connect microceph:dm-crypt + snap restart microceph.daemon + fi + while ! test -e /snap/bin/microovn ; do - snap install microovn || true + snap install microovn --channel=\"${MICROOVN_SNAP_CHANNEL}\" --cohort='+' || true sleep 1 done @@ -853,26 +972,37 @@ setup_system() { fi while ! test -e /snap/bin/lxd ; do - snap install lxd --channel latest/stable --cohort='+' || true + snap install lxd --channel=\"${LXD_SNAP_CHANNEL}\" --cohort='+' || true sleep 1 done " - lxc file push "${MICROCLOUD_SNAP_PATH}" "${name}"/root/microcloud.snap - lxc exec "${name}" -- sh -c "PATH=\$PATH:/snap/bin snap install --devmode /root/microcloud.snap" - ) - - # Sleep some time so the snaps are fully set up. 
- sleep 3 - + # Silence the "If this is your first time running LXD on this machine" banner + # on first invocation + lxc exec "${name}" -- mkdir -p /root/snap/lxd/common/config/ + lxc exec "${name}" -- touch /root/snap/lxd/common/config/config.yml - lxc stop "${name}" + if [ -n "${MICROCLOUD_SNAP_PATH}" ]; then + lxc file push --quiet "${MICROCLOUD_SNAP_PATH}" "${name}"/root/microcloud.snap + lxc exec "${name}" -- snap install --dangerous /root/microcloud.snap + else + lxc exec "${name}" -- snap install microcloud --channel="${MICROCLOUD_SNAP_CHANNEL}" --cohort="+" + fi - lxc snapshot "${name}" snap0 + set_debug_binaries "${name}" + ) - lxc start "${name}" + # let boot/cloud-init finish its job + lxc exec "${name}" -- systemctl is-system-running --wait || lxc exec "${name}" -- systemctl --failed || true - lxd_wait_vm "${name}" + # Create a snapshot so we can restore to this point. + if [ "${SNAPSHOT_RESTORE}" = 1 ]; then + lxc stop "${name}" + lxc snapshot "${name}" snap0 + else + # Sleep some time so the snaps are fully set up. + sleep 3 + fi echo "==> ${name} Finished Setting up" } @@ -882,29 +1012,54 @@ new_system() { name=${1} num_disks=${2:-0} - create_system "${name}" "${num_disks}" - lxd_wait_vm "${name}" + ( + set -eu + # Sometimes, the cloud-init user script fails to run in a CI environment, + # so we retry a few times. + for i in $(seq 5); do + create_system "${name}" "${num_disks}" + + if ! lxd_wait_vm "${name}"; then + echo "lxd_wait_vm failed, removing ${name} and retrying (attempt ${i})" + lxc delete "${name}" -f + for n in $(seq 1 "${num_disks}") ; do + disk="${name}-disk${n}" + lxc storage volume delete zpool "${disk}" + done + else + break + fi + + if [ "${i}" = 5 ]; then + echo "Failed to create ${name} after 5 attempts" + exit 1 + fi + done + ) + # Sleep some time so the vm is fully set up. 
sleep 3 setup_system "${name}" } new_systems() { + echo "::group::new_systems" + num_vms=3 num_disks=3 num_ifaces=1 - if echo "${1}" | grep -qP '\d+'; then + if [[ "${1:-}" =~ ^[0-9]+$ ]]; then num_vms="${1}" shift 1 fi - if echo "${1}" | grep -qP '\d+'; then + if [[ "${1:-}" =~ ^[0-9]+$ ]]; then num_disks="${1}" shift 1 fi - if echo "${1}" | grep -qP '\d+'; then + if [[ "${1:-}" =~ ^[0-9]+$ ]]; then num_ifaces="${1}" shift 1 fi @@ -920,40 +1075,18 @@ new_systems() { lxc profile device add default "eth${i}" nic network="microbr$((i - 1))" name="eth${i}" done - if [ "${CONCURRENT_SETUP}" = 1 ]; then - for n in $(seq 1 "${num_vms}"); do - name=$(printf "micro%02d" "${n}") - create_system "${name}" "${num_disks}" & - done - - wait - - for n in $(seq 1 "${num_vms}"); do - name=$(printf "micro%02d" "${n}") - - ( - lxd_wait_vm "${name}" - # Sleep some time so the vm is fully set up. - sleep 3 - setup_system "${name}" - ) & - - done - - wait - - else - for n in $(seq 1 "${num_vms}"); do - name="$(printf "micro%02d" "${n}")" - create_system "${name}" "${num_disks}" - lxd_wait_vm "${name}" + for n in $(seq -f "%02g" 1 "${num_vms}"); do + name="micro${n}" + if [ "${CONCURRENT_SETUP}" = 1 ]; then + new_system "${name}" "${num_disks}" & + else + new_system "${name}" "${num_disks}" + fi + done - # Sleep some time so the vm is fully set up. - sleep 3 + wait - setup_system "${name}" - done - fi + echo "::endgroup::" } wait_snapd() { @@ -975,22 +1108,65 @@ lxd_wait_vm() { name="${1}" echo "==> ${name} Awaiting VM..." - for round in $(seq 640); do - if lxc info "${name}" | grep -qF "Status: READY" ; then + sleep 5 + for round in $(seq 1 5 150); do + if [ "$(lxc list -f csv -c s "${name}")" = "READY" ] ; then wait_snapd "${name}" echo " ${name} VM is ready" return 0 fi # Sometimes the VM just won't start, so retry after 3 minutes. 
- if [ "$((round % 180))" = 0 ]; then + if [ "$((round % 60))" = 0 ]; then echo "==> ${name} Timeout (${round}s): Re-initializing VM" lxc restart "${name}" --force fi - sleep 1 + sleep 5 done echo " ${name} VM failed to start" return 1 } + +# ip_prefix_by_netmask: Returns the prefix length of the given netmask. +ip_prefix_by_netmask () { + # shellcheck disable=SC2048,SC2086 + c=0 x=0$( printf '%o' ${1//./ } ) + # shellcheck disable=SC2048,SC2086 + while [ $x -gt 0 ]; do + (( c += x % 2, x >>= 1 )) + done + + echo /$c ; +} + +# ip_config_to_netaddr: Returns the IPv4 network address of the given interface. +# e.g: ip_config_to_netaddr lxdbr0 (with inet: 10.233.6.X/24)-> 10.233.6.0/24 +ip_config_to_netaddr () { + local line ip mask net_addr + line=$(ifconfig -a "$1" | grep netmask | tr -s " ") + ip=$(echo "$line" | cut -f 3 -d " ") + mask=$(echo "$line" | cut -f 5 -d " ") + + IFS=. read -r io1 io2 io3 io4 <<< "$ip" + IFS=. read -r mo1 mo2 mo3 mo4 <<< "$mask" + net_addr="$((io1 & mo1)).$((io2 & mo2)).$((io3 & mo3)).$((io4 & mo4))" + + echo "${net_addr}$(ip_prefix_by_netmask "${mask}")" +} + +set_cluster_subnet() { + num_systems="${1}" + iface="${2}" + prefix="${3}" + + shift 3 + + for n in $(seq 2 $((num_systems + 1))); do + cluster_ip="${prefix}.${n}/24" + name="$(printf "micro%02d" $((n-1)))" + lxc exec "${name}" -- ip addr flush "${iface}" + lxc exec "${name}" -- ip addr add "${cluster_ip}" dev "${iface}" + done +} diff --git a/microcloud/test/lint/capitalize-errors.sh b/microcloud/test/lint/capitalize-errors.sh new file mode 100755 index 00000000..89e4bde5 --- /dev/null +++ b/microcloud/test/lint/capitalize-errors.sh @@ -0,0 +1,5 @@ +#!/bin/sh -eu + +echo "Checking for error messages beginning with lower-case letters..." + +! 
git grep --untracked -P -n 'fmt\.Errorf\("[a-z]' -- '*.go' diff --git a/microcloud/test/main.sh b/microcloud/test/main.sh index 846e2a81..b9462720 100755 --- a/microcloud/test/main.sh +++ b/microcloud/test/main.sh @@ -1,4 +1,5 @@ -#!/bin/sh -eu +#!/bin/bash +set -eu [ -n "${GOPATH:-}" ] && export "PATH=${GOPATH}/bin:${PATH}" # Don't translate lxc output for parsing in it in tests. @@ -42,6 +43,9 @@ cleanup() { lxc project switch microcloud-test set +e + if [ "${TEST_CURRENT}" = "setup" ] && [ "${TEST_RESULT}" = "success" ]; then + return + fi # Allow for inspection if [ -n "${CLOUD_INSPECT:-}" ]; then @@ -54,12 +58,45 @@ cleanup() { read -r _ fi + echo "::group::debug-failure" + lxc list --all-projects || true + lxc exec micro01 -- lxc list || true + + for name in $(lxc list -c n -f csv micro); do + echo "Check LXD resources on ${name} for disk ordering" + lxc exec "${name}" -- lxc query "/1.0/resources" | jq -r '.storage.disks[] | {id, device_id, device_path}' + lxc exec "${name}" -- lsblk + done + + for name in $(lxc list -c n -f csv micro); do + echo -n "${name} out file:" + if ! lxc exec "${name}" -- test -e out; then + echo " was not found" + continue + elif ! 
lxc exec "${name}" -- test -s out; then + echo " was empty" + continue + fi + echo + lxc exec "${name}" -- cat out + done + echo "::endgroup::" + + # LXD daemon logs + echo "::group::lxd logs" + journalctl --quiet --no-hostname --no-pager --boot=0 --lines=100 --unit=snap.lxd.daemon.service + echo "::endgroup::" + + # dmesg may contain oops, IO errors, crashes, etc + echo "::group::dmesg logs" + journalctl --quiet --no-hostname --no-pager --boot=0 --lines=100 --dmesg + echo "::endgroup::" + if [ -n "${GITHUB_ACTIONS:-}" ]; then echo "==> Skipping cleanup (GitHub Action runner detected)" else echo "==> Cleaning up" - - cleanup_systems + cleanup_systems fi echo "" @@ -68,6 +105,11 @@ cleanup() { echo "==> TEST DONE: ${TEST_CURRENT_DESCRIPTION}" fi echo "==> Test result: ${TEST_RESULT}" + + if [ "${CONCURRENT_SETUP}" = 1 ]; then + # kill our whole process group + kill -- -$$ + fi } # Must be set before cleanup() @@ -81,24 +123,67 @@ trap cleanup EXIT HUP INT TERM # Import all the testsuites import_subdir_files suites +LXD_SNAP_CHANNEL="${LXD_SNAP_CHANNEL:-5.21/edge}" +export LXD_SNAP_CHANNEL + +MICROCEPH_SNAP_CHANNEL="${MICROCEPH_SNAP_CHANNEL:-reef/candidate}" +export MICROCEPH_SNAP_CHANNEL + +MICROCLOUD_SNAP_CHANNEL="${MICROCLOUD_SNAP_CHANNEL:-latest/edge}" +export MICROCLOUD_SNAP_CHANNEL + +MICROOVN_SNAP_CHANNEL="${MICROOVN_SNAP_CHANNEL:-22.03/edge}" +export MICROOVN_SNAP_CHANNEL + +MICROOVN_SNAP_PATH="${MICROOVN_SNAP_PATH:-}" +export MICROOVN_SNAP_PATH + +MICROCEPH_SNAP_PATH="${MICROCEPH_SNAP_PATH:-}" +export MICROCEPH_SNAP_PATH + CONCURRENT_SETUP=${CONCURRENT_SETUP:-0} export CONCURRENT_SETUP SKIP_SETUP_LOG=${SKIP_SETUP_LOG:-0} export SKIP_SETUP_LOG +SKIP_VM_LAUNCH=${SKIP_VM_LAUNCH:-0} +export SKIP_VM_LAUNCH + SNAPSHOT_RESTORE=${SNAPSHOT_RESTORE:-0} export SNAPSHOT_RESTORE +TESTBED_READY=${TESTBED_READY:-0} +export TESTBED_READY + +set +u if [ -z "${MICROCLOUD_SNAP_PATH}" ] || ! 
[ -e "${MICROCLOUD_SNAP_PATH}" ]; then - # TODO: Setup snap build - echo "Undefined or missing MICROCLOUD_SNAP_PATH" >&2 - exit 1 + MICROCLOUD_SNAP_PATH="" +fi + +if [ -z "${MICROCLOUD_DEBUG_PATH}" ] || ! [ -e "${MICROCLOUD_DEBUG_PATH}" ]; then + MICROCLOUD_DEBUG_PATH="" +fi + +if [ -z "${MICROCLOUDD_DEBUG_PATH}" ] || ! [ -e "${MICROCLOUDD_DEBUG_PATH}" ]; then + MICROCLOUDD_DEBUG_PATH="" fi +if [ -z "${LXD_DEBUG_PATH}" ] || ! [ -e "${LXD_DEBUG_PATH}" ]; then + LXD_DEBUG_PATH="" +fi +set -u + export MICROCLOUD_SNAP_PATH +echo "===> Checking that all snap channels are set to latest/edge" +check_snap_channels + run_test() { + if [ "${TESTBED_READY}" = 0 ]; then + testbed_setup + fi + TEST_CURRENT="${1}" TEST_CURRENT_DESCRIPTION="${2:-${1}}" @@ -107,31 +192,92 @@ run_test() { ${TEST_CURRENT} END_TIME="$(date +%s)" - echo "==> TEST DONE: ${TEST_CURRENT_DESCRIPTION} ($((END_TIME - START_TIME))s)" -} + collect_go_cover_files -# allow for running a specific set of tests -if [ "$#" -gt 0 ] && [ "$1" != "all" ] && [ "$1" != "cluster" ] && [ "$1" != "standalone" ]; then - run_test "test_${1}" - # shellcheck disable=SC2034 - TEST_RESULT=success - exit -fi + echo "::notice::==> TEST DONE: ${TEST_CURRENT_DESCRIPTION} ($((END_TIME - START_TIME))s)" +} # Create 4 nodes with 3 disks and 3 extra interfaces. # These nodes should be used across most tests and reset with the `reset_systems` function. 
-new_systems 4 3 3 +testbed_setup() { + echo "==> SETUP STARTED" + START_TIME="$(date +%s)" + + new_systems 4 3 5 + TESTBED_READY=1 + + END_TIME="$(date +%s)" + echo "::notice::==> SETUP DONE ($((END_TIME - START_TIME))s)" +} + +collect_go_cover_files() { + if [ -n "${GOCOVERDIR}" ]; then + echo "==> Collecting Go coverage files" + lxc list -c n -f csv | xargs --no-run-if-empty -I {} sh -c " + container_name=\"{}\" + timestamp=\$(date +%Y%m%d_%H%M%S_%N) + destination=\"${GOCOVERDIR}/\${container_name}_\${timestamp}\" + lxc file pull -r \"\${container_name}/var/snap/microcloud/common/data/cover\" \"\${destination}\" || true + " + fi +} + +# test groups +run_add_tests() { + run_test test_add_interactive "add interactive" +} + +run_instances_tests() { + run_test test_instances_config "instances config" + run_test test_instances_launch "instances launch" +} + +run_basic_tests() { + run_test test_auto "auto" +} + +run_recover_tests() { + run_test test_recover "recover" +} -if [ "${1:-"all"}" != "cluster" ]; then - run_test test_instances "instances" +run_interactive_tests() { run_test test_interactive "interactive" + run_test test_interactive_combinations "interactive combinations" +} + +run_mismatch_tests() { run_test test_service_mismatch "service mismatch" run_test test_disk_mismatch "disk mismatch" - run_test test_interactive_combinations "interactive combinations" - run_test test_auto "auto" - run_test test_add_interactive "add interactive" - run_test test_add_auto "add auto" +} + +run_preseed_tests() { run_test test_preseed "preseed" +} + +# allow for running a specific set of tests +if [ "${1:-"all"}" = "all" ]; then + run_add_tests + run_instances_tests + run_basic_tests + run_interactive_tests + run_mismatch_tests + run_preseed_tests +elif [ "${1}" = "add" ]; then + run_add_tests +elif [ "${1}" = "instances" ]; then + run_instances_tests +elif [ "${1}" = "basic" ]; then + run_basic_tests +elif [ "${1}" = "interactive" ]; then + run_interactive_tests +elif [ 
"${1}" = "mismatch" ]; then + run_mismatch_tests +elif [ "${1}" = "preseed" ]; then + run_preseed_tests +elif [ "${1}" = "setup" ]; then + testbed_setup +else + run_test "test_${1}" fi # shellcheck disable=SC2034 diff --git a/microcloud/test/suites/add.sh b/microcloud/test/suites/add.sh index ccaf8ea0..7b245530 100644 --- a/microcloud/test/suites/add.sh +++ b/microcloud/test/suites/add.sh @@ -1,102 +1,4 @@ -test_add_auto() { - reset_systems 4 0 0 - - # Test with just LXD and MicroCloud, and no disks. - for m in micro01 micro02 micro03 ; do - lxc exec "${m}" -- snap disable microovn || true - lxc exec "${m}" -- snap disable microceph || true - done - - # Disable extra nodes so we don't add them yet. - for m in micro03 micro04 ; do - lxc exec "${m}" -- snap disable microcloud - done - - lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --auto > out" - lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q - - # Re-enable the nodes. - for m in micro03 micro04 ; do - lxc exec "${m}" -- snap enable microcloud - lxc exec "${m}" -- snap start microcloud - done - - # Add the nodes. - lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud add --auto > out" - lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q - - for m in micro01 micro02 micro03 micro04 ; do - validate_system_lxd "${m}" 4 - - # Supress the first message from LXD. - lxc exec "${m}" -- lxc list > /dev/null 2>&1 || true - - # Ensure we created no storage devices. - lxc exec "${m}" -- sh -ceu "lxc storage ls -f csv | wc -l | grep -q 0" - done - - # Test with all systems. - reset_systems 4 0 0 - - # Disable extra nodes so we don't add them yet. - for m in micro03 micro04 ; do - lxc exec "${m}" -- snap disable microcloud - done - - lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --auto > out" - lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q - - # Re-enable the nodes. 
- for m in micro03 micro04 ; do - lxc exec "${m}" -- snap enable microcloud - lxc exec "${m}" -- snap start microcloud - done - - # Add the nodes. - lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud add --auto > out" - lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q - - for m in micro01 micro02 micro03 micro04 ; do - validate_system_lxd "${m}" 4 - validate_system_microceph "${m}" - validate_system_microovn "${m}" - - # Supress the first message from LXD. - lxc exec "${m}" -- sh -c "lxc ls >> /dev/null 2>&1" || true - - # Ensure we created no storage devices. - lxc exec "${m}" -- sh -ceu "lxc storage ls -f csv | wc -l | grep -q 0" - done - - # Test with ZFS and Ceph disks. - reset_systems 4 2 0 - - # Disable extra nodes so we don't add them yet. - # shellcheck disable=SC2043 - for m in micro04 ; do - lxc exec "${m}" -- sh -c "snap disable microcloud" - done - - lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --auto > out" - lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q - - # Re-enable the nodes. - # shellcheck disable=SC2043 - for m in micro04 ; do - lxc exec "${m}" -- sh -c "snap enable microcloud" - lxc exec "${m}" -- sh -c "snap start microcloud" - done - - # Add the nodes. - lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud add --auto > out" - lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q - - for m in micro01 micro02 micro03 micro04 ; do - validate_system_lxd "${m}" 4 disk1 1 - validate_system_microceph "${m}" disk2 - validate_system_microovn "${m}" - done -} +#!/bin/bash test_add_interactive() { reset_systems 4 2 1 @@ -104,7 +6,7 @@ test_add_interactive() { # Disable extra nodes so we don't add them yet. 
# shellcheck disable=SC2043 for m in micro04 ; do - lxc exec "${m}" -- sh -c "snap disable microcloud" + lxc exec "${m}" -- snap disable microcloud done echo "Test growing a MicroCloud with all services and devices set up" @@ -152,7 +54,6 @@ test_add_interactive() { validate_system_microovn "${m}" done - reset_systems 4 2 1 echo "Test growing a MicroCloud with missing services" diff --git a/microcloud/test/suites/basic.sh b/microcloud/test/suites/basic.sh index f92b7257..41021ba2 100644 --- a/microcloud/test/suites/basic.sh +++ b/microcloud/test/suites/basic.sh @@ -1,3 +1,5 @@ +#!/bin/bash + test_interactive() { reset_systems 3 3 1 @@ -48,9 +50,9 @@ test_interactive() { done echo "Creating a MicroCloud with ZFS and Ceph storage" - SETUP_CEPH="yes" - CEPH_FILTER="lxd_disk2" - CEPH_WIPE="yes" + export SETUP_CEPH="yes" + export CEPH_FILTER="lxd_disk2" + export CEPH_WIPE="yes" microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q @@ -102,156 +104,7 @@ test_interactive() { done } -test_instances() { - reset_systems 3 3 2 - - # Setup a MicroCloud with 3 systems, ZFS storage, and a FAN network. - addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1) - lxc exec micro01 -- sh -c " - cat << EOF > /root/preseed.yaml -lookup_subnet: ${addr}/24 -systems: -- name: micro01 - storage: - local: - path: /dev/sdb - wipe: true -- name: micro02 - storage: - local: - path: /dev/sdb - wipe: true -- name: micro03 - storage: - local: - path: /dev/sdb - wipe: true -" - - lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --preseed /root/preseed.yaml" - - # Add cloud-init entry for checking ready state on launched instances. 
- lxc exec micro01 -- sh -c " - for m in \$(lxc ls -f csv -c n) ; do - lxc rm \$m -f - done - - cat << EOF | lxc profile edit default -config: - cloud-init.user-data: | - #cloud-config - packages: - - curl - write_files: - - content: | - #!/bin/sh - exec curl --unix-socket /dev/lxd/sock lxd/1.0 -X PATCH -d '{\"state\": \"Ready\"}' - path: /var/lib/cloud/scripts/per-boot/ready.sh - permissions: \"0755\" -EOF -" - - # Launch a container and VM with ZFS storage & FAN network. - lxc exec micro01 -- sh -c "lxc launch ubuntu:22.04 v1 --vm -s local -n lxdfan0" - lxc exec micro01 -- sh -c "lxc launch ubuntu:22.04 c1 -s local -n lxdfan0" - - # Ensure we can reach the launched instances. - for m in c1 v1 ; do - echo "Waiting up to 5 mins for ${m} to start" - lxc exec micro01 -- sh -ceu " - for round in \$(seq 300); do - if lxc info ${m} | grep -qF \"Status: READY\" ; then - - lxc rm ${m} -f - return 0 - fi - sleep 1 - done - return 1 - " - done - - reset_systems 3 3 2 - - # Create a MicroCloud with ceph and ovn setup. - addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1) - lxc exec micro01 -- sh -c " - cat << EOF > /root/preseed.yaml -lookup_subnet: ${addr}/24 -systems: -- name: micro01 - storage: - ceph: - - path: /dev/sdc - wipe: true - - path: /dev/sdd - wipe: true -- name: micro02 - storage: - ceph: - - path: /dev/sdc - wipe: true - - path: /dev/sdd - wipe: true -- name: micro03 - storage: - ceph: - - path: /dev/sdc - wipe: true - - path: /dev/sdd - wipe: true -ovn: - ipv4_gateway: 10.1.123.1/24 - ipv4_range: 10.1.123.100-10.1.123.254 - ipv6_gateway: fd42:1:1234:1234::1/64 -" - - lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --preseed /root/preseed.yaml" - - # Add cloud-init entry for checking ready state on launched instances. 
- lxc exec micro01 -- sh -c " - for m in \$(lxc ls -f csv -c n) ; do - lxc rm \$m -f - done - - cat << EOF | lxc profile edit default -config: - cloud-init.user-data: | - #cloud-config - packages: - - curl - write_files: - - content: | - #!/bin/sh - exec curl --unix-socket /dev/lxd/sock lxd/1.0 -X PATCH -d '{\"state\": \"Ready\"}' - path: /var/lib/cloud/scripts/per-boot/ready.sh - permissions: \"0755\" -EOF -" - - # Launch a container and VM with CEPH storage & OVN network. - lxc exec micro01 -- sh -c "lxc launch ubuntu:22.04 v1 --vm -s remote -n default" - lxc exec micro01 -- sh -c "lxc launch ubuntu:22.04 c1 -s remote -n default" - - # Ensure we can reach the launched instances. - for m in c1 v1 ; do - echo "Waiting up to 5 mins for ${m} to start" - lxc exec micro01 -- sh -ceu " - for round in \$(seq 300); do - if lxc info ${m} | grep -qF \"Status: READY\" ; then - lxc rm ${m} -f - - return 0 - fi - sleep 1 - done - return 1 - " - done - -} - -test_case() { +_test_case() { # Number of systems to use in the test. num_systems="${1}" @@ -295,59 +148,59 @@ test_case() { printf "\n" - LOOKUP_IFACE="enp5s0" # filter string for the lookup interface table. - LIMIT_SUBNET="yes" # (yes/no) input for limiting lookup of systems to the above subnet. + export LOOKUP_IFACE="enp5s0" # filter string for the lookup interface table. + export LIMIT_SUBNET="yes" # (yes/no) input for limiting lookup of systems to the above subnet. - EXPECT_PEERS="$((num_systems - 1))" + export EXPECT_PEERS="$((num_systems - 1))" if [ "${num_disks}" -gt 0 ] ; then if [ -z "${force_no_zfs}" ]; then - SETUP_ZFS="yes" - ZFS_FILTER="disk1" - ZFS_WIPE="yes" + export SETUP_ZFS="yes" + export ZFS_FILTER="disk1" + export ZFS_WIPE="yes" expected_zfs_disk="disk1" else - SETUP_ZFS="no" + export SETUP_ZFS="no" fi fi - if [ "${num_disks}" -gt 0 ] && [ "${num_systems}" -ge 3 ] ; then + if [ "${num_disks}" -gt 0 ] ; then # If we only have one disk and we used it for ZFS, there should be no prompt. 
if [ "${num_disks}" = 1 ] && [ -z "${force_no_zfs}" ] ; then - echo "Insufficient disks" + echo "Insufficient disks to test Remote storage" elif [ -z "${force_no_ceph}" ]; then - SETUP_CEPH="yes" - CEPH_WIPE="yes" + export SETUP_CEPH="yes" + export CEPH_WIPE="yes" expected_ceph_disks="${num_disks}" if [ -n "${expected_zfs_disk}" ]; then expected_ceph_disks="$((num_disks - 1))" fi else - SETUP_CEPH="no" + export SETUP_CEPH="no" fi fi - if [ "${num_ifaces}" -gt 0 ] && [ "${num_systems}" -ge 3 ] ; then + if [ "${num_ifaces}" -gt 0 ] ; then if [ -z "${force_no_ovn}" ] ; then - SETUP_OVN="yes" + export SETUP_OVN="yes" # Always pick the first available interface. - OVN_FILTER="enp6s0" - IPV4_SUBNET="10.1.123.1/24" - IPV4_START="10.1.123.100" - IPV4_END="10.1.123.254" - IPV6_SUBNET="fd42:1:1234:1234::1/64" + export OVN_FILTER="enp6s0" + export IPV4_SUBNET="10.1.123.1/24" + export IPV4_START="10.1.123.100" + export IPV4_END="10.1.123.254" + export IPV6_SUBNET="fd42:1:1234:1234::1/64" expected_ovn_iface="enp6s0" else - SETUP_OVN="no" + export SETUP_OVN="no" fi fi microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q - for i in $(seq 1 "${num_systems}") ; do - name="$(printf "micro%02d" "${i}")" + for i in $(seq -f "%02g" 1 "${num_systems}") ; do + name="micro${i}" if [ -n "${expected_ovn_iface}" ]; then validate_system_lxd "${name}" "${num_systems}" "${expected_zfs_disk}" "${expected_ceph_disks}" "${expected_ovn_iface}" "${IPV4_SUBNET}" "${IPV4_START}"-"${IPV4_END}" "${IPV6_SUBNET}" @@ -368,69 +221,63 @@ test_case() { done fi - validate_system_microceph "${name}" "${ceph_disks}" + validate_system_microceph "${name}" "${ceph_disks}" validate_system_microovn "${name}" done - } +} test_interactive_combinations() { - for num_systems in $(seq 2 4) ; do - max_disks=2 - - # A setup with 3 disks is redundant if we have less than 3 peers, - # since we have already covered having too many disks for 
zfs with 2 disks per system. - if [ "${num_systems}" -ge 3 ] ; then - max_disks=3 - fi - - for num_disks in $(seq 0 "${max_disks}") ; do - # A setup with OVN interfaces is not necessary with fewer - # than 3 machines as OVN setup will get skipped anyway. - max_ifaces=0 - if [ "${num_systems}" -ge 3 ]; then - max_ifaces=2 - if [ "${num_disks}" -gt 0 ]; then - # If we are testing disks too, just stick to one interface, or none. - max_ifaces=1 - fi - fi - - for num_ifaces in $(seq 0 "${max_ifaces}") ; do - # Run a test without forcibly skipping any services. - test_case "${num_systems}" "${num_disks}" "${num_ifaces}" - - if [ "${num_systems}" -lt 3 ]; then - if [ "${num_disks}" -gt 0 ] ; then - # If we have fewer than 3 systems, we can still create ZFS so test forcibly skipping it. - test_case "${num_systems}" "${num_disks}" "${num_ifaces}" "zfs" - fi - - # Only run additional tests with skipped services if we actually have devices to set up. - elif [ "${num_ifaces}" = 1 ]; then - if [ "${num_disks}" -gt 0 ] ; then - # Test forcibly skipping ZFS, sending available disks to Ceph instead. - test_case "${num_systems}" "${num_disks}" "${num_ifaces}" "zfs" - if [ "${num_disks}" -gt 1 ] ; then - # Test forcibly skipping Ceph only if we have extra disks after ZFS setup. - test_case "${num_systems}" "${num_disks}" "${num_ifaces}" "ceph" - fi - - # Test forcibly skipping both Ceph and ZFS to create no storage devices. - test_case "${num_systems}" "${num_disks}" "${num_ifaces}" "zfs" "ceph" - - # Test forcibly skipping Ceph, ZFS, and OVN to get a FAN device. - test_case "${num_systems}" "${num_disks}" "${num_ifaces}" "zfs" "ceph" "ovn" - fi - fi - - if [ "${num_systems}" -ge 3 ] && [ "${num_ifaces}" -gt 0 ]; then - # Test forcibly skipping OVN whenever we can assign interfaces. - test_case "${num_systems}" "${num_disks}" "${num_ifaces}" "ovn" - fi - done - done - done + # Test with 2 systems, no disks, no interfaces. 
+ _test_case 2 0 0 + + # Test with 2 systems, 1 disk, no interfaces, and each combination of skipping ZFS, Ceph. + _test_case 2 1 0 + _test_case 2 1 0 "zfs" + _test_case 2 1 0 "ceph" + _test_case 2 1 0 "zfs" "ceph" + + # Test with 2 systems, 0 disks, 1 interface, and each combination of skipping OVN. + _test_case 2 0 1 + _test_case 2 0 1 "ovn" + + # Test with 2 systems, 1 disks, 1 interface, and each combination of skipping ZFS, Ceph, OVN. + _test_case 2 1 1 + _test_case 2 1 1 "zfs" + _test_case 2 1 1 "ceph" + _test_case 2 1 1 "zfs" "ceph" + _test_case 2 1 1 "ovn" + _test_case 2 1 1 "zfs" "ovn" + _test_case 2 1 1 "ceph" "ovn" + _test_case 2 1 1 "zfs" "ceph" "ovn" + + # Test with 2 systems, 2 disks, 1 interface, and each combination of skipping ZFS, Ceph, OVN. + _test_case 2 2 1 + _test_case 2 2 1 "zfs" + _test_case 2 2 1 "ceph" + _test_case 2 2 1 "zfs" "ceph" + _test_case 2 2 1 "ovn" + _test_case 2 2 1 "zfs" "ovn" + _test_case 2 2 1 "ceph" "ovn" + _test_case 2 2 1 "zfs" "ceph" "ovn" + + # Test with 2 systems, 3 disks, 1 interface, and each combination of skipping ZFS, Ceph, OVN. + _test_case 2 3 1 + _test_case 2 3 1 "zfs" + _test_case 2 3 1 "ceph" + _test_case 2 3 1 "zfs" "ceph" + _test_case 2 3 1 "ovn" + _test_case 2 3 1 "zfs" "ovn" + _test_case 2 3 1 "ceph" "ovn" + _test_case 2 3 1 "zfs" "ceph" "ovn" + + # Test with 3 systems, with and without disks & interfaces. + _test_case 3 0 0 + _test_case 3 2 2 + + # Test with 4 systems, with and without disks & interfaces. + _test_case 4 0 0 + _test_case 4 2 2 } test_service_mismatch() { @@ -450,22 +297,24 @@ test_service_mismatch() { # Install microceph and microovn on the first machine only. 
for m in micro02 micro03 ; do - lxc exec "${m}" -- snap remove microceph --purge - lxc exec "${m}" -- snap remove microovn --purge + lxc exec "${m}" -- snap disable microceph + lxc exec "${m}" -- snap disable microovn lxc exec "${m}" -- snap restart microcloud done # Init should fail to find the other systems as they don't have the same services. - # 30s should be enough time to find the other systems. - echo "Peers with missing services won't be found after 30s" + # The error is reported on the joining side. + echo "Peers with missing services cannot join" ! microcloud_interactive | lxc exec micro01 -- sh -c "timeout -k 5 30 microcloud init > out" || false # Ensure we exited while still looking for servers, and found none. lxc exec micro01 -- tail -1 out | grep "Scanning for eligible servers" -q # Install the remaining services on the other systems. - lxc exec micro02 -- sh -c "snap install microceph microovn" - lxc exec micro03 -- sh -c "snap install microceph microovn" + lxc exec micro02 -- snap enable microceph + lxc exec micro02 -- snap enable microovn + lxc exec micro03 -- snap enable microceph + lxc exec micro03 -- snap enable microovn # Init should now work. echo "Creating a MicroCloud with MicroCeph and MicroOVN, but without their LXD devices" @@ -482,10 +331,9 @@ test_service_mismatch() { reset_systems 3 3 1 # Run all services on the other systems only. - lxc exec micro01 -- sh -c "snap disable microceph || true" - lxc exec micro01 -- sh -c "snap disable microovn || true" - lxc exec micro01 -- sh -c "snap restart microcloud" - + lxc exec micro01 -- snap disable microceph || true + lxc exec micro01 -- snap disable microovn || true + lxc exec micro01 -- snap restart microcloud SKIP_SERVICE="yes" unset SETUP_CEPH SETUP_OVN @@ -540,13 +388,13 @@ test_disk_mismatch() { test_auto() { reset_systems 2 0 0 - lxc exec micro02 -- sh -c "snap stop microcloud" + lxc exec micro02 -- snap stop microcloud echo MicroCloud auto setup without any peers. ! 
lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --auto > out 2>&1" || false lxc exec micro01 -- tail -1 out | grep -q "Error: Found no available systems" - lxc exec micro02 -- sh -c "snap start microcloud" + lxc exec micro02 -- snap start microcloud echo Auto-create a MicroCloud with 2 systems with no disks/interfaces. lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --auto > out" @@ -556,10 +404,10 @@ test_auto() { validate_system_microovn "${m}" # Supress the first message from LXD. - lxc exec ${m} -- sh -c "lxc list > /dev/null 2>&1" || true + lxc exec ${m} -- lxc list > /dev/null 2>&1 || true # Ensure we created no storage devices. - lxc exec ${m} -- sh -ceu "lxc storage ls -f csv | wc -l | grep -q 0" + [ "$(lxc exec ${m} -- lxc storage ls -f csv | wc -l)" = "0" ] done reset_systems 2 0 1 @@ -572,11 +420,11 @@ test_auto() { validate_system_microovn "${m}" # Ensure we didn't create any other network devices. - ! lxc exec ${m} -- sh -c "lxc network ls -f csv" | grep -q "^default," || false - ! lxc exec ${m} -- sh -c "lxc network ls -f csv" | grep -q "^UPLINK," || false + ! lxc exec ${m} -- lxc network ls -f csv | grep -q "^default," || false + ! lxc exec ${m} -- lxc network ls -f csv | grep -q "^UPLINK," || false # Ensure we created no storage devices. - lxc exec ${m} -- sh -ceu "lxc storage ls -f csv | wc -l | grep -q 0" + [ "$(lxc exec ${m} -- lxc storage ls -f csv | wc -l)" = "0" ] done @@ -590,11 +438,11 @@ test_auto() { validate_system_microovn "${m}" # Ensure we didn't create any other network devices. - ! lxc exec ${m} -- sh -c "lxc network ls -f csv" | grep -q "^default," || false - ! lxc exec ${m} -- sh -c "lxc network ls -f csv" | grep -q "^UPLINK," || false + ! lxc exec ${m} -- lxc network ls -f csv | grep -q "^default," || false + ! lxc exec ${m} -- lxc network ls -f csv | grep -q "^UPLINK," || false # Ensure we created no ceph storage devices. - ! 
lxc exec ${m} -- sh -ceu "lxc storage ls -f csv" | grep -q "^remote,ceph" || false + ! lxc exec ${m} -- lxc storage ls -f csv | grep -q "^remote,ceph" || false done reset_systems 3 0 0 @@ -610,7 +458,7 @@ test_auto() { lxc exec ${m} -- lxc list > /dev/null 2>&1 || true # Ensure we created no storage devices. - lxc exec ${m} -- sh -ceu "lxc storage ls -f csv | wc -l | grep -q 0" + [ "$(lxc exec ${m} -- lxc storage ls -f csv | wc -l)" = "0" ] done reset_systems 3 0 1 @@ -623,11 +471,11 @@ test_auto() { validate_system_microovn "${m}" # Ensure we didn't create any other network devices. - ! lxc exec ${m} -- sh -c "lxc network ls -f csv" | grep -q "^default," || false - ! lxc exec ${m} -- sh -c "lxc network ls -f csv" | grep -q "^UPLINK," || false + ! lxc exec ${m} -- lxc network ls -f csv | grep -q "^default," || false + ! lxc exec ${m} -- lxc network ls -f csv | grep -q "^UPLINK," || false # Ensure we created no storage devices. - lxc exec ${m} -- sh -ceu "lxc storage ls -f csv | wc -l | grep -q 0" + [ "$(lxc exec ${m} -- lxc storage ls -f csv | wc -l)" = "0" ] done reset_systems 3 1 1 @@ -635,16 +483,16 @@ test_auto() { echo Auto-create a MicroCloud with 3 systems with 1 disk and 1 interface each. lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --auto > out" for m in micro01 micro02 micro03; do - validate_system_lxd "${m}" 3 "" 1 - validate_system_microceph "${m}" disk1 + validate_system_lxd "${m}" 3 "" 1 0 + validate_system_microceph "${m}" 0 disk1 validate_system_microovn "${m}" # Ensure we didn't create any other network devices. - ! lxc exec ${m} -- sh -c "lxc network ls -f csv" | grep -q "^default," || false - ! lxc exec ${m} -- sh -c "lxc network ls -f csv" | grep -q "^UPLINK," || false + ! lxc exec ${m} -- lxc network ls -f csv | grep -q "^default," || false + ! lxc exec ${m} -- lxc network ls -f csv | grep -q "^UPLINK," || false # Ensure we created no zfs storage devices. - ! 
#!/bin/bash
# Instance test suite: checks that instances created on a freshly initialised
# MicroCloud pick the expected storage pool and network, boot to the READY
# state and can reach both the uplink bridge and each other.
#
# Relies on `lxc` on the host and on `reset_systems` from the test includes.
# NOTE(review): the callers are assumed to run with `set -e`, like the rest of
# the suite; only the steps inside helpers are guarded with explicit returns.

# _instances_lookup_addr: Prints the IPv4 address listed for enp5s0 on micro01.
_instances_lookup_addr() {
  lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1
}

# _instances_zfs_preseed: Prints a preseed giving micro01..micro03 one local disk each.
# Arguments: $1 - lookup address (a /24 is appended)
#            $2 - optional ovn_uplink_interface to set on every system
_instances_zfs_preseed() {
  local addr="${1}" uplink="${2:-}" name

  printf 'lookup_subnet: %s/24\nsystems:\n' "${addr}"
  for name in micro01 micro02 micro03; do
    printf -- '- name: %s\n' "${name}"
    if [ -n "${uplink}" ]; then
      printf '  ovn_uplink_interface: %s\n' "${uplink}"
    fi
    cat << 'EOF'
  storage:
    local:
      path: /dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_lxd_disk1
      wipe: true
EOF
  done
}

# _instances_ceph_ovn_preseed: Prints a preseed giving micro01..micro03 two ceph
# disks each, plus the OVN gateway/range settings.
# Arguments: $1 - lookup address (a /24 is appended)
_instances_ceph_ovn_preseed() {
  local addr="${1}" name

  printf 'lookup_subnet: %s/24\nsystems:\n' "${addr}"
  for name in micro01 micro02 micro03; do
    printf -- '- name: %s\n' "${name}"
    cat << 'EOF'
  storage:
    ceph:
      - path: /dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_lxd_disk2
        wipe: true
      - path: /dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_lxd_disk3
        wipe: true
EOF
  done
  cat << 'EOF'
ovn:
  ipv4_gateway: 10.1.123.1/24
  ipv4_range: 10.1.123.100-10.1.123.254
  ipv6_gateway: fd42:1:1234:1234::1/64
EOF
}

# _instances_init_microcloud: Feeds a preseed to `microcloud init` on micro01 and
# checks that the last line of its output reports readiness.
# Arguments: $1 - preseed document
# Returns:   non-zero if init fails or "MicroCloud is ready" is not the last line.
_instances_init_microcloud() {
  # The output must land in `out`, otherwise the check below inspects a stale
  # (or missing) file instead of this run.
  lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud init --preseed > out' <<< "${1}" || return 1
  lxc exec micro01 -- tail -1 out | grep -q "MicroCloud is ready"
}

# _instances_delete_leftovers: Force-removes every instance listed on micro01.
_instances_delete_leftovers() {
  lxc exec micro01 -- sh -c '
    for m in $(lxc ls -f csv -c n) ; do
      lxc rm "$m" -f
    done
'
}

# _instances_edit_default_profile: Rewrites the default profile on micro01 so that
# launched instances report the Ready state through /dev/lxd/sock on every boot.
# Arguments: $1 - "1" to also attach the cephfs disk device at /cephfs (default 0)
_instances_edit_default_profile() {
  local with_cephfs="${1:-0}"

  {
    cat << 'EOF'
config:
  cloud-init.user-data: |
    #cloud-config
    write_files:
    - content: |
        #!/bin/sh
        exec curl --unix-socket /dev/lxd/sock lxd/1.0 -X PATCH -d '{"state": "Ready"}'
      path: /var/lib/cloud/scripts/per-boot/ready.sh
      permissions: "0755"
EOF
    if [ "${with_cephfs}" = "1" ]; then
      cat << 'EOF'
devices:
  fs:
    ceph.cluster_name: ceph
    ceph.user_name: admin
    path: /cephfs
    source: cephfs:lxd_cephfs/
    type: disk
EOF
    fi
  } | lxc exec micro01 -- lxc profile edit default
}

# _instances_wait_ready: Polls micro01 (100 rounds, 3s apart) until the given
# instance reports the READY status.
# Arguments: $1 - instance name
# Returns:   0 once READY is seen, 1 after the last round.
_instances_wait_ready() {
  local name="${1}"

  echo -n "Waiting up to 5 mins for ${name} to start "
  # The name is passed as a positional argument rather than spliced into the
  # script, and the script ends with `exit`: `return` outside a function is
  # unspecified by POSIX.
  lxc exec micro01 -- sh -ceu '
    for round in $(seq 100); do
      if lxc list -f csv -c s "$1" | grep -qxF READY; then
        echo " $1 booted successfully"
        exit 0
      fi
      echo -n .
      sleep 3
    done
    echo FAIL
    exit 1
  ' sh "${name}"
}

# check_instance_connectivity: Waits for two instances to become READY, then checks
# that each sees /cephfs, can ping the lxdbr0 gateways, and can ping the other
# instance by name over IPv4 and IPv6.
# Arguments: $1 - first instance name
#            $2 - second instance name
#            $3 - "1" if the first instance is a VM (default 0); the whole check is
#                 skipped when this is 1 and SKIP_VM_LAUNCH=1.
# Returns:   0 on success or skip, 1 on the first failed step.
check_instance_connectivity() {
  local instance_1="${1:-}" instance_2="${2:-}" is_vm="${3:-0}"
  local m ipv4_gw ipv6_gw

  # Default the toggle so an unset SKIP_VM_LAUNCH does not abort under `set -u`.
  if [ "${is_vm}" = "1" ] && [ "${SKIP_VM_LAUNCH:-0}" = "1" ]; then
    echo "Skipping VM ${instance_1} because SKIP_VM_LAUNCH=1 is set"
    return 0
  fi

  # Ensure we can reach the launched instances.
  for m in "${instance_1}" "${instance_2}"; do
    _instances_wait_ready "${m}" || return 1
    lxc exec micro01 -- lxc exec "${m}" -- stat /cephfs || return 1
  done

  # The gateways do not change between instances; look them up once.
  ipv4_gw="$(lxc network get lxdbr0 ipv4.address | cut -d/ -f1)"
  ipv6_gw="$(lxc network get lxdbr0 ipv6.address | cut -d/ -f1)"

  for m in "${instance_1}" "${instance_2}"; do
    lxc exec micro01 -- lxc exec "${m}" -- apt-get update || return 1
    lxc exec micro01 -- lxc exec "${m}" -- apt-get install -y --no-install-recommends iputils-ping || return 1

    echo "Test connectivity to lxdbr0"
    lxc exec micro01 -- lxc exec "${m}" -- ping -nc1 -w5 -4 "${ipv4_gw}" || return 1
    lxc exec micro01 -- lxc exec "${m}" -- ping -nc1 -w5 -6 "${ipv6_gw}" || return 1
  done

  echo "Test connectivity between instances"
  lxc exec micro01 -- lxc exec "${instance_1}" -- ping -nc1 -w5 -4 "${instance_2}" || return 1
  lxc exec micro01 -- lxc exec "${instance_1}" -- ping -nc1 -w5 -6 "${instance_2}" || return 1
  lxc exec micro01 -- lxc exec "${instance_2}" -- ping -nc1 -w5 -4 "${instance_1}" || return 1
  lxc exec micro01 -- lxc exec "${instance_2}" -- ping -nc1 -w5 -6 "${instance_1}" || return 1
}

# test_instances_config: Initialises instances (without starting them) on a
# ZFS+FAN cluster and then on a Ceph+OVN cluster, and checks through `used_by`
# that they were attached to the expected storage pool and network.
test_instances_config() {
  local addr m

  reset_systems 3 3 2

  # Setup a MicroCloud with 3 systems, ZFS storage, and a FAN network.
  addr="$(_instances_lookup_addr)"
  _instances_init_microcloud "$(_instances_zfs_preseed "${addr}")"

  # Init a container and VM with ZFS storage & FAN network.
  lxc exec micro01 -- lxc init --empty v1 --vm
  lxc exec micro01 -- lxc init --empty c1

  # Ensure proper storage pool and network selection by inspecting their used_by.
  for m in c1 v1; do
    lxc exec micro01 -- lxc storage show local | grep -xF -- "- /1.0/instances/${m}" || return 1
    lxc exec micro01 -- lxc network show lxdfan0 | grep -xF -- "- /1.0/instances/${m}" || return 1
  done

  reset_systems 3 3 2

  # Create a MicroCloud with ceph and ovn setup.
  addr="$(_instances_lookup_addr)"
  _instances_init_microcloud "$(_instances_ceph_ovn_preseed "${addr}")"

  # Delete any instances left behind.
  _instances_delete_leftovers

  # Init a container and VM with CEPH storage & OVN network.
  lxc exec micro01 -- lxc init ubuntu-minimal-daily:22.04 v1 -c limits.memory=512MiB -d root,size=3GiB --vm -s remote -n default
  lxc exec micro01 -- lxc init ubuntu-minimal-daily:22.04 c1 -c limits.memory=512MiB -d root,size=3GiB -s remote -n default

  # Ensure proper storage pool and network selection by inspecting their used_by.
  for m in c1 v1; do
    lxc exec micro01 -- lxc storage show remote | grep -xF -- "- /1.0/instances/${m}" || return 1
    lxc exec micro01 -- lxc network show default | grep -xF -- "- /1.0/instances/${m}" || return 1
  done
}

# test_instances_launch: Launches instances on a local-storage cluster and on a
# Ceph+OVN cluster, waits for them to report READY and, on the Ceph+OVN cluster,
# checks their connectivity. VM steps are skipped when SKIP_VM_LAUNCH=1.
test_instances_launch() {
  local addr m

  reset_systems 3 3 2

  # Setup a MicroCloud with 3 systems and local ZFS storage.
  # NOTE(review): the original comment says "FAN network", but the preseed sets
  # ovn_uplink_interface and the launches below use `-n default`, not lxdfan0 as
  # in test_instances_config - confirm which network is intended.
  addr="$(_instances_lookup_addr)"
  _instances_init_microcloud "$(_instances_zfs_preseed "${addr}" enp6s0)"

  # Delete any instances left behind.
  _instances_delete_leftovers

  # Add cloud-init entry for checking ready state on launched instances.
  _instances_edit_default_profile 0

  # Launch a container and VM with ZFS storage.
  if [ "${SKIP_VM_LAUNCH:-0}" = "1" ]; then
    echo "::warning::SKIPPING VM LAUNCH TEST"
  else
    lxc exec micro01 -- lxc launch ubuntu-minimal-daily:22.04 v1 -c limits.memory=512MiB -d root,size=3GiB --vm -s local -n default
  fi
  lxc exec micro01 -- lxc launch ubuntu-minimal-daily:22.04 c1 -c limits.memory=512MiB -d root,size=2GiB -s local -n default

  # Ensure the launched instances boot, then remove them.
  for m in c1 v1; do
    if [ "${m}" = "v1" ] && [ "${SKIP_VM_LAUNCH:-0}" = "1" ]; then
      continue
    fi

    _instances_wait_ready "${m}" || return 1
    lxc exec micro01 -- lxc rm "${m}" -f
  done

  reset_systems 3 3 2

  # Create a MicroCloud with ceph and ovn setup.
  addr="$(_instances_lookup_addr)"
  _instances_init_microcloud "$(_instances_ceph_ovn_preseed "${addr}")"

  # Delete any instances left behind.
  _instances_delete_leftovers

  # Add cloud-init entry for checking ready state, plus the cephfs device.
  _instances_edit_default_profile 1

  # Launch 2 containers and VM with CEPH storage & OVN network.
  if [ "${SKIP_VM_LAUNCH:-0}" = "1" ]; then
    echo "::warning::SKIPPING VM LAUNCH TEST"
  else
    lxc exec micro01 -- lxc launch ubuntu-minimal-daily:22.04 v1 -c limits.memory=512MiB -d root,size=3GiB --vm -s remote -n default
  fi
  lxc exec micro01 -- lxc launch ubuntu-minimal-daily:22.04 c1 -c limits.memory=512MiB -d root,size=2GiB -s remote -n default
  lxc exec micro01 -- lxc launch ubuntu-minimal-daily:22.04 c2 -c limits.memory=512MiB -d root,size=2GiB -s remote -n default

  check_instance_connectivity "c1" "c2" "0"
  check_instance_connectivity "v1" "c1" "1"

  lxc exec micro01 -- lxc delete -f c1 c2
  if [ "${SKIP_VM_LAUNCH:-0}" != "1" ]; then
    lxc exec micro01 -- lxc delete -f v1
  fi
}
- lxc exec micro01 -- sh -c " - cat << EOF > /root/preseed.yaml -lookup_subnet: ${addr}/24 + lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed << EOF +lookup_subnet: ${lookup_gateway} systems: - name: micro01 ovn_uplink_interface: enp6s0 @@ -14,12 +14,12 @@ systems: ovn_uplink_interface: enp6s0 storage: local: - path: /dev/sdc + path: /dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_lxd_disk2 wipe: true ceph: - - path: /dev/sdb + - path: /dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_lxd_disk1 wipe: true - - path: /dev/sdd + - path: /dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_lxd_disk3 wipe: true - name: micro03 ovn_uplink_interface: enp6s0 @@ -31,23 +31,20 @@ ovn: storage: local: - - find: id == sdb + - find: device_id == *lxd_disk1 find_min: 2 find_max: 2 wipe: true ceph: - - find: id == sdc + - find: device_id == *lxd_disk2 find_min: 2 find_max: 2 wipe: true - - find: id == sdd + - find: device_id == *lxd_disk3 find_min: 2 find_max: 2 wipe: true EOF -" - - lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --preseed /root/preseed.yaml" for m in micro01 micro03 ; do validate_system_lxd ${m} 3 disk1 2 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 @@ -61,46 +58,39 @@ EOF validate_system_microovn micro02 # Grow the MicroCloud with a new node, with filter-based storage selection. 
- lxc exec micro01 -- sh -c " - cat << EOF > /root/preseed.yaml -lookup_subnet: ${addr}/24 + lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud add --preseed < /root/preseed.yaml -lookup_subnet: ${addr}/24 + lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed << EOF +lookup_subnet: ${lookup_gateway} systems: - name: micro01 - name: micro02 - name: micro03 EOF -" - lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --preseed /root/preseed.yaml" for m in micro01 micro02 micro03 ; do validate_system_lxd ${m} 3 validate_system_microceph ${m} @@ -108,25 +98,20 @@ EOF done reset_systems 3 3 2 - addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1) # Create a MicroCloud if we don't have MicroOVN or MicroCeph installed. - lxc exec micro01 -- sh -c " - snap disable microceph - snap disable microovn - + lxc exec micro01 -- snap disable microceph + lxc exec micro01 -- snap disable microovn sleep 1 - cat << EOF > /root/preseed.yaml -lookup_subnet: ${addr}/24 + lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed << EOF +lookup_subnet: ${lookup_gateway} systems: - name: micro01 - name: micro02 - name: micro03 EOF -" - lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --preseed /root/preseed.yaml" for m in micro01 micro02 micro03 ; do validate_system_lxd ${m} 3 done