diff --git a/component/firecracker-base/default-variables.txt b/component/firecracker-base/default-variables.txt
new file mode 100644
index 0000000000..0b67437af2
--- /dev/null
+++ b/component/firecracker-base/default-variables.txt
@@ -0,0 +1,3 @@
+CONFIGURATION_MANAGEMENT_TOOL="shell"
+CONFIGURATION_MANAGEMENT_BRANCH="main"
+AUTOMATED="true"
diff --git a/component/firecracker-base/machine-userdata.sh b/component/firecracker-base/machine-userdata.sh
new file mode 100644
index 0000000000..15a0882764
--- /dev/null
+++ b/component/firecracker-base/machine-userdata.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+set -eo pipefail
+
+# Write the variables file that orchestrate-install.sh reads by default
+cat << HEREDOC > /tmp/variables.txt
+CONFIGURATION_MANAGEMENT_BRANCH="main"
+CONFIGURATION_MANAGEMENT_TOOL="shell"
+AUTOMATED="true"
+HEREDOC
+
+# Fetch and run the installer. -f turns an HTTP error into a curl failure
+# (caught by pipefail) instead of piping an error page into bash.
+curl -fsSL https://raw.githubusercontent.com/systeminit/si/main/component/firecracker-base/orchestrate-install.sh | bash
\ No newline at end of file
diff --git a/component/firecracker-base/orchestrate-install.sh b/component/firecracker-base/orchestrate-install.sh
new file mode 100644
index 0000000000..b3d31f6182
--- /dev/null
+++ b/component/firecracker-base/orchestrate-install.sh
@@ -0,0 +1,245 @@
+#!/bin/bash
+#
+# Provision a host to run Firecracker micro-VMs under the jailer.
+#
+# Usage:
+#   ./orchestrate-install.sh [VARIABLES_FILE]
+#
+# VARIABLES_FILE (default: /tmp/variables.txt) holds KEY="value" shell
+# assignments. Keys read by this script:
+#   CONFIGURATION_MANAGEMENT_TOOL    only "shell" is supported
+#   CONFIGURATION_MANAGEMENT_BRANCH  branch the helper scripts come from
+#   AUTOMATED                        "true" skips the 5 second review pause
+# Optional variables:
+#   FIRECRACKER_SSH_KEY  existing key file to move into place
+#   EXTERNAL_INTERFACE   interface to masquerade through (default: enp4s0)
+
+set -eo pipefail
+
+# Print an error to stderr and abort the installation.
+die() {
+  echo "Error: $*" >&2
+  exit 1
+}
+
+# Resolve VARIABLES_FILE, print it for review and load its assignments into
+# the current shell. Pauses for 5 seconds unless AUTOMATED is "true".
+check_params_set() {
+  VARIABLES_FILE="${VARIABLES_FILE:-/tmp/variables.txt}"
+  local vars_path="$VARIABLES_FILE"
+
+  [[ -f "$VARIABLES_FILE" ]] \
+    || die "Could not find VARIABLES_FILE: $VARIABLES_FILE file to drive installation"
+
+  echo "---------------------------------"
+  echo "Values passed as inputs:"
+  echo "VARIABLES_FILE=${VARIABLES_FILE}"
+  cat "$VARIABLES_FILE"
+
+  # Source the file rather than eval-ing its word-split contents, so values
+  # containing spaces survive. A bare file name gets a ./ prefix because `.`
+  # searches PATH before the current directory.
+  [[ "$vars_path" == */* ]] || vars_path="./${vars_path}"
+  # shellcheck source=/dev/null
+  . "$vars_path"
+  echo "---------------------------------"
+
+  # Give interactive users time to review the values printed above. An `if`
+  # is used because `[[ ... ]] && sleep 5` as the last command makes the
+  # function return 1 whenever AUTOMATED is "true".
+  if [[ "${AUTOMATED:-}" != "true" ]]; then
+    sleep 5
+  fi
+}
+
+# Print /etc/os-release and export OS_VARIANT for the first pattern that
+# matches it. Aborts when the file is missing or no pattern matches.
+check_os_release() {
+  local os_release="/etc/os-release"
+  local entry
+
+  [[ -f "$os_release" ]] \
+    || die "Could not find an /etc/os-release file to determine Operating System"
+
+  echo "------------------------------------"
+  echo "Info: /etc/os-release shown below:"
+  cat "$os_release"
+  echo "------------------------------------"
+
+  # "<grep -E pattern>=<OS_VARIANT>" pairs, checked in order.
+  # NOTE(review): the "... release N" wording resembles /etc/redhat-release
+  # more than os-release fields - confirm these patterns match on real hosts.
+  for entry in \
+    'CentOS Linux release 7=centos-7' \
+    'CentOS Stream release 8=centos-stream-8' \
+    'Rocky Linux release 8=rocky-8' \
+    'Red Hat Enterprise Linux Server release 7=redhat-7' \
+    'Red Hat Enterprise Linux release 8=redhat-8' \
+    'Amazon Linux release 2=amazon-linux-2' \
+    '^NAME.*Fedora=fedora' \
+    '^NAME.*Ubuntu=ubuntu' \
+    '^NAME.*Debian=debian' \
+    '^NAME.*Mint=mint'; do
+    if grep -Eq -- "${entry%=*}" "$os_release"; then
+      export OS_VARIANT="${entry##*=}"
+      return 0
+    fi
+  done
+
+  die "Operating system could not be determined or is unsupported, could not configure the OS for firecracker node"
+}
+
+# Run the per-OS preparation steps for the detected OS_VARIANT.
+# fedora, debian and mint have no arm here and stop at the default arm.
+install_pre_reqs() {
+  local rc=0
+
+  echo "Info: Installing prereqs for configuration"
+
+  case "$OS_VARIANT" in
+    centos-7 | redhat-7 | centos-stream-8 | redhat-8 | rocky-8 | amazon-linux-2)
+      # Insert OS specific setup steps here
+      sudo yum -v update -y || rc=$?
+      ;;
+    ubuntu)
+      # Insert OS specific setup steps here
+      echo "Info: executing prereq steps for ubuntu"
+      ;;
+    *)
+      die "Something went wrong, OS_VARIANT determined to be: $OS_VARIANT (unsupported)"
+      ;;
+  esac
+
+  # The status is captured above because `$?` read inside the message would
+  # report the result of the test rather than of the failed command.
+  if [[ "$rc" -ne 0 ]]; then
+    echo "Error: Exit code $rc returned during installation; see above error log for information" >&2
+    return "$rc"
+  fi
+}
+
+# Download the Firecracker artifacts into /firecracker-data/, create the
+# jailer user and group, install the binaries and configure KVM, sysctls
+# and NAT. Only CONFIGURATION_MANAGEMENT_TOOL="shell" is implemented.
+# NOTE(review): most commands below are not prefixed with sudo - presumably
+# this runs as root; confirm.
+execute_configuration_management() {
+  local install_dir="/firecracker-data"
+  local helper_url="https://raw.githubusercontent.com/systeminit/si-firecracker-config/${CONFIGURATION_MANAGEMENT_BRANCH:-main}"
+  local artifact_url="https://si-tools-prod-ec2-firecracker-config.s3.amazonaws.com/firecracker/latest"
+  local external_interface="${EXTERNAL_INTERFACE:-enp4s0}"
+  local name
+
+  echo "Info: Installation folder set to /firecracker-data/"
+
+  if [[ "${CONFIGURATION_MANAGEMENT_TOOL:-}" != "shell" ]]; then
+    die "Unsupported or unknown configuration management tool specified, exiting."
+  fi
+
+  # TODO(johnrwatson): Set up cgroup and cpu time/memory limits for jailer.
+  # < limits here > - will do later
+
+  # Update Process Limits. printf emits one limits.conf entry per line; the
+  # previous `echo -e "\jailer-shared ..."` wrote both entries on a single
+  # line with literal backslashes.
+  printf '%s\n' \
+    '' \
+    'jailer-shared soft nproc 16384' \
+    'jailer-shared hard nproc 16384' \
+    | sudo tee -a /etc/security/limits.conf
+
+  # Create the installation folder and work from it
+  mkdir -p "${install_dir}/output/"
+  cd "${install_dir}/"
+
+  # Helper Scripts. -f stops an HTTP error page being saved as the script.
+  for name in start.sh stop.sh; do
+    curl -fsSL -o "./${name}" "${helper_url}/${name}"
+  done
+
+  # Remainder of the binaries. -O overwrites instead of saving "<name>.1"
+  # when the file already exists.
+  for name in rootfs.ext4 image-kernel.bin firecracker jailer; do
+    wget -O "./${name}" "${artifact_url}/${name}"
+  done
+
+  # TODO(johnrwatson): Currently not used but we could maybe make dynamic keys for each micro-vm (or use something like aws ssm/tailscale)
+  # This is a bit of a poor attempt to setup a child key, but will do until we have this properly working
+  if [[ -z "${FIRECRACKER_SSH_KEY:-}" ]]; then
+    ssh-keygen -b 2048 -t rsa -f "${install_dir}/micro-vm-key" -q -N ""
+  else
+    mv -- "$FIRECRACKER_SSH_KEY" "${install_dir}/micro-vm-key"
+  fi
+
+  # Create a user and group to run firecracker/jailer with & another group for the shared folders
+  useradd -M jailer-shared
+  usermod -L jailer-shared
+  groupadd -g 10000 jailer-processes
+  usermod -a -G jailer-processes jailer-shared
+
+  # Set up correct permissions for the /firecracker-data/ folder
+  chown -R jailer-shared:jailer-shared "${install_dir}/"
+  chmod a+x "${install_dir}"/*{.sh,firecracker,jailer}
+  chmod 400 "${install_dir}/micro-vm-key"
+
+  # Copy bins to /usr/bin/
+  cp ./firecracker /usr/bin/firecracker
+  cp ./jailer /usr/bin/jailer
+
+  # Load kernel module
+  modprobe kvm_intel
+
+  # TODO(johnrwatson): Can do better than this, needs review
+  chmod 777 /dev/kvm
+
+  # Configure packet forwarding
+  sysctl -w net.ipv4.conf.all.forwarding=1
+
+  # Avoid "nf_conntrack: table full, dropping packet"
+  #sudo sysctl -w net.ipv4.netfilter.ls=99999999
+
+  # Avoid "neighbour: arp_cache: neighbor table overflow!"
+  sysctl -w net.ipv4.neigh.default.gc_thresh1=1024
+  sysctl -w net.ipv4.neigh.default.gc_thresh2=2048
+  sysctl -w net.ipv4.neigh.default.gc_thresh3=4096
+
+  # Masquerade all external traffic as if it came from the external interface
+  iptables -t nat -A POSTROUTING -o "$external_interface" -j MASQUERADE
+
+  # Adjust MTU to make it consistent
+  ip link set dev "$external_interface" mtu 1500
+
+  echo "Info: System configuration complete"
+}
+
+# Run the per-OS package cleanup and empty /tmp/firecracker-install/.
+execute_cleanup() {
+  case "$OS_VARIANT" in
+    centos-7 | redhat-7 | centos-stream-8 | redhat-8 | rocky-8 | amazon-linux-2)
+      # Insert OS specific cleanup steps here
+      yum -v clean all
+      ;;
+    ubuntu)
+      # Insert OS specific cleanup steps here
+      echo "Info: Executing post-clean up for ubuntu"
+      ;;
+    *)
+      die "Something went wrong during cleanup, OS_VARIANT set to: $OS_VARIANT"
+      ;;
+  esac
+
+  rm -Rf /tmp/firecracker-install/*
+}
+
+# -----------------------------------------
+
+VARIABLES_FILE="${1:-}"
+
+# Called as plain commands so that `set -e` stays in effect inside them.
+check_params_set
+echo -e "Installation Values found to be:\n - $VARIABLES_FILE"
+check_os_release
+echo -e "Operating System found to be:\n - $OS_VARIANT"
+install_pre_reqs
+execute_configuration_management
+execute_cleanup
\ No newline at end of file
diff --git a/component/firecracker-base/start.sh b/component/firecracker-base/start.sh
new file mode 100644
index 0000000000..c60ab19b98
--- /dev/null
+++ b/component/firecracker-base/start.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+#
+# Prepare the user, network namespace and jail directory for one
+# Firecracker micro-VM, then launch it through the jailer.
+#
+# Usage:
+#   ./start.sh [SB_ID]
+#
+# SB_ID defaults to 0 and is embedded in every per-VM resource name.
+# NOTE(review): this copy of the script appears truncated after the
+# "curl_put '/logger'" call - see the note at that line.
+
+set -euo pipefail
+
+SB_ID="${1:-0}" # Default to sb_id=0
+
+JAILER_BINARY="/usr/bin/jailer"
+# The firecracker binary the jailer execs; this previously pointed at the
+# jailer itself.
+FC_BINARY="/usr/bin/firecracker"
+RO_DRIVE="/firecracker-data/rootfs.ext4"
+KERNEL="/firecracker-data/image-kernel.bin"
+TAP_DEV="fc-${SB_ID}-tap0"
+JAIL_ROOT="/srv/jailer/firecracker/${SB_ID}/root"
+JAIL_USER="jailer-${SB_ID}"
+NETNS="jailer-${SB_ID}"
+VETH_MAIN="veth-main${SB_ID}"
+VETH_JAILER="veth-jailer${SB_ID}"
+
+KERNEL_BOOT_ARGS="panic=1 pci=off nomodules reboot=k tsc=reliable quiet i8042.nokbd i8042.noaux 8250.nr_uarts=0 ipv6.disable=1"
+#KERNEL_BOOT_ARGS="console=ttyS0 reboot=k panic=1 pci=off nomodules i8042.nokbd i8042.noaux ipv6.disable=1"
+
+API_SOCKET="${JAIL_ROOT}/run/firecracker.socket"
+CURL=(curl --silent --show-error --header "Content-Type: application/json" --unix-socket "${API_SOCKET}" --write-out "HTTP %{http_code}")
+
+# Create a user and group to run the execution via for one micro-vm
+# TODO(johnrwatson): There is a edge case where this will clash with an already existing user + will fail
+# root@ip-10-1-29-58:/firecracker-data# useradd -M -u 10000$SB_ID jailer-$SB_ID
+# useradd: user 'jailer-20359' already exists
+useradd -M -u "10000${SB_ID}" "$JAIL_USER"
+usermod -L "$JAIL_USER"
+
+# This was created earlier on the machine provisioning
+# groupadd -g 10000 jailer-processes
+usermod -a -G jailer-processes "$JAIL_USER"
+usermod -a -G root "$JAIL_USER"
+usermod -a -G kvm "$JAIL_USER"
+
+# PUT the request body read from stdin to the Firecracker API path in $1.
+# Returns 1, after printing curl's output, when curl fails or the output
+# does not end with an "HTTP 2xx" status.
+curl_put() {
+  local URL_PATH="$1"
+  local OUTPUT
+  local RC=0
+  # `|| RC=$?` keeps `set -e` from aborting before the error is reported.
+  OUTPUT="$("${CURL[@]}" -X PUT --data @- "http://localhost/${URL_PATH#/}" 2>&1)" || RC="$?"
+  if [ "$RC" -ne 0 ]; then
+    echo "Error: curl PUT ${URL_PATH} failed with exit code $RC, output:"
+    echo "$OUTPUT"
+    return 1
+  fi
+  # Error if output doesn't end with "HTTP 2xx"
+  if [[ "$OUTPUT" != *HTTP\ 2[0-9][0-9] ]]; then
+    echo "Error: curl PUT ${URL_PATH} failed with non-2xx HTTP status code, output:"
+    echo "$OUTPUT"
+    return 1
+  fi
+}
+
+logfile="${JAIL_ROOT}/fc-sb${SB_ID}-log"
+metricsfile="${JAIL_ROOT}/fc-sb${SB_ID}-metrics"
+
+mkdir -p "${JAIL_ROOT}/"
+touch "$logfile"
+touch "$metricsfile"
+
+# Copy the rootfs and kernel files into the jail
+# TODO(johnrwatson): Figure out how to avoid this, it's a total waste of resources
+cp "$KERNEL" "${JAIL_ROOT}/image-kernel.bin"
+cp "$RO_DRIVE" "${JAIL_ROOT}/rootfs.ext4"
+
+chown -R "${JAIL_USER}:${JAIL_USER}" "${JAIL_ROOT}/"
+
+# Create network namespace for jailer incantation
+ip netns add "$NETNS"
+
+# Setup TAP device that uses proxy ARP
+MASK_LONG="255.255.255.252"
+MASK_SHORT="/30"
+FC_IP="10.0.0.1" # Intentionally hardcoded to make cross-microvm communication
+TAP_IP="10.0.0.2" # more difficult & to simplify rootfs creation/configuration
+FC_MAC="$(printf '02:FC:00:00:%02X:%02X' $((SB_ID / 256)) $((SB_ID % 256)))"
+ip netns exec "$NETNS" ip link del "$TAP_DEV" 2> /dev/null || true
+ip netns exec "$NETNS" ip tuntap add dev "$TAP_DEV" mode tap
+
+# Disable ipv6, enable Proxy ARP
+ip netns exec "$NETNS" sysctl -w "net.ipv4.conf.${TAP_DEV}.proxy_arp=1" > /dev/null
+ip netns exec "$NETNS" sysctl -w "net.ipv6.conf.${TAP_DEV}.disable_ipv6=1" > /dev/null
+
+# Add IP to TAP for micro-vm
+ip netns exec "$NETNS" ip addr add "${TAP_IP}${MASK_SHORT}" dev "$TAP_DEV"
+ip netns exec "$NETNS" ip link set dev "$TAP_DEV" up
+
+NET_LINK_MAIN_IP="$(printf '100.65.%s.%s' $(((4 * SB_ID + 1) / 256)) $(((4 * SB_ID + 1) % 256)))"
+NET_LINK_JAILER_IP="$(printf '100.65.%s.%s' $(((4 * SB_ID + 2) / 256)) $(((4 * SB_ID + 2) % 256)))"
+
+# Set up IP link into default namespace for external routing.
+# These commands use SB_ID rather than $1: under `set -u` a bare $1 aborts
+# the script when no argument is given, despite the SB_ID default.
+ip link add "$VETH_MAIN" type veth peer name "$VETH_JAILER"
+ip link set "$VETH_JAILER" netns "$NETNS"
+ip addr add "${NET_LINK_MAIN_IP}/30" dev "$VETH_MAIN"
+ip netns exec "$NETNS" ip addr add "${NET_LINK_JAILER_IP}/30" dev "$VETH_JAILER"
+
+# Bring the veth link up for external routing
+ip link set dev "$VETH_MAIN" up
+ip netns exec "$NETNS" ip link set dev "$VETH_JAILER" up
+ip netns exec "$NETNS" ip route add default via "$NET_LINK_MAIN_IP"
+ip netns exec "$NETNS" ip route
+
+# NAT within the namespace to route return traffic to TAP device of firecracker process for inbound traffic
+ip netns exec "$NETNS" iptables -t nat -A POSTROUTING -o "$VETH_JAILER" -j MASQUERADE
+ip netns exec "$NETNS" iptables -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
+ip netns exec "$NETNS" iptables -A FORWARD -i "$TAP_DEV" -o "$VETH_JAILER" -j ACCEPT
+
+KERNEL_BOOT_ARGS="${KERNEL_BOOT_ARGS} ip=${FC_IP}::${TAP_IP}:${MASK_LONG}::eth0:off"
+
+# Start Firecracker API server
+rm -f "$API_SOCKET" || echo "socket missing"
+
+# Example FC invocation directly
+#"${FC_BINARY}" --api-sock "$API_SOCKET" --id "${SB_ID}" --boot-timer >> "$logfile" &
+
+# TODO(johnrwatson): We don't use proper cgroup isolation, we probably want this in the future
+"${JAILER_BINARY}" --cgroup-version 2 --id "$SB_ID" --exec-file "$FC_BINARY" --uid "10000${SB_ID}" --gid 10000 --netns "/var/run/netns/${NETNS}" --new-pid-ns -- --boot-timer >> "$logfile" &
+
+sleep 0.015s
+
+# Wait for API server to start
+while [ ! -e "$API_SOCKET" ]; do
+  echo "FC $SB_ID still not ready..."
+  sleep 0.01s
+done
+
+# NOTE(review): the text between "curl_put '/logger' <" and
+# "/dev/null || true" appears to have been lost from this patch: FC_MAC and
+# KERNEL_BOOT_ARGS are assigned but never used, and the commands below
+# remove what was just created. Restore this section from the original
+# commit before applying.
+curl_put '/logger' < /dev/null || true
+
+# Remove veth devices
+ip link del "$VETH_MAIN" 2> /dev/null || true
+ip link del "$VETH_JAILER" 2> /dev/null || true
+
+# Remove iptables rules
+ip netns exec "$NETNS" iptables -t nat -D POSTROUTING -o "$VETH_JAILER" -j MASQUERADE
+ip netns exec "$NETNS" iptables -D FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
+ip netns exec "$NETNS" iptables -D FORWARD -i "$TAP_DEV" -o "$VETH_JAILER" -j ACCEPT
+
+# Remove network namespace
+ip netns del "$NETNS"
+
+# Remove user and group
+userdel "$JAIL_USER"
+
+# Remove directories and files
+rm -rf "/srv/jailer/firecracker/${SB_ID:?}"
\ No newline at end of file