diff --git a/component/firecracker-base/orchestrate-install.sh b/component/firecracker-base/orchestrate-install.sh old mode 100644 new mode 100755 index b3d31f6182..ebbb57086b --- a/component/firecracker-base/orchestrate-install.sh +++ b/component/firecracker-base/orchestrate-install.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Call this function with: +# Call this function with: # ./orchestrate-install.sh set -eo pipefail @@ -27,15 +27,16 @@ check_os_release() { echo "Info: /etc/os-release shown below:" cat /etc/os-release echo "------------------------------------" - [[ "$(cat /etc/os-release | grep 'CentOS Linux release 7')" ]] && export OS_VARIANT=centos-7 && return 0 - [[ "$(cat /etc/os-release | grep 'CentOS Stream release 8')" ]] && export OS_VARIANT=centos-stream-8 && return 0 + [[ "$(cat /etc/os-release | grep 'CentOS Linux release 7')" ]] && export OS_VARIANT=centos-7 && return 0 + [[ "$(cat /etc/os-release | grep 'CentOS Stream release 8')" ]] && export OS_VARIANT=centos-stream-8 && return 0 [[ "$(cat /etc/os-release | grep 'Rocky Linux release 8')" ]] && export OS_VARIANT=rocky-8 && return 0 [[ "$(cat /etc/os-release | grep 'Red Hat Enterprise Linux Server release 7')" ]] && export OS_VARIANT=redhat-7 && return 0 [[ "$(cat /etc/os-release | grep 'Red Hat Enterprise Linux release 8')" ]] && export OS_VARIANT=redhat-8 && return 0 [[ "$(cat /etc/os-release | grep 'Amazon Linux release 2')" ]] && export OS_VARIANT=amazon-linux-2 && return 0 [[ "$(cat /etc/os-release | grep ^NAME | grep Fedora)" ]] && export OS_VARIANT=fedora && return 0 [[ "$(cat /etc/os-release | grep ^NAME | grep Ubuntu)" ]] && export OS_VARIANT=ubuntu && return 0 - [[ "$(cat /etc/os-release | grep ^NAME | grep Debian)" ]] && export OS_VARIANT=debian && return 0 + [[ "$(cat /etc/os-release | grep ^NAME | grep -i pop!_os )" ]] && export OS_VARIANT=ubuntu && return 0 + [[ "$(cat /etc/os-release | grep ^NAME | grep Debian)" ]] && export OS_VARIANT=debian && return 0 [[ "$(cat /etc/os-release | grep 
^NAME | grep Mint)" ]] && export OS_VARIANT=mint && return 0 echo "Error: Operating system could not be determined or is unsupported, could not configure the OS for firecracker node" && exit 1 @@ -48,7 +49,7 @@ install_pre_reqs() { case $OS_VARIANT in centos-7) # Insert OS specific setup steps here - sudo yum -v update -y + sudo yum -v update -y ;; redhat-7) # Insert OS specific setup steps here @@ -94,34 +95,39 @@ execute_configuration_management() { # < limits here > - will do later # Update Process Limits - echo -e "\jailer-shared soft nproc 16384\jailer-shared hard nproc 16384\n" | sudo tee -a /etc/security/limits.conf + if ! grep -Fq "jailer-shared" /etc/security/limits.conf; then + echo -e "\njailer-shared soft nproc 16384\njailer-shared hard nproc 16384\n" | sudo tee -a /etc/security/limits.conf + fi - # Mount secondary EBS volume at /data for + # Mount secondary EBS volume at /data for mkdir -p /firecracker-data/output/ && cd /firecracker-data/ # Helper Scripts - curl https://raw.githubusercontent.com/systeminit/si-firecracker-config/${CONFIGURATION_MANAGEMENT_BRANCH:-main}/start.sh > ./start.sh - curl https://raw.githubusercontent.com/systeminit/si-firecracker-config/${CONFIGURATION_MANAGEMENT_BRANCH:-main}/stop.sh > ./stop.sh + curl https://raw.githubusercontent.com/systeminit/si/${CONFIGURATION_MANAGEMENT_BRANCH:-main}/component/firecracker-base/start.sh > ./start.sh + curl https://raw.githubusercontent.com/systeminit/si/${CONFIGURATION_MANAGEMENT_BRANCH:-main}/component/firecracker-base/stop.sh > ./stop.sh + curl https://raw.githubusercontent.com/systeminit/si/${CONFIGURATION_MANAGEMENT_BRANCH:-main}/component/firecracker-base/prepare_jailer.sh > ./prepare_jailer.sh # Remainder of the binaries - wget https://si-tools-prod-ec2-firecracker-config.s3.amazonaws.com/firecracker/latest/rootfs.ext4 - wget https://si-tools-prod-ec2-firecracker-config.s3.amazonaws.com/firecracker/latest/image-kernel.bin - wget 
https://si-tools-prod-ec2-firecracker-config.s3.amazonaws.com/firecracker/latest/firecracker - wget https://si-tools-prod-ec2-firecracker-config.s3.amazonaws.com/firecracker/latest/jailer + wget https://si-tools-prod-ec2-firecracker-config.s3.amazonaws.com/firecracker/latest/rootfs.ext4 -O ./rootfs.ext4 + wget https://si-tools-prod-ec2-firecracker-config.s3.amazonaws.com/firecracker/latest/image-kernel.bin -O ./image-kernel.bin + wget https://si-tools-prod-ec2-firecracker-config.s3.amazonaws.com/firecracker/latest/firecracker -O ./firecracker + wget https://si-tools-prod-ec2-firecracker-config.s3.amazonaws.com/firecracker/latest/jailer -O ./jailer # TODO(johnrwatson): Currently not used but we could maybe make dynamic keys for each micro-vm (or use something like aws ssm/tailscale) # This is a bit of a poor attempt to setup a child key, but will do until we have this properly working - if [[ -z "$FIRECRACKER_SSH_KEY" ]]; then - ssh-keygen -b 2048 -t rsa -f /firecracker-data/micro-vm-key -q -N "" - else - mv $FIRECRACKER_SSH_KEY /firecracker-data/micro-vm-key - fi + # if [[ -z "$FIRECRACKER_SSH_KEY" ]]; then + # ssh-keygen -b 2048 -t rsa -f /firecracker-data/micro-vm-key -q -N "" + # else + # mv $FIRECRACKER_SSH_KEY /firecracker-data/micro-vm-key -f + # fi # Create a user and group to run firecracker/jailer with & another group for the shared folders - useradd -M jailer-shared - usermod -L jailer-shared - groupadd -g 10000 jailer-processes - usermod -a -G jailer-processes jailer-shared + if ! 
id jailer-shared >/dev/null 2>&1; then + useradd -M jailer-shared + usermod -L jailer-shared + groupadd -g 10000 jailer-processes + usermod -a -G jailer-processes jailer-shared + fi # Set up correct permissions for the /firecracker-data/ folder chown -R jailer-shared:jailer-shared /firecracker-data/ @@ -133,7 +139,7 @@ execute_configuration_management() { cp ./jailer /usr/bin/jailer # Load kernel module - modprobe kvm_intel + modprobe kvm_intel || { echo "loading AMD instead"; modprobe kvm_amd; } # TODO(johnrwatson): Can do better than this, needs review chmod 777 /dev/kvm @@ -153,7 +159,7 @@ execute_configuration_management() { iptables -t nat -A POSTROUTING -o enp4s0 -j MASQUERADE # Adjust MTU to make it consistent - ip link set dev enp4s0 mtu 1500 + ip link set dev $(ip route get 8.8.8.8 | awk -- '{printf $5}') mtu 1500 else echo "Error: Unsupported or unknown configuration management tool specified, exiting." @@ -164,6 +170,7 @@ execute_configuration_management() { } + execute_cleanup() { case $OS_VARIANT in @@ -198,18 +205,35 @@ execute_cleanup() { *) echo "Error: Something went wrong during cleanup, OS_VARIANT set to: $OS_VARIANT" && exit 1 ;; - esac + esac rm -Rf /tmp/firecracker-install/* - + +} + +prepare_jailers() { + if test -f "./prepare_jailer.sh"; then + ITERATIONS="${1:-100}" # Default to 100 jails + echo "Creating $ITERATIONS jails..." + for (( iter=0; iter<$ITERATIONS; iter++ )) + do + ./prepare_jailer.sh $iter & + done + wait + else + echo "prepare_jailer.sh script not found, skipping jail creation." 
+ exit 1 + fi } # ----------------------------------------- VARIABLES_FILE=$1 +JAILS_TO_CREATE=$2 check_params_set && echo -e "Installation Values found to be:\n - $VARIABLES_FILE" check_os_release && echo -e "Operating System found to be:\n - $OS_VARIANT" install_pre_reqs execute_configuration_management -execute_cleanup \ No newline at end of file +prepare_jailers $JAILS_TO_CREATE +execute_cleanup diff --git a/component/firecracker-base/prepare_jailer.sh b/component/firecracker-base/prepare_jailer.sh new file mode 100755 index 0000000000..8e4c60ccf8 --- /dev/null +++ b/component/firecracker-base/prepare_jailer.sh @@ -0,0 +1,164 @@ +#!/bin/bash + +set -euo pipefail + +SB_ID="${1:-0}" # Default to sb_id=0 + +DATA_DIR="/firecracker-data" +JAILER_DIR="/srv/jailer/firecracker" +JAILER_BINARY="/usr/bin/jailer" + +ROOTFS="rootfs.ext4" +KERNEL="image-kernel.bin" + +RO_DRIVE="$DATA_DIR/$ROOTFS" +KERNEL_IMG="$DATA_DIR/$KERNEL" + +TAP_DEV="fc-${SB_ID}-tap0" +FC_MAC="$(printf '02:FC:00:00:%02X:%02X' $((SB_ID / 256)) $((SB_ID % 256)))" +VETH_DEV="veth-jailer$SB_ID" +JAILER_NS="jailer-$SB_ID" + +########## ############################# ######### +########## User Prep ######### +########## ############################# ######### + +# Create a user and group to run the execution via for one micro-vm +if id 10000$SB_ID >/dev/null 2>&1; then + echo "User 10000$SB_ID already exists, skipping creation" +else + echo "Creating user 10000$SB_ID" + useradd -M -u 10000$SB_ID $JAILER_NS + usermod -L $JAILER_NS + + # This was created earlier on the machine provisioning + usermod -a -G jailer-processes $JAILER_NS + usermod -a -G root $JAILER_NS + usermod -a -G kvm $JAILER_NS +fi + +########## ############################# ######### +########## Disk Prep ######### +########## ############################# ######### + +JAIL="$JAILER_DIR/$SB_ID/root" +mkdir -p "$JAIL/" + +touch $JAIL/logs +touch $JAIL/metrics + +if test -f "$JAIL/$KERNEL"; then + echo "Jailed kernel exists, skipping creation." 
+else + echo "Copying kernel image..." + cp -v $KERNEL_IMG "$JAIL/$KERNEL" +fi + +if test -f "$JAIL/$ROOTFS"; then + echo "Jailed rootfs exists, skipping creation." +else + echo "Copying rootfs..." + cp -v $RO_DRIVE "$JAIL/$ROOTFS" + # using device mapper for CoW should be faster. Something like this? + # OVERLAY="$JAIL/$ROOTFS" + # touch $OVERLAY + # truncate --size=1073741824 $OVERLAY + # BASE_LOOP=$(losetup --find --show --read-only $RO_DRIVE) + # LOOP=$(losetup --find --show --read-only $OVERLAY) + # BASE_SZ=$(blockdev --getsz $RO_DRIVE) + # OVERLAY_SZ=$(blockdev --getsz $OVERLAY_LOOP) + # printf "0 $BASE_SZ linear $BASE_LOOP 0\n$BASE_SZ $OVERLAY_SZ zero" | dmsetup create rootfsbase + # echo "0 $OVERLAY_SZ snapshot /dev/mapper/overlay_$SB_ID $LOOP P 8" | dmsetup create overlay_$SB_ID +fi + +chown -R jailer-$SB_ID:jailer-$SB_ID $JAIL/ + +########## ############################# ######### +########## Network Prep ######### +########## ############################# ######### + +# Create network namespace for jailer incantation +if test -f /run/netns/$JAILER_NS; then + echo "Network namespace $JAILER_NS already exists, skipping creation and configuration." +else + echo "Creating and configuring network..." 
+ ip netns add $JAILER_NS + + MASK_LONG="255.255.255.252" + MASK_SHORT="/30" + FC_IP="10.0.0.1" # Intentionally hardcoded to make cross-microvm communication + TAP_IP="10.0.0.2" # more difficult & to simplify rootfs creation/configuration + NET_LINK_MAIN_IP="$(printf '100.65.%s.%s' $(((4 * SB_ID + 1) / 256)) $(((4 * SB_ID + 1) % 256)))" + NET_LINK_JAILER_IP="$(printf '100.65.%s.%s' $(((4 * SB_ID + 2) / 256)) $(((4 * SB_ID + 2) % 256)))" + + # Setup TAP device that uses proxy ARP + ip netns exec $JAILER_NS ip link del "$TAP_DEV" || true + ip netns exec $JAILER_NS ip tuntap add dev "$TAP_DEV" mode tap + + # Disable ipv6, enable Proxy ARP + ip netns exec $JAILER_NS sysctl -w net.ipv4.conf.${TAP_DEV}.proxy_arp=1 > /dev/null + ip netns exec $JAILER_NS sysctl -w net.ipv6.conf.${TAP_DEV}.disable_ipv6=1 > /dev/null + + # Add IP to TAP for micro-vm + ip netns exec $JAILER_NS ip addr add "${TAP_IP}${MASK_SHORT}" dev "$TAP_DEV" + ip netns exec $JAILER_NS ip link set dev "$TAP_DEV" up + + # Set up IP link into default namespace for external routing + ip link add veth-main$SB_ID type veth peer name $VETH_DEV + ip link set $VETH_DEV netns $JAILER_NS + ip addr add $NET_LINK_MAIN_IP/30 dev veth-main$SB_ID + ip netns exec $JAILER_NS ip addr add $NET_LINK_JAILER_IP/30 dev $VETH_DEV + + # Bring the veth link up for external routing + ip link set dev veth-main$SB_ID up + ip netns exec $JAILER_NS ip link set dev $VETH_DEV up + ip netns exec $JAILER_NS ip route replace default via $NET_LINK_MAIN_IP + + # NAT within the namespace to route return traffic to TAP device of firecracker process for inbound traffic + ip netns exec $JAILER_NS iptables -t nat -A POSTROUTING -o $VETH_DEV -j MASQUERADE + ip netns exec $JAILER_NS iptables -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT + ip netns exec $JAILER_NS iptables -A FORWARD -i $TAP_DEV -o $VETH_DEV -j ACCEPT +fi + +########## ############################# ######### +########## Firecracker Prep ######### +########## 
############################# ######### + +cat << EOF > $JAIL/firecracker.conf +{ + "boot-source": { + "kernel_image_path": "./$KERNEL", + "boot_args": "panic=1 pci=off nomodules reboot=k tsc=reliable quiet i8042.nokbd i8042.noaux 8250.nr_uarts=0 ipv6.disable=1" + }, + "drives": [ + { + "drive_id": "1", + "is_root_device": true, + "is_read_only": true, + "path_on_host": "./$ROOTFS" + } + ], + "machine-config": { + "vcpu_count": 1, + "mem_size_mib": 512 + }, + "network-interfaces": [{ + "iface_id": "1", + "guest_mac": "$FC_MAC", + "host_dev_name": "$TAP_DEV" + }], + "vsock":{ + "guest_cid": 3, + "uds_path": "./v.sock" + }, + "logger": { + "level": "Debug", + "log_path": "./logs", + "show_level": false, + "show_log_origin": false + }, + "metrics": { + "metrics_path": "./metrics" + } +} +EOF diff --git a/component/firecracker-base/start.sh b/component/firecracker-base/start.sh old mode 100644 new mode 100755 index c60ab19b98..a90614ed24 --- a/component/firecracker-base/start.sh +++ b/component/firecracker-base/start.sh @@ -3,179 +3,24 @@ set -euo pipefail SB_ID="${1:-0}" # Default to sb_id=0 - JAILER_BINARY="/usr/bin/jailer" -FC_BINARY="/usr/bin/jailer" -RO_DRIVE="/firecracker-data/rootfs.ext4" -KERNEL="/firecracker-data/image-kernel.bin" -TAP_DEV="fc-${SB_ID}-tap0" - -KERNEL_BOOT_ARGS="panic=1 pci=off nomodules reboot=k tsc=reliable quiet i8042.nokbd i8042.noaux 8250.nr_uarts=0 ipv6.disable=1" -#KERNEL_BOOT_ARGS="console=ttyS0 reboot=k panic=1 pci=off nomodules i8042.nokbd i8042.noaux ipv6.disable=1" - -API_SOCKET="/srv/jailer/firecracker/$SB_ID/root/run/firecracker.socket" -CURL=(curl --silent --show-error --header "Content-Type: application/json" --unix-socket "${API_SOCKET}" --write-out "HTTP %{http_code}") - -# Create a user and group to run the execution via for one micro-vm -# TODO(johnrwatson): There is a edge case where this will clash with an already existing user + will fail -# root@ip-10-1-29-58:/firecracker-data# useradd -M -u 10000$SB_ID jailer-$SB_ID -# 
useradd: user 'jailer-20359' already exists -useradd -M -u 10000$SB_ID jailer-$SB_ID -usermod -L jailer-$SB_ID - -# This was created earlier on the machine provisioning -# groupadd -g 10000 jailer-processes -usermod -a -G jailer-processes jailer-$SB_ID -usermod -a -G root jailer-$SB_ID -usermod -a -G kvm jailer-$SB_ID - -curl_put() { - local URL_PATH="$1" - local OUTPUT RC - OUTPUT="$("${CURL[@]}" -X PUT --data @- "http://localhost/${URL_PATH#/}" 2>&1)" - RC="$?" - if [ "$RC" -ne 0 ]; then - echo "Error: curl PUT ${URL_PATH} failed with exit code $RC, output:" - echo "$OUTPUT" - return 1 - fi - # Error if output doesn't end with "HTTP 2xx" - if [[ "$OUTPUT" != *HTTP\ 2[0-9][0-9] ]]; then - echo "Error: curl PUT ${URL_PATH} failed with non-2xx HTTP status code, output:" - echo "$OUTPUT" - return 1 - fi -} - -logfile="/srv/jailer/firecracker/$SB_ID/root/fc-sb${SB_ID}-log" -metricsfile="/srv/jailer/firecracker/$SB_ID/root/fc-sb${SB_ID}-metrics" - -mkdir -p /srv/jailer/firecracker/$SB_ID/root/ -touch "$logfile" -touch "$metricsfile" - -# Simlink in the rootfs and kernel files -# TODO(johnrwatson): Figure out how to avoid this, it's a total waste of resources -cp $KERNEL /srv/jailer/firecracker/$SB_ID/root/image-kernel.bin -cp $RO_DRIVE /srv/jailer/firecracker/$SB_ID/root/rootfs.ext4 - -chown -R jailer-$SB_ID:jailer-$SB_ID /srv/jailer/firecracker/$SB_ID/root/ - -# Create network namespace for jailer incantation -ip netns add jailer-$SB_ID - -# Setup TAP device that uses proxy ARP -MASK_LONG="255.255.255.252" -MASK_SHORT="/30" -FC_IP="10.0.0.1" # Intentionally hardcoded to make cross-microvm communication -TAP_IP="10.0.0.2" # more difficult & to simplify rootfs creation/configuration -FC_MAC="$(printf '02:FC:00:00:%02X:%02X' $((SB_ID / 256)) $((SB_ID % 256)))" -ip netns exec jailer-$SB_ID ip link del "$TAP_DEV" 2> /dev/null || true -ip netns exec jailer-$SB_ID ip tuntap add dev "$TAP_DEV" mode tap - -# Disable ipv6, enable Proxy ARP -ip netns exec jailer-$SB_ID sysctl -w 
net.ipv4.conf.${TAP_DEV}.proxy_arp=1 > /dev/null -ip netns exec jailer-$SB_ID sysctl -w net.ipv6.conf.${TAP_DEV}.disable_ipv6=1 > /dev/null - -# Add IP to TAP for micro-vm -ip netns exec jailer-$SB_ID ip addr add "${TAP_IP}${MASK_SHORT}" dev "$TAP_DEV" -ip netns exec jailer-$SB_ID ip link set dev "$TAP_DEV" up - -NET_LINK_MAIN_IP="$(printf '100.65.%s.%s' $(((4 * SB_ID + 1) / 256)) $(((4 * SB_ID + 1) % 256)))" -NET_LINK_JAILER_IP="$(printf '100.65.%s.%s' $(((4 * SB_ID + 2) / 256)) $(((4 * SB_ID + 2) % 256)))" - -# Set up IP link into default namespace for external routing -ip link add veth-main$1 type veth peer name veth-jailer$1 -ip link set veth-jailer$1 netns jailer-$1 -ip addr add $NET_LINK_MAIN_IP/30 dev veth-main$1 -ip netns exec jailer-$1 ip addr add $NET_LINK_JAILER_IP/30 dev veth-jailer$1 - -# Bring the veth link up for external routing -ip link set dev veth-main$1 up -ip netns exec jailer-$1 ip link set dev veth-jailer$1 up -ip netns exec jailer-$1 ip route add default via $NET_LINK_MAIN_IP -ip netns exec jailer-$1 ip route - -# NAT within the namespace to route return traffic to TAP device of firecracker process for inbound traffic -ip netns exec jailer-$1 iptables -t nat -A POSTROUTING -o veth-jailer$1 -j MASQUERADE -ip netns exec jailer-$1 iptables -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT -ip netns exec jailer-$1 iptables -A FORWARD -i fc-$1-tap0 -o veth-jailer$1 -j ACCEPT - -KERNEL_BOOT_ARGS="${KERNEL_BOOT_ARGS} ip=${FC_IP}::${TAP_IP}:${MASK_LONG}::eth0:off" - -# Start Firecracker API server -rm -f "$API_SOCKET" || echo "socket missing" - -# Example FC invocation directly -#"${FC_BINARY}" --api-sock "$API_SOCKET" --id "${SB_ID}" --boot-timer >> "$logfile" & - -# TODO(johnrwatson): We don't use proper cgroup isolation, we probably want this in the future -"${JAILER_BINARY}" --cgroup-version 2 --id $SB_ID --exec-file /usr/bin/firecracker --uid 10000$SB_ID --gid 10000 --netns /var/run/netns/jailer-$SB_ID --new-pid-ns -- --boot-timer 
>> "$logfile" & - -sleep 0.015s - -# Wait for API server to start -while [ ! -e "$API_SOCKET" ]; do - echo "FC $SB_ID still not ready..." - sleep 0.01s -done - -curl_put '/logger' < /dev/null || true diff --git a/lib/deadpool-cyclone/src/instance/cyclone/local_uds.rs b/lib/deadpool-cyclone/src/instance/cyclone/local_uds.rs index cce8316234..ab9e80a2ef 100644 --- a/lib/deadpool-cyclone/src/instance/cyclone/local_uds.rs +++ b/lib/deadpool-cyclone/src/instance/cyclone/local_uds.rs @@ -697,7 +697,7 @@ impl LocalFirecrackerRuntime { // over, ensuring that we do cleanup along the way // Obviously this has the potential to clash, but overall the risk here is fairly low // assuming that cleanup works as expected ;) - let vm_id: String = thread_rng().gen_range(0..5000).to_string(); + let vm_id: String = thread_rng().gen_range(0..100).to_string(); let sock = PathBuf::from(&format!("/srv/jailer/firecracker/{}/root/v.sock", vm_id)); Ok(Box::new(LocalFirecrackerRuntime {