Freax13 · Freax13 · Oct 22, 2024 · Oct 20, 2024 · Oct 20, 2024 · Oct 22, 2024
diff --git a/README.md b/README.md
@@ -44,6 +44,8 @@ It should be impossible for the attacker to tamper with any of these without the
 
 There is no way for the workloads to communicate with the host, external devices or external services. The only input to the workload is the input file provided to it.
 
+More details can be found [here](docs/attestation.md).
+
 ### Non-goals
 
 The workload kernel has not been hardened against attacks from within the workload itself, only the outputs of trusted workloads and inputs should be considered trusted. 

diff --git a/common/io/src/input.rs b/common/io/src/input.rs
@@ -1,19 +1,42 @@
-use bytemuck::{bytes_of, Pod, Zeroable};
-use sha2::{Digest, Sha256};
+use bytemuck::{bytes_of, CheckedBitPattern, NoUninit};
+use sha2::{Digest, Sha256, Sha384};
 
-#[derive(Debug, Clone, Copy, Pod, Zeroable)]
+pub const MAX_HASH_SIZE: usize = 48; // size of the `hash` buffer; 48 bytes is what the SHA-384 arm of `HashType::hash` fills
+
+#[derive(Debug, Clone, Copy, CheckedBitPattern, NoUninit, PartialEq, Eq)] // header written in front of each input chunk
 #[repr(C)]
 pub struct Header {
-    pub input_len: usize,
-    pub hash: [u8; 32],
+    pub input_len: u64, // chunk length in bytes; `Header::end()` stores `!0` here as the end marker
+    pub hash_type: HashType, // algorithm that produced `hash`
+    pub hash: [u8; MAX_HASH_SIZE], // chunk digest; trailing bytes stay zero when the digest is shorter than the buffer
+    pub next_hash: [u8; 32], // set from `next.hash()` of the following header in `Header::new`; zeroed by `without_hash` and `end`
 }
 
 impl Header {
-    pub fn new(bytes: &[u8]) -> Self {
-        let hash = Sha256::digest(bytes);
+    pub fn new(bytes: &[u8], hash_type: HashType, next: &Self) -> Self {
+        Self {
+            input_len: bytes.len() as u64,
+            hash_type,
+            hash: hash_type.hash(bytes),
+            next_hash: next.hash(),
+        }
+    }
+
+    pub fn without_hash(bytes: &[u8]) -> Self {
+        Self {
+            input_len: bytes.len() as u64,
+            hash_type: HashType::Sha256,
+            hash: [0; MAX_HASH_SIZE],
+            next_hash: [0; 32],
+        }
+    }
+
+    pub const fn end() -> Self {
         Self {
-            input_len: bytes.len(),
-            hash: <[u8; 32]>::from(hash),
+            input_len: !0,
+            hash_type: HashType::Sha256,
+            hash: [0; MAX_HASH_SIZE],
+            next_hash: [0; 32],
         }
     }
 
@@ -25,3 +48,52 @@ impl Header {
         self.hash() == hash
     }
 }
+
+#[derive(Debug, Clone, Copy, CheckedBitPattern, NoUninit, PartialEq, Eq, Default)]
+#[repr(u64)] // discriminant is stored as a u64
+pub enum HashType { // selects the digest algorithm used for a chunk's `hash` field
+    #[default]
+    Sha256, // default variant; `hash` fills 32 bytes for it
+    Sha384, // `hash` fills 48 bytes for it
+}
+
+impl HashType {
+    pub fn hash(self, data: &[u8]) -> [u8; MAX_HASH_SIZE] { // one-shot digest of `data`, returned in a fixed-size buffer
+        let mut hash = [0; MAX_HASH_SIZE]; // starts zeroed, so bytes past the digest remain 0
+        match self {
+            HashType::Sha256 => hash[..32].copy_from_slice(&Sha256::digest(data)), // writes bytes 0..32 only
+            HashType::Sha384 => hash[..48].copy_from_slice(&Sha384::digest(data)), // writes bytes 0..48
+        }
+        hash
+    }
+}
+
+pub enum Hasher { // incremental hasher state, one variant per `HashType`
+    Sha256(Sha256), // created by `Hasher::new(HashType::Sha256)`
+    Sha384(Sha384), // created by `Hasher::new(HashType::Sha384)`
+}
+
+impl Hasher {
+    pub fn new(hash_type: HashType) -> Self { // builds a fresh hasher state matching `hash_type`
+        match hash_type {
+            HashType::Sha256 => Self::Sha256(Sha256::new()),
+            HashType::Sha384 => Self::Sha384(Sha384::new()),
+        }
+    }
+
+    pub fn update(&mut self, data: &[u8]) { // feeds `data` into whichever hasher this value wraps
+        match self {
+            Hasher::Sha256(hasher) => hasher.update(data),
+            Hasher::Sha384(hasher) => hasher.update(data),
+        }
+    }
+
+    pub fn verify(self, hash: [u8; MAX_HASH_SIZE]) { // consumes the hasher and panics unless its digest equals `hash`
+        let mut bytes = [0; MAX_HASH_SIZE]; // zeroed so the comparison uses the same padding as `HashType::hash`
+        match self {
+            Hasher::Sha256(hasher) => bytes[..32].copy_from_slice(&hasher.finalize()), // bytes 32..48 stay zero
+            Hasher::Sha384(hasher) => bytes[..48].copy_from_slice(&hasher.finalize()),
+        }
+        assert_eq!(hash, bytes, "input hash doesn't match hash in header"); // compares all MAX_HASH_SIZE bytes, padding included
+    }
+}
diff --git a/common/loader/src/init.rs b/common/loader/src/init.rs
@@ -1,6 +1,8 @@
 use std::iter::once;
 
+use bytemuck::bytes_of;
 use constants::physical_address::INIT_FILE;
+use io::input::Header;
 use snp_types::VmplPermissions;
 use x86_64::structures::paging::PhysFrame;
 
@@ -9,27 +11,34 @@ use crate::{LoadCommand, LoadCommandPayload};
 pub fn load_init(init: &[u8]) -> impl Iterator<Item = LoadCommand> + '_ {
     let start_frame = PhysFrame::from_start_address(INIT_FILE.start.start_address()).unwrap();
     let end_frame = PhysFrame::from_start_address(INIT_FILE.end.start_address()).unwrap();
-    let mut frames = PhysFrame::range(start_frame, end_frame);
+    let frames = PhysFrame::range(start_frame, end_frame); // frames of the INIT_FILE region; paired with the payloads at the end
 
+    let start_header = Header::without_hash(init); // header carrying the init length with zeroed hash fields
     let mut bytes = [0; 0x1000];
-    bytes[..8].copy_from_slice(&init.len().to_ne_bytes());
+    bytes[..size_of::<Header>()].copy_from_slice(bytes_of(&start_header)); // NOTE(review): no `size_of` import is visible in this hunk — presumably relies on the Rust 1.80+ prelude; confirm
+    let start_header_payload = LoadCommandPayload::Normal(bytes); // header bytes followed by zeros in a 0x1000-byte buffer
 
-    let physical_address = frames.next().unwrap();
-    let header_load = LoadCommand {
-        physical_address,
-        vmpl1_perms: VmplPermissions::READ,
-        payload: LoadCommandPayload::Normal(bytes),
-    };
+    let end_header = Header::end(); // terminating header (`input_len` is `!0`)
+    let mut bytes = [0; 0x1000]; // shadows the first buffer, which was already moved into the start payload
+    bytes[..size_of::<Header>()].copy_from_slice(bytes_of(&end_header));
+    let end_header_payload = LoadCommandPayload::Normal(bytes);
 
-    once(header_load).chain(init.chunks(0x1000).map(move |chunk| {
-        let mut bytes = [0; 0x1000];
-        bytes[..chunk.len()].copy_from_slice(chunk);
+    let payloads = once((VmplPermissions::READ, start_header_payload)) // order: start header, init chunks, end header
+        .chain(init.chunks(0x1000).map(|chunk| {
+            let mut bytes = [0; 0x1000]; // a short final chunk is zero-padded to 0x1000 bytes
+            bytes[..chunk.len()].copy_from_slice(chunk);
+            (
+                VmplPermissions::READ | VmplPermissions::EXECUTE_USER, // init contents get READ and EXECUTE_USER
+                LoadCommandPayload::Normal(bytes),
+            )
+        }))
+        .chain(once((VmplPermissions::READ, end_header_payload))); // both headers get READ only
 
-        let physical_address = frames.next().unwrap();
-        LoadCommand {
+    payloads
+        .zip(frames) // NOTE(review): zip stops at the shorter side, so payloads beyond the frame range are dropped silently; the removed code panicked via `frames.next().unwrap()`
+        .map(|((vmpl1_perms, payload), physical_address)| LoadCommand {
             physical_address,
-            vmpl1_perms: VmplPermissions::READ | VmplPermissions::EXECUTE_USER,
-            payload: LoadCommandPayload::Normal(bytes),
-        }
-    }))
+            vmpl1_perms,
+            payload,
+        })
 }
diff --git a/common/loader/src/input.rs b/common/loader/src/input.rs
@@ -2,25 +2,52 @@ use std::{iter::once, mem::size_of};
 
 use bytemuck::bytes_of;
 use constants::physical_address::INPUT_FILE;
-use io::input::Header;
+use io::input::{HashType, Header};
 use snp_types::VmplPermissions;
 use x86_64::structures::paging::PhysFrame;
 
 use crate::{LoadCommand, LoadCommandPayload};
 
-pub fn load_input(input: &[u8]) -> (impl Iterator<Item = LoadCommand> + '_, [u8; 32]) {
-    let header = Header::new(input);
+pub struct Input<T> { // one input chunk handed to `load_input`, which takes `T: AsRef<[u8]>`
+    pub bytes: T, // chunk contents; read through `as_ref()` by `load_input`
+    pub hash_type: HashType, // digest algorithm passed to `Header::new` for this chunk
+}
+
+pub fn load_input(
+    inputs: &[Input<impl AsRef<[u8]>>],
+) -> (impl Iterator<Item = LoadCommand> + '_, [u8; 32]) {
+    let mut header = Header::end();
+    let mut headers = inputs
+        .iter()
+        .rev()
+        .map(|input| {
+            header = Header::new(input.bytes.as_ref(), input.hash_type, &header);
+            header
+        })
+        .collect::<Vec<_>>();
+    headers.reverse();
 
-    let payloads = once(LoadCommandPayload::Shared({
-        let mut bytes = [0; 0x1000];
-        bytes[..size_of::<Header>()].copy_from_slice(bytes_of(&header));
-        bytes
-    }))
-    .chain(input.chunks(0x1000).map(|chunk| {
-        let mut bytes = [0; 0x1000];
-        bytes[..chunk.len()].copy_from_slice(chunk);
-        LoadCommandPayload::Shared(bytes)
-    }));
+    let payloads = headers
+        .into_iter()
+        .zip(inputs)
+        .flat_map(|(header, input)| {
+            once(LoadCommandPayload::Shared({
+                let mut bytes = [0; 0x1000];
+                bytes[..size_of::<Header>()].copy_from_slice(bytes_of(&header));
+                bytes
+            }))
+            .chain(input.bytes.as_ref().chunks(0x1000).map(|chunk| {
+                let mut bytes = [0; 0x1000];
+                bytes[..chunk.len()].copy_from_slice(chunk);
+                LoadCommandPayload::Shared(bytes)
+            }))
+        })
+        .chain(once(LoadCommandPayload::Shared({
+            let header = Header::end();
+            let mut bytes = [0; 0x1000];
+            bytes[..size_of::<Header>()].copy_from_slice(bytes_of(&header));
+            bytes
+        })));
 
     let start_frame = PhysFrame::from_start_address(INPUT_FILE.start.start_address()).unwrap();
     let end_frame = PhysFrame::from_start_address(INPUT_FILE.end.start_address()).unwrap();

diff --git a/common/loader/src/lib.rs b/common/loader/src/lib.rs
@@ -10,6 +10,9 @@ mod input;
 mod kernel;
 mod supervisor;
 
+pub use input::Input;
+pub use io::input::HashType;
+
 #[derive(Debug)]
 pub struct LoadCommand {
     pub physical_address: PhysFrame,
@@ -69,10 +72,10 @@ pub fn generate_load_commands<'a>(
     kernel: &'a [u8],
     init: &'a [u8],
     load_kasan_shadow_mappings: bool,
-    input: &'a [u8],
+    inputs: &'a [Input<impl AsRef<[u8]>>],
 ) -> (impl Iterator<Item = LoadCommand> + 'a, [u8; 32]) {
     let base_load_commands =
         generate_base_load_commands(supervisor, kernel, init, load_kasan_shadow_mappings);
-    let (load_input, host_data) = input::load_input(input);
+    let (load_input, host_data) = input::load_input(inputs);
     (base_load_commands.chain(load_input), host_data)
 }
diff --git a/docs/attestation.md b/docs/attestation.md
@@ -0,0 +1,113 @@
+# Attestation
+
+Attestation is used to prove that a certain set of supervisor, kernel, workload init binary, and workload input were used to produce a certain output.
+If any binary or input is changed, the change will be visible in the attestation report.
+Notably, this includes malicious workload inputs:
+Even if a malicious input manages to exploit the workload, it is impossible for the exploit to generate an attestation report that doesn't include the malicious workload input.
+Attestation reports are signed by the hardware root of trust.
+
+## Hardware-measured input memory
+
+The supervisor, the kernel, and the workload init binary are added directly to the initial guest memory during launch.
+On AMD SEV-SNP, this memory is added using `SNP_LAUNCH_UPDATE`.
+On Intel TDX, this memory is added using `MEM.PAGE.ADD` and `MR.EXTEND`.
+
+The hardware derives a launch measurement from the initial guest memory.
+This launch measurement never changes unless the supervisor, the kernel, or the init binary changes.
+On AMD SEV-SNP, the launch measurement is stored in the `MEASUREMENT` field in the attestation report.
+On Intel TDX, the launch measurement is stored in the `MRTD` field in the TD quote.
+The launch measurement is independent of the workload input and output.
+
+`mushroom verify` computes the launch measurement for a given supervisor, kernel, and workload init binary and verifies that it matches the value in the attestation report.
+
+#### Details
+
+The initial memory is assembled by the [`loader`](../common/loader/) sub-crate.
+We use special linker scripts for the supervisor and kernel that explicitly specify physical addresses for all segments.
+The loader parses the ELF binaries and generates load commands for each segment at the specified physical addresses.
+On AMD SEV-SNP, the loader uses the segment permissions in the kernel binary as the VMPL 1 permissions used in `SNP_LAUNCH_UPDATE`.
+On Intel TDX, the loader cannot add permissions for the L2 VM, so this is done by the supervisor during boot.
+
+## Supervisor-measured input memory
+
+The workload input is measured and verified by the supervisor before the workload is started.
+
+The workload input is initially stored in unmeasured shared memory.
+As the supervisor reads and verifies the input, it converts it into private memory.
+The supervisor never interprets the input in any way; it only passes it forward to the workload kernel.
+
+The supervisor verifies that the input matches a hypervisor-supplied hash.
+This hash is also part of the attestation report.
+Because the hash is part of the attestation report, the hypervisor can't change the input hash without this being visible in the attestation report, so the hash isn't considered an untrusted input even though it's supplied by the hypervisor.
+On AMD SEV-SNP, the input hash is stored in the `HOST_DATA` field in the attestation report.
+On Intel TDX, the input hash is stored in the first 32 bytes of the `MRCONFIGID` field in the TD quote.
+
+`mushroom verify` computes the hash for a given input and verifies that it matches the value in the attestation report.
+
+#### Details
+
+Mushroom allows the input to be split up into multiple chunks.
+Chunks are placed one after another in memory.
+
+Each input chunk is preceded by a header containing the chunk length, its hash, the hash type, and the hash of the next chunk header:
+```rust
+#[repr(C)]
+pub struct Header {
+    pub input_len: u64,
+    pub hash_type: HashType,
+    pub hash: [u8; MAX_HASH_SIZE],
+    pub next_hash: [u8; 32],
+}
+```
+The first header is verified by hashing it with SHA-256 and comparing the digest to the hash in the attestation report (`HOST_DATA` or `MRCONFIGID`).
+Each following header is verified by hashing it with SHA-256 and comparing the digest to the `next_hash` in the previous header.
+Note that `hash` contains the digest of the chunk content whereas `next_hash` contains the digest of the next chunk header (**not** the next chunk content).
+Because each chunk header contains the digest of the next chunk header and is therefore dependent on its content, the hashes have to be calculated from back to front, but the supervisor can verify the hashes from front to back.
+
+The end of the chunk list is marked by a final header whose `input_len` is equal to `0xffff_ffff_ffff_ffff`.
+
+### Why isn't the workload input measured as part of the launch measurement?
+
+1. Separation of concerns: mushroom is designed for use cases where the relying party will verify many attestation reports for the same workload with different inputs.
+   Separating the workload from its input simplifies the computations needed to verify attestation reports.
+2. Performance: Adding memory to the launch measurement is fairly slow on AMD SEV-SNP.
+   A large input could cause significant performance problems.
+
+## Output
+
+The workload output is hashed using SHA-256 and its digest placed into the attestation report.
+The output size is also placed into the attestation report.
+On AMD SEV-SNP, the output digest and size are placed in the `REPORT_DATA` field in the attestation report.
+On Intel TDX, the output digest and size are placed in the `REPORTDATA` field in the TD quote.
+
+Note that on both AMD SEV-SNP and Intel TDX, the `REPORT_DATA` and `REPORTDATA` fields are the only fields that can be influenced by the workload at runtime.
+The workload cannot influence any other fields including the `MEASUREMENT`, `MRTD`, `HOST_DATA`, and `MRCONFIGID` fields as all of these are protected by the hardware.
+If a malicious party were to change the supervisor, kernel, workload init binary or workload input in such a way that it gains code execution within the workload, the hardware will prevent the attacker from creating attestation reports that don't reflect the changed binaries/input.
+
+`mushroom verify` computes the hash for a given output and verifies that the digest and output size match the values in the attestation report.
+
+## Attestation report formats
+
+### AMD SEV-SNP
+
+On AMD SEV-SNP, the attestation report returned by mushroom contains an attestation report created by the hardware concatenated with the VCEK certificate that proves that the attestation report was generated by real hardware.
+`mushroom verify` checks that the public key in the VCEK matches the signature in the attestation report and checks that the VCEK was signed by one of the built-in ASKs [^1].
+
+[^1]: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/57230.pdf
+
+### Intel TDX
+
+On Intel TDX, the attestation report is just a normal TD quote version 4 [^2].
+
+Note that the supervisor outputs a TD report, not a TD quote.
+It's the responsibility of the mushroom VMM to talk to the quote generation service running on the host to turn the TD report into a full TD quote.
+This doesn't need to be done inside the TD guest.
+
+[^2]: https://download.01.org/intel-sgx/latest/dcap-latest/linux/docs/Intel_TDX_DCAP_Quoting_Library_API.pdf
+
+## Policies & SVNs
+
+Mushroom verifies the trustworthiness of the TEE hardware/firmware by checking the policies and SVNs in attestation reports.
+The mushroom CLI uses reasonable defaults for policies.
+The default SVN minimums match the latest available SVNs at the time mushroom is compiled, but these may become outdated when new TEE firmwares are released.
+Library users have to specify their own allowed policy flags and minimum SVNs.
diff --git a/host/mushroom-verify/Cargo.toml b/host/mushroom-verify/Cargo.toml
@@ -5,7 +5,7 @@ edition = "2021"
 
 [dependencies]
 bytemuck = { version = "1.15.0", features = ["derive", "min_const_generics"], optional = true }
-loader = { workspace = true, optional = true }
+loader = { workspace = true }
 io = { workspace = true }
 p384 = { version = "0.13.0", optional = true }
 sha2 = "0.10.8"
@@ -16,5 +16,5 @@ x86_64 = { version = "0.15.1", default-features = false, optional = true }
 
 [features]
 default = ["snp", "tdx"]
-snp = ["dep:bytemuck", "dep:p384", "dep:loader", "dep:snp-types", "dep:vcek-kds"]
-tdx = ["dep:loader", "dep:tdx-types", "dep:x86_64"]
+snp = ["dep:bytemuck", "dep:p384", "dep:snp-types", "dep:vcek-kds"]
+tdx = ["dep:tdx-types", "dep:x86_64"]