diff --git a/common/constants/src/lib.rs b/common/constants/src/lib.rs index a340ce2c..89150a78 100644 --- a/common/constants/src/lib.rs +++ b/common/constants/src/lib.rs @@ -332,6 +332,10 @@ impl IntoIterator for PageRange { } } +// Irq vectors used in the kernel. +pub const TLB_VECTOR: u8 = 0x20; +pub const TIMER_VECTOR: u8 = 0x21; + #[cfg(test)] fn check_ranges(ranges: &[PageRange]) where diff --git a/common/tdx-types/src/tdcall.rs b/common/tdx-types/src/tdcall.rs index 3df87bbf..2dcf568b 100644 --- a/common/tdx-types/src/tdcall.rs +++ b/common/tdx-types/src/tdcall.rs @@ -73,6 +73,17 @@ impl MdFieldId { true, ); + pub const TDVPS_TSC_DEADLINE: Self = Self::new( + 89, + ElementSizeCode::SixtyFour, + 0, + 0, + false, + ContextCode::TdVcpu, + 32, + true, + ); + #[allow(clippy::too_many_arguments)] const fn new( field_code: u32, @@ -217,16 +228,6 @@ impl Apic { self.0[0x20 / 4].store(value, Ordering::SeqCst); } - /// Returns the highest priority requested interrupt. - pub fn pending_vectora_todo(&self) -> Option { - (0..8).rev().find_map(|i| { - let offset = 0x100 | (i * 16); - let idx = offset / 4; - let irr = self.0[idx].load(Ordering::SeqCst); - (irr != 0).then(|| i as u8 * 32 + 31 - irr.leading_zeros() as u8) - }) - } - /// Returns the highest priority requested interrupt. 
pub fn pending_vector(&self) -> Option { (0..8).rev().find_map(|i| { diff --git a/common/tdx-types/src/vmexit.rs b/common/tdx-types/src/vmexit.rs index a20fd99a..1bee184f 100644 --- a/common/tdx-types/src/vmexit.rs +++ b/common/tdx-types/src/vmexit.rs @@ -2,3 +2,4 @@ pub const VMEXIT_REASON_CPUID_INSTRUCTION: u32 = 10; pub const VMEXIT_REASON_HLT_INSTRUCTION: u32 = 12; pub const VMEXIT_REASON_VMCALL_INSTRUCTION: u32 = 18; pub const VMEXIT_REASON_MSR_WRITE: u32 = 32; +pub const VMEXIT_REASON_PREEMPTION_TIMER_EXPIRED: u32 = 52; diff --git a/host/mushroom/Cargo.toml b/host/mushroom/Cargo.toml index 739ca89e..749551b6 100644 --- a/host/mushroom/Cargo.toml +++ b/host/mushroom/Cargo.toml @@ -24,7 +24,7 @@ constants = { workspace = true } loader = { workspace = true } log-types = { workspace = true, features = ["std"] } mushroom-verify = { workspace = true, optional = true } -nix = { version = "0.29.0", features = ["fs", "ioctl", "mman", "pthread", "resource", "signal"] } +nix = { version = "0.29.0", features = ["fs", "ioctl", "mman", "pthread", "resource", "signal", "time"] } profiler-types = { workspace = true } qgs-client = { workspace = true, optional = true } rand = "0.8.5" diff --git a/host/mushroom/src/insecure.rs b/host/mushroom/src/insecure.rs index 12b8d2dc..ca0000ce 100644 --- a/host/mushroom/src/insecure.rs +++ b/host/mushroom/src/insecure.rs @@ -16,13 +16,23 @@ use anyhow::{bail, Context, Result}; use bit_field::BitField; use constants::{ physical_address::{kernel, supervisor, DYNAMIC_2MIB}, - INSECURE_SUPERVISOR_CALL_PORT, MAX_APS_COUNT, + INSECURE_SUPERVISOR_CALL_PORT, MAX_APS_COUNT, TIMER_VECTOR, }; use loader::Input; -use nix::sys::pthread::pthread_kill; +use nix::{ + sys::{ + pthread::pthread_kill, + signal::SigEvent, + time::TimeSpec, + timer::{Timer, TimerSetTimeFlags}, + }, + time::ClockId, + unistd::gettid, +}; use snp_types::PageType; use supervisor_services::{SlotIndex, SupervisorCallNr}; use tracing::info; +use volatile::map_field; use 
x86_64::registers::{ control::{Cr0Flags, Cr4Flags}, model_specific::EferFlags, @@ -45,6 +55,8 @@ static YMM_OFFSET: LazyLock = LazyLock::new(|| { res.ebx as usize }); +const TIMER_PERIOD: Duration = Duration::from_millis(10); + /// Create the VM, load the kernel, init & input and run the APs. pub fn main( kvm_handle: &KvmHandle, @@ -293,9 +305,40 @@ fn run_kernel_vcpu( let xsave_size = *KVM_XSAVE_SIZE.get().unwrap(); let run_state = &run_states[usize::from(id)]; - run_state.wait(); + run_state.wait(Duration::MAX); + + // Set up a timer to regularly kick the thread out of KVM_RUN. + let mut timer = Timer::new( + ClockId::CLOCK_MONOTONIC, + SigEvent::new(nix::sys::signal::SigevNotify::SigevThreadId { + signal: SIG_KICK, + thread_id: gettid().as_raw(), + si_value: 0, + }), + )?; + timer.set( + nix::sys::timer::Expiration::Interval(TimeSpec::from_duration(TIMER_PERIOD)), + TimerSetTimeFlags::empty(), + )?; + let mut last_timer_injection = Instant::now(); + let mut in_service_timer_irq = false; while !run_state.is_stopped() { + // Check if we need to inject a timer interrupt. + if !in_service_timer_irq && last_timer_injection.elapsed() >= TIMER_PERIOD { + if map_field!(kvm_run.ready_for_interrupt_injection).read() != 0 { + map_field!(kvm_run.request_interrupt_window).write(0); + + ap.interrupt(TIMER_VECTOR)?; + + last_timer_injection = Instant::now(); + in_service_timer_irq = true; + } else { + // Ask to be notified when the guest can receive an interrupt. + map_field!(kvm_run.request_interrupt_window).write(1); + } + } + // Run the AP. 
let res = ap.run(); match res { @@ -321,7 +364,10 @@ fn run_kernel_vcpu( run_state.kick(); } } - nr if nr == SupervisorCallNr::Halt as u64 => run_state.wait(), + nr if nr == SupervisorCallNr::Halt as u64 => { + let timeout = TIMER_PERIOD.saturating_sub(last_timer_injection.elapsed()); + run_state.wait(timeout); + } nr if nr == SupervisorCallNr::Kick as u64 => { let index = regs.rdi as usize; run_states[index].kick(); @@ -390,6 +436,12 @@ fn run_kernel_vcpu( k }); } + KvmExit::WrMsr(msr) => match msr.index { + // EOI. + 0x80b => in_service_timer_irq = false, + index => unimplemented!("unimplemented MSR write to {index:#x}"), + }, + KvmExit::IrqWindowOpen => {} KvmExit::Interrupted => {} exit => { let regs = ap.get_regs()?; @@ -428,24 +480,28 @@ impl RunState { *self.running.lock().unwrap() == NextRunStateValue::Stopped } - pub fn wait(&self) { + pub fn wait(&self, timeout: Duration) { drop( self.condvar - .wait_while(self.running.lock().unwrap(), |state| match *state { - NextRunStateValue::Halted => { - // Keep waiting. - true - } - NextRunStateValue::Ready => { - // Consume the ready state and return. - *state = NextRunStateValue::Halted; - false - } - NextRunStateValue::Stopped => { - // Don't update the state, but return. - false - } - }) + .wait_timeout_while( + self.running.lock().unwrap(), + timeout, + |state| match *state { + NextRunStateValue::Halted => { + // Keep waiting. + true + } + NextRunStateValue::Ready => { + // Consume the ready state and return. + *state = NextRunStateValue::Halted; + false + } + NextRunStateValue::Stopped => { + // Don't update the state, but return. 
+ false + } + }, + ) .unwrap(), ); } diff --git a/host/mushroom/src/kvm.rs b/host/mushroom/src/kvm.rs index 49e5b9ee..a5250961 100644 --- a/host/mushroom/src/kvm.rs +++ b/host/mushroom/src/kvm.rs @@ -792,6 +792,17 @@ impl VcpuHandle { Ok(()) } + pub fn interrupt(&self, vector: u8) -> Result<()> { + #[repr(transparent)] + struct KvmInterrupt(u32); + let kvm_interrupt_val = KvmInterrupt(u32::from(vector)); + + ioctl_write_ptr!(kvm_interrupt, KVMIO, 0x86, KvmInterrupt); + let res = unsafe { kvm_interrupt(self.fd.as_raw_fd(), &kvm_interrupt_val) }; + res.context("failed to interrupt")?; + Ok(()) + } + #[cfg(feature = "tdx")] unsafe fn memory_encrypt_op_tdx<'a>( &self, diff --git a/tee/kernel/src/exception.rs b/tee/kernel/src/exception.rs index d4f30dbf..d13b7cba 100644 --- a/tee/kernel/src/exception.rs +++ b/tee/kernel/src/exception.rs @@ -7,11 +7,14 @@ use core::{ ptr::null_mut, }; -use crate::memory::pagetable::flush::{tlb_shootdown_handler, TLB_VECTOR}; +use crate::memory::pagetable::flush::tlb_shootdown_handler; use crate::spin::lazy::Lazy; -use crate::user::process::syscall::cpu_state::exception_entry; +use crate::time; +use crate::user::process::syscall::cpu_state::{exception_entry, interrupt_entry}; use alloc::alloc::alloc; +use constants::{TIMER_VECTOR, TLB_VECTOR}; use log::{debug, error, trace}; +use x86_64::registers::model_specific::Msr; use x86_64::structures::gdt::SegmentSelector; use x86_64::{ instructions::tables::load_tss, @@ -146,6 +149,7 @@ pub fn load_idt() { .set_handler_fn(general_protection_fault_handler); idt.page_fault.set_handler_fn(page_fault_handler); idt[TLB_VECTOR].set_handler_fn(tlb_shootdown_handler); + idt[TIMER_VECTOR].set_handler_fn(timer_handler); idt[0x80] .set_handler_fn(int0x80_handler) @@ -328,6 +332,35 @@ extern "x86-interrupt" fn double_fault_handler(frame: InterruptStackFrame, code: panic!("double fault {frame:x?} {code:x?}"); } +#[naked] +extern "x86-interrupt" fn timer_handler(frame: InterruptStackFrame) { + unsafe { + 
naked_asm!( + "cld", + // Check whether the irq happened in userspace. + "test word ptr [rsp+8], 3", + "je {kernel_timer_handler}", + + // Userspace code path: + "swapgs", + // Store the interrupt vector. + "mov byte ptr gs:[{VECTOR_OFFSET}], {TIMER_VECTOR}", + // Jump to the userspace exit point. + "jmp {interrupt_entry}", + + kernel_timer_handler = sym kernel_timer_handler, + VECTOR_OFFSET = const offset_of!(PerCpu, vector), + TIMER_VECTOR = const TIMER_VECTOR, + interrupt_entry = sym interrupt_entry, + ); + } +} + +extern "x86-interrupt" fn kernel_timer_handler(_: InterruptStackFrame) { + time::try_fire_clocks(); + eoi(); +} + #[naked] extern "x86-interrupt" fn int0x80_handler(frame: InterruptStackFrame) { // The code that entered userspace stored addresses where execution should @@ -343,3 +376,10 @@ extern "x86-interrupt" fn int0x80_handler(frame: InterruptStackFrame) { ); } } + +/// Signal EOI. +pub fn eoi() { + unsafe { + Msr::new(0x80b).write(0); + } +} diff --git a/tee/kernel/src/memory/pagetable/flush.rs b/tee/kernel/src/memory/pagetable/flush.rs index b35a47a4..e86cdd77 100644 --- a/tee/kernel/src/memory/pagetable/flush.rs +++ b/tee/kernel/src/memory/pagetable/flush.rs @@ -4,7 +4,7 @@ use core::{ }; use bit_field::{BitArray, BitField}; -use constants::{ApBitmap, ApIndex, AtomicApBitmap, MAX_APS_COUNT}; +use constants::{ApBitmap, ApIndex, AtomicApBitmap, MAX_APS_COUNT, TLB_VECTOR}; use x86_64::{ instructions::tlb::{self, InvPicdCommand, Invlpgb}, registers::{ @@ -14,12 +14,10 @@ use x86_64::{ structures::{idt::InterruptStackFrame, paging::Page}, }; -use crate::{per_cpu::PerCpu, spin::lazy::Lazy}; +use crate::{exception::eoi, per_cpu::PerCpu, spin::lazy::Lazy}; use super::ActivePageTableGuard; -pub const TLB_VECTOR: u8 = 0x20; - static INVLPGB: Lazy> = Lazy::new(Invlpgb::new); static ACTIVE_APS: AtomicApBitmap = AtomicApBitmap::empty(); @@ -80,10 +78,7 @@ pub extern "x86-interrupt" fn tlb_shootdown_handler(_: InterruptStackFrame) { process_flushes(idx); - // 
Signal EOI. - unsafe { - Msr::new(0x80b).write(0); - } + eoi(); } fn send_tlb_ipis(aps: ApBitmap) { diff --git a/tee/kernel/src/rt.rs b/tee/kernel/src/rt.rs index dc80263e..d4f23645 100644 --- a/tee/kernel/src/rt.rs +++ b/tee/kernel/src/rt.rs @@ -3,7 +3,7 @@ use core::{ future::Future, panic::Location, pin::Pin, - task::{Context, Waker}, + task::{Context, Poll, Waker}, }; use crate::{spin::mutex::Mutex, user::schedule_vcpu}; @@ -140,3 +140,25 @@ enum TaskState { /// The task has finished. Done, } + +pub async fn r#yield() { + struct Yield { + polled: bool, + } + + impl Future for Yield { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if self.polled { + Poll::Ready(()) + } else { + self.polled = true; + cx.waker().wake_by_ref(); + Poll::Pending + } + } + } + + Yield { polled: false }.await +} diff --git a/tee/kernel/src/time.rs b/tee/kernel/src/time.rs index 6cc9a69b..c92373b0 100644 --- a/tee/kernel/src/time.rs +++ b/tee/kernel/src/time.rs @@ -119,6 +119,15 @@ pub fn now(clock: ClockId) -> Timespec { STATE.lock().read_clock(clock) } +pub fn try_fire_clocks() { + let Some(mut guard) = STATE.try_lock() else { + // Some other thread is already using the state. Don't do anything + // now. 
+ return; + }; + guard.fire_clocks(); +} + pub fn set(clock: ClockId, time: Timespec) -> Result<()> { match clock { ClockId::Realtime => STATE.lock().set_real_time(time), diff --git a/tee/kernel/src/user/process/syscall/cpu_state.rs b/tee/kernel/src/user/process/syscall/cpu_state.rs index 621e8056..badb6d80 100644 --- a/tee/kernel/src/user/process/syscall/cpu_state.rs +++ b/tee/kernel/src/user/process/syscall/cpu_state.rs @@ -10,6 +10,7 @@ use core::{ use alloc::{vec, vec::Vec}; use bit_field::BitField; use bytemuck::{bytes_of, bytes_of_mut, from_bytes, from_bytes_mut, Pod, Zeroable}; +use constants::TIMER_VECTOR; use usize_conversions::{usize_from, FromUsize}; use x86_64::{ align_down, @@ -158,6 +159,7 @@ impl CpuState { code, }) } + TIMER_VECTOR => Exit::Timer, 0x80 => { let no = self.registers.rax as u32; let arg0 = self.registers.rbx as u32; @@ -650,6 +652,7 @@ pub enum Exit { DivideError, GeneralProtectionFault, PageFault(PageFaultExit), + Timer, } #[derive(Debug, Clone, Copy)] @@ -753,6 +756,7 @@ pub fn init() { unsafe extern "sysv64" { fn enter_userspace(); + pub fn interrupt_entry(); pub fn exception_entry(); fn syscall_entry(); } @@ -858,7 +862,16 @@ global_asm!( // Enter usermode. "iretq", - // Exit point for an exception/interrupt. + // Exit point for an interrupt. + // Note that `swapgs` was already executed by the exception/interrupt handler. + ".global interrupt_entry", + "interrupt_entry:", + // Clear the IF flag in the kernel's RFLAGS registers. + "and dword ptr gs:[{K_RFLAGS_OFFSET}], ~{INTERRUPT_FLAG}", + + // Fall through to exception_entry. + + // Exit point for an exception. // Note that `swapgs` was already executed by the exception/interrupt handler. 
".global exception_entry", "exception_entry:", @@ -972,4 +985,5 @@ global_asm!( U_FS_BASE_OFFSET = const userspace_reg_offset!(fs_base), U_GS_OFFSET = const userspace_reg_offset!(gs), U_SS_OFFSET = const userspace_reg_offset!(ss), + INTERRUPT_FLAG = const RFlags::INTERRUPT_FLAG.bits(), ); diff --git a/tee/kernel/src/user/process/thread.rs b/tee/kernel/src/user/process/thread.rs index 875e4fb5..23569e5f 100644 --- a/tee/kernel/src/user/process/thread.rs +++ b/tee/kernel/src/user/process/thread.rs @@ -12,11 +12,12 @@ use core::{ use crate::{ error::bail, + exception::eoi, fs::{ fd::{FileDescriptor, FileDescriptorTable}, node::FileAccessContext, }, - rt::notify::Notify, + rt::{self, notify::Notify}, spin::mutex::{Mutex, MutexGuard}, time, user::process::{ @@ -35,7 +36,7 @@ use bytemuck::{Pod, Zeroable}; use crossbeam_utils::atomic::AtomicCell; use futures::{select_biased, FutureExt}; use pin_project::pin_project; -use x86_64::VirtAddr; +use x86_64::{instructions::interrupts, VirtAddr}; use crate::{ error::Result, @@ -245,6 +246,19 @@ impl Thread { assert!(self.queue_signal(sig_info)); } Exit::PageFault(page_fault) => self.handle_page_fault(page_fault), + Exit::Timer => { + // Handle the timer interrupt. + time::try_fire_clocks(); + + // Signal that we're done handling the interrupt. + eoi(); + + // Re-enable interrupts. + interrupts::enable(); + + // Yield to the scheduler. + rt::r#yield().await; + } } } }; @@ -434,9 +448,10 @@ impl Thread { )?; } } - Exit::DivideError | Exit::GeneralProtectionFault | Exit::PageFault(_) => { - writeln!(write, "{:indent$}{exit:?}", "")? 
- } + Exit::DivideError + | Exit::GeneralProtectionFault + | Exit::PageFault(_) + | Exit::Timer => writeln!(write, "{:indent$}{exit:?}", "")?, } } else { writeln!(write, "{:indent$}thread has never exited", "")?; diff --git a/tee/supervisor-snp/src/ap.rs b/tee/supervisor-snp/src/ap.rs index fe57557b..c27f06a4 100644 --- a/tee/supervisor-snp/src/ap.rs +++ b/tee/supervisor-snp/src/ap.rs @@ -4,7 +4,7 @@ use bit_field::BitField; use constants::{ApIndex, AtomicApBitmap}; use snp_types::{ intercept::{ - VMEXIT_CPUID, VMEXIT_INIT, VMEXIT_INTR, VMEXIT_INVALID, VMEXIT_NMI, VMEXIT_NPF, + VMEXIT_CPUID, VMEXIT_INIT, VMEXIT_INTR, VMEXIT_INVALID, VMEXIT_MSR, VMEXIT_NMI, VMEXIT_NPF, VMEXIT_PAUSE, VMEXIT_SMI, VMEXIT_VMMCALL, }, vmsa::SevFeatures, @@ -18,7 +18,7 @@ use crate::{ ghcb::{create_ap, exit, run_vmpl, vmsa_tweak_bitmap}, output, per_cpu::PerCpu, - scheduler::{start_next_ap, WAKE_UP_VECTOR}, + scheduler::{start_next_ap, TIMER_VECTOR, WAKE_UP_VECTOR}, }; use self::vmsa::Vmpl1Vmsa; @@ -49,17 +49,28 @@ pub fn run_vcpu() -> ! { let tweak_bitmap = vmsa_tweak_bitmap(); let mut halted = false; + let mut requested_timer_irq = false; + let mut in_service_timer_irq = false; loop { // Handle interrupts. if PerCpu::get().interrupted.swap(false, Ordering::SeqCst) { while let Some(vector) = pop_pending_event() { match vector.get() { WAKE_UP_VECTOR => eoi(), + TIMER_VECTOR => { + requested_timer_irq = true; + eoi(); + } vector => unimplemented!("unhandled vector: {vector}"), } } } + // Don't halt if we can inject a timer IRQ. + if requested_timer_irq && !in_service_timer_irq { + halted = false; + } + // See if the kernel was kicked. if halted && WAKEUP_TOKEN.get(PerCpu::current_vcpu_index()) { halted = false; @@ -71,6 +82,30 @@ pub fn run_vcpu() -> ! { continue; } + // Inject pending timer IRQ if possible. + if !in_service_timer_irq && requested_timer_irq { + let mut guard = vmsa.modify(); + let mut vintr_ctrl = guard.vintr_ctrl(tweak_bitmap); + // Check if V_IRQ is not already set. 
+ if !vintr_ctrl.get_bit(8) { + // Set V_IRQ. + vintr_ctrl.set_bit(8, true); + // Set VGIF. + vintr_ctrl.set_bit(9, true); + // Set V_INTR_PRIO. + vintr_ctrl.set_bits(16..=19, 2); + // Clear V_IGN_TPR. + vintr_ctrl.set_bit(20, false); + // Set V_INTR_VECTOR. + vintr_ctrl.set_bits(32..=39, u64::from(constants::TIMER_VECTOR)); + + guard.set_vintr_ctrl(vintr_ctrl, tweak_bitmap); + + requested_timer_irq = false; + in_service_timer_irq = true; + } + } + // Run the AP. run_vmpl(1); @@ -114,6 +149,18 @@ pub fn run_vcpu() -> ! { let next_rip = guard.guest_nrip(tweak_bitmap); guard.set_rip(next_rip, tweak_bitmap); } + VMEXIT_MSR => { + // Make sure that the MSR access was a write. + assert_eq!(guard.guest_exit_info1(tweak_bitmap), 1); + + match guard.rcx(tweak_bitmap) as u32 { + 0x80b => in_service_timer_irq = false, // EOI. + unknown => unimplemented!("unimplemented MSR write {unknown:#x}"), + } + + let next_rip = guard.guest_nrip(tweak_bitmap); + guard.set_rip(next_rip, tweak_bitmap); + } VMEXIT_VMMCALL => { match guard.rax(tweak_bitmap) { nr if nr == SupervisorCallNr::StartNextAp as u64 => start_next_ap(), diff --git a/tee/supervisor-snp/src/exception.rs b/tee/supervisor-snp/src/exception.rs index fb27dd79..54572c36 100644 --- a/tee/supervisor-snp/src/exception.rs +++ b/tee/supervisor-snp/src/exception.rs @@ -9,6 +9,7 @@ use bit_field::BitField; use constants::MAX_APS_COUNT; use spin::Lazy; use x86_64::{ + registers::model_specific::Msr, structures::idt::{InterruptDescriptorTable, InterruptStackFrame}, VirtAddr, }; @@ -17,6 +18,7 @@ use crate::{ ghcb::{set_hv_doorbell_page, write_msr}, pagetable::Synchronized, per_cpu::PerCpu, + scheduler::TIMER_VECTOR, shared, }; @@ -33,6 +35,17 @@ pub fn init() { // Enable the x2apic. write_msr(0x80f, 0x1ff).unwrap(); + + // Enable APIC timer. + const PERIODIC_TIMER_MODE: u64 = 1 << 17; + // Initialize APIC Timer Local Vector Table Register. 
+ write_msr(0x832, u64::from(TIMER_VECTOR) | PERIODIC_TIMER_MODE).unwrap(); + // Initialize Divide Configuration Register. Divide by 1. + write_msr(0x83e, 0b1011).unwrap(); + // Initialize Timer Initial Count Register. + let tsc_frequency = unsafe { Msr::new(0xC001_0134).read() } * 1_000_000; + let timer_hz = 100; + write_msr(0x838, tsc_frequency / timer_hz).unwrap(); } static IDT: Lazy = Lazy::new(|| { diff --git a/tee/supervisor-snp/src/scheduler.rs b/tee/supervisor-snp/src/scheduler.rs index 18ee2c2c..d8a288bb 100644 --- a/tee/supervisor-snp/src/scheduler.rs +++ b/tee/supervisor-snp/src/scheduler.rs @@ -10,6 +10,7 @@ use crate::{ pub const STARTUP_VECTOR: u8 = 0x20; pub const WAKE_UP_VECTOR: u8 = 0x21; +pub const TIMER_VECTOR: u8 = 0x22; static READY: AtomicUsize = AtomicUsize::new(0); @@ -39,6 +40,7 @@ pub fn wait_for_vcpu_start() { match vector.get() { STARTUP_VECTOR => eoi(), WAKE_UP_VECTOR => eoi(), + TIMER_VECTOR => eoi(), event => unimplemented!("unimplemented event {event}"), } } diff --git a/tee/supervisor-tdx/src/vcpu.rs b/tee/supervisor-tdx/src/vcpu.rs index e468f990..493e6b48 100644 --- a/tee/supervisor-tdx/src/vcpu.rs +++ b/tee/supervisor-tdx/src/vcpu.rs @@ -1,14 +1,15 @@ use core::{ arch::{ asm, - x86_64::{CpuidResult, __cpuid_count}, + x86_64::{CpuidResult, __cpuid, __cpuid_count, _rdtsc}, }, cmp, sync::atomic::{AtomicUsize, Ordering}, }; use bit_field::BitField; -use constants::{ApIndex, MAX_APS_COUNT}; +use constants::{ApIndex, MAX_APS_COUNT, TIMER_VECTOR}; +use spin::Lazy; use supervisor_services::{SlotIndex, SupervisorCallNr}; use tdx_types::{ tdcall::{ @@ -16,7 +17,8 @@ use tdx_types::{ TDX_L2_EXIT_PENDING_INTERRUPT, TDX_PENDING_INTERRUPT, TDX_SUCCESS, }, vmexit::{ - VMEXIT_REASON_CPUID_INSTRUCTION, VMEXIT_REASON_MSR_WRITE, VMEXIT_REASON_VMCALL_INSTRUCTION, + VMEXIT_REASON_CPUID_INSTRUCTION, VMEXIT_REASON_MSR_WRITE, + VMEXIT_REASON_PREEMPTION_TIMER_EXPIRED, VMEXIT_REASON_VMCALL_INSTRUCTION, }, }; use x86_64::{ @@ -158,6 +160,8 @@ pub fn 
init_vcpu() { MdFieldId::X2APIC_EOI_WRITE_MASK, ); } + + update_tsc_deadline(); } static APICS: [Apic; MAX_APS_COUNT as usize] = [const { Apic::new() }; MAX_APS_COUNT as usize]; @@ -323,7 +327,33 @@ pub fn run_vcpu() -> ! { } guest_state.rip += u64::from(vm_exit.vm_exit_instruction_length); } + VMEXIT_REASON_PREEMPTION_TIMER_EXPIRED => { + APICS[PerCpu::current_vcpu_index()].set_irr(TIMER_VECTOR); + update_tsc_deadline(); + } unknown => panic!("{unknown:#x} {guest_state:x?} {vm_exit:x?}"), } } } + +/// Returns `delta(TSC)/s`. +fn tsc_frequency() -> u64 { + // Try to get the frequency from cpuid. + let result = unsafe { __cpuid(0x15) }; + assert_ne!(result.ebx, 0); + u64::from(result.ecx) * u64::from(result.ebx) / u64::from(result.eax) +} + +const TIMER_HZ: u64 = 100; +static TIMER_INTERRUPT_PERIOD: Lazy = Lazy::new(|| tsc_frequency() / TIMER_HZ); + +/// Set the deadline to `now + TIMER_INTERRUPT_PERIOD`. +fn update_tsc_deadline() { + unsafe { + Tdcall::vp_wr( + MdFieldId::TDVPS_TSC_DEADLINE, + _rdtsc() + *TIMER_INTERRUPT_PERIOD, + !0, + ); + } +}