diff --git a/scheds/rust/scx_lavd/src/bpf/main.bpf.c b/scheds/rust/scx_lavd/src/bpf/main.bpf.c
index 48cd0d271..c1bbff3c5 100644
--- a/scheds/rust/scx_lavd/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_lavd/src/bpf/main.bpf.c
@@ -1024,16 +1024,16 @@ static u64 find_proper_dsq(struct task_ctx *taskc, struct cpu_ctx *cpuc)
 	return cpuc->cpdom_alt_id;
 }
 
-static bool try_kick_task_idle_cpu(struct task_struct *p, struct task_ctx *taskc)
+static bool try_kick_task_idle_cpu(struct task_struct *p,
+				   struct task_ctx *taskc, s32 prev_cpu)
 {
 	bool found_idle = false;
-	s32 prev_cpu, cpu;
+	s32 cpu;
 
 	/*
 	 * Find an idle cpu but do not reserve the idle cpu. That is because
 	 * there is no guarantee the idle cpu will be picked up at this point.
 	 */
-	prev_cpu = scx_bpf_task_cpu(p);
 	cpu = find_idle_cpu(p, taskc, prev_cpu, 0, false, &found_idle);
 	if (found_idle && cpu >= 0) {
 		scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
@@ -1082,7 +1082,7 @@ void BPF_STRUCT_OPS(lavd_enqueue, struct task_struct *p, u64 enq_flags)
 {
 	struct cpu_ctx *cpuc_task, *cpuc_cur;
 	struct task_ctx *taskc;
-	s32 cpu_id;
+	s32 prev_cpu;
 	u64 dsq_id, now;
 
 	/*
@@ -1096,9 +1096,9 @@ void BPF_STRUCT_OPS(lavd_enqueue, struct task_struct *p, u64 enq_flags)
 	 * always put the task to the global DSQ, so any idle CPU can pick it
 	 * up.
 	 */
-	cpu_id = scx_bpf_task_cpu(p);
 	taskc = get_task_ctx(p);
-	cpuc_task = get_cpu_ctx_id(cpu_id);
+	prev_cpu = scx_bpf_task_cpu(p);
+	cpuc_task = get_cpu_ctx_id(prev_cpu);
 	cpuc_cur = get_cpu_ctx();
 	if (!cpuc_cur || !cpuc_task || !taskc)
 		return;
@@ -1115,8 +1115,8 @@ void BPF_STRUCT_OPS(lavd_enqueue, struct task_struct *p, u64 enq_flags)
 	 */
 	dsq_id = find_proper_dsq(taskc, cpuc_task);
 	now = scx_bpf_now();
-	if (can_direct_dispatch(p, taskc, cpuc_task, cpu_id, &enq_flags, now)) {
-		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu_id,
+	if (can_direct_dispatch(p, taskc, cpuc_task, prev_cpu, &enq_flags, now)) {
+		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | prev_cpu,
 				   p->scx.slice, enq_flags);
 		return;
 	}
@@ -1128,7 +1128,7 @@ void BPF_STRUCT_OPS(lavd_enqueue, struct task_struct *p, u64 enq_flags)
 	 * If there is an idle cpu for the task, try to kick it up now
 	 * so it can consume the task immediately.
 	 */
-	if (try_kick_task_idle_cpu(p, taskc))
+	if (try_kick_task_idle_cpu(p, taskc, prev_cpu))
 		return;
 
 	/*
@@ -1784,7 +1784,7 @@ void BPF_STRUCT_OPS(lavd_update_idle, s32 cpu, bool idle)
 		 * As an idle task cannot be preempted,
 		 * per-CPU preemption information should be cleared.
		 */
-		reset_cpu_preemption_info(cpuc);
+		reset_cpu_preemption_info(cpuc, false);
 	}
 	/*
 	 * The CPU is exiting from the idle state.
@@ -1831,12 +1831,6 @@ void BPF_STRUCT_OPS(lavd_cpu_release, s32 cpu,
 {
 	struct cpu_ctx *cpuc;
 
-	/*
-	 * When a CPU is released to serve higher priority scheduler class,
-	 * requeue the tasks in a local DSQ to the global enqueue.
-	 */
-	scx_bpf_reenqueue_local();
-
 	/*
 	 * When the scx scheduler loses control of a CPU,
 	 * reset the CPU's preemption information so it cannot be a victim.
@@ -1846,7 +1840,13 @@ void BPF_STRUCT_OPS(lavd_cpu_release, s32 cpu,
 		scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
 		return;
 	}
-	reset_cpu_preemption_info(cpuc);
+	reset_cpu_preemption_info(cpuc, true);
+
+	/*
+	 * When a CPU is released to serve higher priority scheduler class,
+	 * requeue the tasks in a local DSQ to the global enqueue.
+	 */
+	scx_bpf_reenqueue_local();
 }
 
 void BPF_STRUCT_OPS(lavd_enable, struct task_struct *p)
diff --git a/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c b/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c
index 955178d4f..370cd49a1 100644
--- a/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c
+++ b/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c
@@ -130,18 +130,8 @@ static struct cpu_ctx *find_victim_cpu(const struct cpumask *cpumask,
 	cur_cpu = cpuc->cpu_id;
 
 	/*
-	 * First, test the current CPU since it can skip the expensive IPI.
-	 */
-	if (can_cpu_be_kicked(now, cpuc) &&
-	    bpf_cpumask_test_cpu(cur_cpu, cpumask) &&
-	    can_cpu1_kick_cpu2(&prm_task, &prm_cpus[0], cpuc)) {
-		victim_cpu = &prm_task;
-		goto bingo_out;
-	}
-
-	/*
-	 * If the current CPU cannot be a victim, let's check if it is worth to
-	 * try to kick other CPU at the expense of IPI.
+	 * First check if it is worth to try to kick other CPU
+	 * at the expense of IPI.
 	 */
 	if (!is_worth_kick_other_task(taskc))
 		goto null_out;
@@ -216,11 +206,6 @@ static struct cpu_ctx *find_victim_cpu(const struct cpumask *cpumask,
 	return NULL;
 }
 
-static void kick_current_cpu(struct task_struct *p)
-{
-	WRITE_ONCE(p->scx.slice, 0);
-}
-
 static bool try_kick_cpu(struct task_struct *p, struct cpu_ctx *cpuc_cur,
 			 struct cpu_ctx *victim_cpuc)
 {
@@ -234,29 +219,15 @@ static bool try_kick_cpu(struct task_struct *p, struct cpu_ctx *cpuc_cur,
 	u64 old;
 	bool ret = false;
 
-	/*
-	 * If the current CPU is a victim, we just reset the current task's
-	 * time slice as an optimization. Othewise, kick the remote CPU for
-	 * preemption.
-	 *
-	 * Resetting task's time slice to zero does not trigger an immediate
-	 * preemption. However, the cost of self-IPI is prohibitively expensive
-	 * for some scenarios. The actual preemption will happen at the next
-	 * ops.tick().
-	 */
-	if (cpuc_cur->cpu_id == victim_cpuc->cpu_id) {
-		struct task_struct *tsk = bpf_get_current_task_btf();
-		kick_current_cpu(tsk);
-		return true;
-	}
-
 	/*
 	 * Kick a victim CPU if it is not victimized yet by another
 	 * concurrent kick task.
+	 *
+	 *
 	 */
 	old = p->scx.slice;
-	if (old != 0)
-		ret = __sync_bool_compare_and_swap(&p->scx.slice, old, 0);
+	if (old != 1 && old != 0)
+		ret = __sync_bool_compare_and_swap(&p->scx.slice, old, 1);
 
 	/*
 	 * Kick the remote CPU for preemption.
@@ -329,7 +300,7 @@ static bool try_yield_current_cpu(struct task_struct *p_run,
 	 * give up its extended time slice for fairness.
 	 */
 	if (taskc_run->lock_holder_xted) {
-		kick_current_cpu(p_run);
+		p_run->scx.slice = 0;
 		return true;
 	}
 
@@ -367,15 +338,27 @@ static bool try_yield_current_cpu(struct task_struct *p_run,
 	bpf_rcu_read_unlock();
 
 	if (ret)
-		kick_current_cpu(p_run);
+		p_run->scx.slice = 0;
 
 	return ret;
 }
 
-static void reset_cpu_preemption_info(struct cpu_ctx *cpuc)
+static void reset_cpu_preemption_info(struct cpu_ctx *cpuc, bool released)
 {
-	cpuc->lat_cri = 0;
-	cpuc->stopping_tm_est_ns = SCX_SLICE_INF;
+	if (released) {
+		/*
+		 * When the CPU is taken by high priority scheduler,
+		 * set things impossible to preempt.
+		 */
+		cpuc->lat_cri = SCX_SLICE_INF;
+		cpuc->stopping_tm_est_ns = 0;
+	} else {
+		/*
+		 * When the CPU is idle,
+		 * set things easy to preempt.
+		 */
+		cpuc->lat_cri = 0;
+		cpuc->stopping_tm_est_ns = SCX_SLICE_INF;
+	}
 }
 
-
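
Reviewer note: below is a minimal user-space sketch, not part of the patch, illustrating the claim-once semantics the reworked try_kick_cpu() relies on. Concurrent kickers race to compare-and-swap the victim task's time slice to the sentinel value 1, and only the winner goes on to kick (IPI) the victim CPU. The struct task_stub, the thread count, and the kicks_sent counter are illustrative assumptions; only the __sync_bool_compare_and_swap() call and the "old != 1 && old != 0" guard mirror the patch.

/*
 * Standalone illustration (not part of the patch): several "kicker" threads
 * race to victimize the same task by compare-and-swapping its time slice to
 * the sentinel value 1, mirroring the CAS in try_kick_cpu(). Only the thread
 * that wins the CAS proceeds to "kick" the victim CPU, so a CPU is never
 * IPI'd twice for the same preemption attempt. task_stub and kicks_sent are
 * illustrative stand-ins for the real sched_ext task state.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct task_stub {
	uint64_t slice;		/* stand-in for p->scx.slice */
};

static struct task_stub victim = { .slice = 20000000 };	/* 20 ms slice */
static int kicks_sent;					/* how many "IPIs" fired */

static void *kicker(void *arg)
{
	uint64_t old = victim.slice;
	bool claimed = false;

	/* Skip tasks already victimized (slice forced to 1) or expired (0). */
	if (old != 1 && old != 0)
		claimed = __sync_bool_compare_and_swap(&victim.slice, old, 1);

	if (claimed)
		__sync_fetch_and_add(&kicks_sent, 1);	/* would be scx_bpf_kick_cpu() */

	return NULL;
}

int main(void)
{
	pthread_t th[8];

	for (int i = 0; i < 8; i++)
		pthread_create(&th[i], NULL, kicker, NULL);
	for (int i = 0; i < 8; i++)
		pthread_join(th[i], NULL);

	/* Exactly one kicker should have claimed the victim. */
	printf("kicks sent: %d (slice is now %llu)\n",
	       kicks_sent, (unsigned long long)victim.slice);
	return 0;
}

However the threads interleave, this always prints "kicks sent: 1", which is the property the CAS-based guard is meant to provide.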