-
Notifications
You must be signed in to change notification settings - Fork 106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
scx_prev: a simple scheduler tested on OLTP workloads #1275
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* SPDX-License-Identifier: GPL-2.0 */ | ||
/* | ||
* A variation on scx_simple with CPU selection that prioritizes an idle | ||
* previous CPU over finding a fully idle core (as is done in scx_simple and | ||
* scx_rusty). | ||
* | ||
* Outperforms the in-kernel fair class (v6.12), scx_simple, and scx_rusty on | ||
* OLTP workloads run on systems with simple topology (i.e. non-NUMA, single | ||
* LLC). | ||
* | ||
* Copyright (c) 2025, Oracle and/or its affiliates. | ||
* Copyright (c) 2025, Daniel Jordan <[email protected]> | ||
*/ | ||
#include <scx/common.bpf.h> | ||
|
||
/* License string emitted into the "license" section of the BPF object. */
char _license[] SEC("license") = "GPL";

/*
 * NOTE(review): presumably declares the "uei" exit-info storage that
 * prev_exit() fills through UEI_RECORD() -- confirm against the UEI_DEFINE
 * macro definition.
 */
UEI_DEFINE(uei);
|
||
struct { | ||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); | ||
__uint(key_size, sizeof(u32)); | ||
__uint(value_size, sizeof(u64)); | ||
__uint(max_entries, 4); /* [local, select_fail, prev_cpu, idle_cpu] */ | ||
} stats SEC(".maps"); | ||
|
||
/*
 * Add one to the counter stored at slot @idx of the stats map.
 *
 * A failed lookup (bpf_map_lookup_elem() returning NULL) is ignored and the
 * call becomes a no-op.
 */
static void stat_inc(u32 idx)
{
	u64 *slot;

	slot = bpf_map_lookup_elem(&stats, &idx);
	if (!slot)
		return;

	*slot += 1;
}
|
||
/*
 * select_cpu callback: choose a CPU for @p, preferring its previous CPU.
 *
 * Order of preference:
 *   1. @prev_cpu, if its idle state can be claimed (counted as "prev_cpu").
 *   2. Any idle CPU in p->cpus_ptr (counted as "idle_cpu").
 *   3. Otherwise fall back to @prev_cpu without inserting the task
 *      (counted as "select_fail").
 *
 * When an idle CPU is claimed in case 1 or 2, the task is inserted straight
 * into SCX_DSQ_LOCAL with the default slice and "local" is counted as well.
 *
 * @p:          task being placed
 * @prev_cpu:   CPU the task ran on previously
 * @wake_flags: unused here
 *
 * Returns the selected CPU number.
 */
s32 BPF_STRUCT_OPS(prev_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	s32 cpu;

	/*
	 * No special case for p->nr_cpus_allowed == 1: the scheduler core
	 * skips ops.select_cpu() entirely for tasks that can only run on a
	 * single CPU, so such a branch would never execute.
	 */

	/* Claiming the idle bit keeps another waker from picking this CPU. */
	if (scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
		stat_inc(2);	/* prev_cpu */
		cpu = prev_cpu;
		goto insert;
	}

	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
	if (cpu >= 0) {
		stat_inc(3);	/* idle_cpu */
		goto insert;
	}

	/* Nothing idle: hand back prev_cpu and skip the direct insert. */
	stat_inc(1);	/* select_fail */

	return prev_cpu;

insert:
	stat_inc(0);	/* local */
	scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);

	return cpu;
}
|
||
/* Exit callback: hands the exit info @ei to UEI_RECORD() for storage in uei. */
void BPF_STRUCT_OPS(prev_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}
|
||
/*
 * Operations table for this scheduler, named "prev". Only the select_cpu and
 * exit callbacks are set here.
 */
SCX_OPS_DEFINE(prev_ops,
	.select_cpu = (void *)prev_select_cpu,
	.exit = (void *)prev_exit,
	.name = "prev"
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
/* SPDX-License-Identifier: GPL-2.0 */ | ||
/* | ||
* Copyright (c) 2025, Oracle and/or its affiliates. | ||
* Copyright (c) 2025, Daniel Jordan <[email protected]> | ||
*/ | ||
#include <stdio.h> | ||
#include <unistd.h> | ||
#include <signal.h> | ||
#include <libgen.h> | ||
#include <bpf/bpf.h> | ||
#include <scx/common.h> | ||
|
||
#include "scx_prev.bpf.skel.h" | ||
|
||
/*
 * Usage text printed by main() for -h and for unrecognized options. The
 * single %s is filled with the basename of argv[0].
 */
const char help_fmt[] =
"A variation on scx_simple with CPU selection that prioritizes an idle\n"
"previous CPU over finding a fully idle core.\n"
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-i sec] [-v]\n"
"\n"
"  -h            Display this help and exit\n"
"  -i            Sampling interval for statistics in seconds\n"
"  -v            Print libbpf debug messages\n";
|
||
/* Set by -v; while false, libbpf debug-level messages are suppressed. */
static bool verbose;

/* Seconds to sleep between statistics printouts (-i); defaults to 1. */
static unsigned stat_interval = 1;

/*
 * Raised by the signal handler to ask the main loop to stop. Declared as
 * volatile sig_atomic_t because that is the object type the C standard
 * guarantees can be safely written from an asynchronous signal handler.
 */
static volatile sig_atomic_t exit_req;
|
||
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) | ||
{ | ||
if (level == LIBBPF_DEBUG && !verbose) | ||
return 0; | ||
return vfprintf(stderr, format, args); | ||
} | ||
|
||
/*
 * Signal handler (main() installs it for SIGINT and SIGTERM): only raises
 * exit_req so the main loop can notice and shut down. The signal number is
 * ignored.
 */
static void sigint_handler(int unused)
{
	exit_req = 1;
}
|
||
/*
 * Sum the per-CPU counters of the BPF stats map into @stats.
 *
 * @skel:  loaded skeleton whose stats map is read
 * @stats: output array with room for 4 totals, indexed like the map slots
 *         (local, select_fail, prev_cpu, idle_cpu)
 *
 * @stats is always zeroed first. A slot whose lookup fails keeps its zero
 * total, and if the possible-CPU count cannot be determined every total
 * stays zero.
 */
static void read_stats(struct scx_prev *skel, __u64 *stats)
{
	int nr_cpus = libbpf_num_possible_cpus();
	int map_fd;
	__u32 idx;

	memset(stats, 0, sizeof(stats[0]) * 4);

	/*
	 * libbpf_num_possible_cpus() reports failure as a negative value;
	 * using that as a variable-length array bound would be undefined
	 * behaviour, so bail out before declaring the buffer.
	 */
	if (nr_cpus <= 0)
		return;

	/* One row of per-CPU values for each of the 4 counter slots. */
	__u64 cnts[4][nr_cpus];

	map_fd = bpf_map__fd(skel->maps.stats);

	for (idx = 0; idx < 4; idx++) {
		int ret, cpu;

		ret = bpf_map_lookup_elem(map_fd, &idx, cnts[idx]);
		if (ret < 0)
			continue;
		for (cpu = 0; cpu < nr_cpus; cpu++)
			stats[idx] += cnts[idx][cpu];
	}
}
|
||
int main(int argc, char **argv) | ||
{ | ||
struct scx_prev *skel; | ||
struct bpf_link *link; | ||
__u32 opt; | ||
__u64 ecode; | ||
|
||
libbpf_set_print(libbpf_print_fn); | ||
signal(SIGINT, sigint_handler); | ||
signal(SIGTERM, sigint_handler); | ||
restart: | ||
skel = SCX_OPS_OPEN(prev_ops, scx_prev); | ||
|
||
while ((opt = getopt(argc, argv, "hi:v")) != -1) { | ||
switch (opt) { | ||
case 'i': | ||
stat_interval = strtoull(optarg, NULL, 0); | ||
if (!stat_interval) | ||
stat_interval = 1; | ||
break; | ||
case 'v': | ||
verbose = true; | ||
break; | ||
default: | ||
fprintf(stderr, help_fmt, basename(argv[0])); | ||
return opt != 'h'; | ||
} | ||
} | ||
|
||
SCX_OPS_LOAD(skel, prev_ops, scx_prev, uei); | ||
link = SCX_OPS_ATTACH(skel, prev_ops, scx_prev); | ||
|
||
while (!exit_req && !UEI_EXITED(skel, uei)) { | ||
__u64 stats[4]; | ||
|
||
read_stats(skel, stats); | ||
printf("local=%llu select_fail=%llu prev_cpu=%llu idle_cpu=%llu\n", | ||
stats[0], stats[1], stats[2], stats[3]); | ||
fflush(stdout); | ||
sleep(stat_interval); | ||
} | ||
|
||
bpf_link__destroy(link); | ||
ecode = UEI_REPORT(skel, uei); | ||
scx_prev__destroy(skel); | ||
|
||
if (UEI_ECODE_RESTART(ecode)) | ||
goto restart; | ||
return 0; | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this condition is always false: ops.select_cpu() is always skipped if the task can only run on 1 CPU.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's true, thanks, I see how ->select_cpu() is always skipped in the in-kernel scheduler core for nr_cpus_allowed == 1. I'll send a follow up deleting the unused branch.