Skip to content

Commit

Permalink
scx_rusty: Move interface enums into types.h and drop bpf_intf in favor of BTF-derived types
Browse files Browse the repository at this point in the history
  • Loading branch information
htejun committed Jan 30, 2025
1 parent 9e7418b commit 745d207
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 154 deletions.
99 changes: 0 additions & 99 deletions scheds/rust/scx_rusty/src/bpf/intf.h
Original file line number Diff line number Diff line change
@@ -1,99 +0,0 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.

// This software may be used and distributed according to the terms of the
// GNU General Public License version 2.
#ifndef __INTF_H
#define __INTF_H

#include <stdbool.h>

/*
 * __kptr is provided by the kernel/BPF build; stub it out for userspace
 * compiles where the annotation has no meaning. If we're in a kernel
 * build and it's still missing, something is wrong with the toolchain.
 * (Fixed: the error message previously named "__kptr_ref" while the
 * guard tests "__kptr".)
 */
#ifndef __kptr
#ifdef __KERNEL__
#error "__kptr not defined in the kernel"
#endif
#define __kptr
#endif

/* Fixed-width shorthands for userspace builds; the kernel defines these. */
#ifndef __KERNEL__
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
#endif

/*
 * Editor tooling (clangd/LSP) can't resolve the BPF include path, so fall
 * back to a relative include when LSP is defined.
 */
#ifdef LSP
#define __bpf__
#include "../../../../include/scx/ravg.bpf.h"
#else
#include <scx/ravg.bpf.h>
#endif

/*
 * Compile-time constants shared by the BPF scheduler and the userspace
 * load balancer.
 */
enum consts {
MAX_CPUS = 512,
MAX_DOMS = 64, /* limited to avoid complex bitmask ops */
MAX_NUMA_NODES = MAX_DOMS, /* Assume at least 1 domain per NUMA node */
CACHELINE_SIZE = 64,
NO_DOM_FOUND = MAX_DOMS + 1, /* out-of-range value: no domain was found */

/* Load-balancer task weight range and bucketing */
LB_DEFAULT_WEIGHT = 100,
LB_MIN_WEIGHT = 1,
LB_MAX_WEIGHT = 10000,
LB_LOAD_BUCKETS = 100, /* Must be a factor of LB_MAX_WEIGHT */
LB_WEIGHT_PER_BUCKET = LB_MAX_WEIGHT / LB_LOAD_BUCKETS,

/* Time constants */
MSEC_PER_SEC = 1000LLU,
USEC_PER_MSEC = 1000LLU,
NSEC_PER_USEC = 1000LLU,
NSEC_PER_MSEC = USEC_PER_MSEC * NSEC_PER_USEC,
USEC_PER_SEC = USEC_PER_MSEC * MSEC_PER_SEC,
NSEC_PER_SEC = NSEC_PER_USEC * USEC_PER_SEC,

/* Constants used for determining a task's deadline */
DL_RUNTIME_SCALE = 2, /* roughly scales average runtime to */
/* same order of magnitude as waker */
/* and blocked frequencies */
DL_MAX_LATENCY_NS = (50 * NSEC_PER_MSEC),
DL_FREQ_FT_MAX = 100000,
DL_MAX_LAT_PRIO = 39,

/*
 * When userspace load balancer is trying to determine the tasks to push
 * out from an overloaded domain, it looks at the following number
 * of recently active tasks of the domain. While this may lead to
 * spurious migration victim selection failures in pathological cases,
 * this isn't a practical problem as the LB rounds are best-effort
 * anyway and will be retried until loads are balanced.
 */
MAX_DOM_ACTIVE_TPTRS = 1024,
};

/* Statistics */
/*
 * Statistics counter indices. The BPF side bumps these; userspace reads
 * them back per-CPU and aggregates for reporting.
 */
enum stat_idx {
/* The following fields add up to all dispatched tasks */
RUSTY_STAT_WAKE_SYNC,
RUSTY_STAT_SYNC_PREV_IDLE,
RUSTY_STAT_PREV_IDLE,
RUSTY_STAT_GREEDY_IDLE,
RUSTY_STAT_PINNED,
RUSTY_STAT_DIRECT_DISPATCH,
RUSTY_STAT_DIRECT_GREEDY,
RUSTY_STAT_DIRECT_GREEDY_FAR,
RUSTY_STAT_DSQ_DISPATCH,
RUSTY_STAT_GREEDY_LOCAL,
RUSTY_STAT_GREEDY_XNUMA,

/* Extra stats that don't contribute to total */
RUSTY_STAT_REPATRIATE,
RUSTY_STAT_KICK_GREEDY,
RUSTY_STAT_LOAD_BALANCE,

/* Errors */
RUSTY_STAT_TASK_GET_ERR,

/* Deadline related stats */
RUSTY_STAT_DL_CLAMP,
RUSTY_STAT_DL_PRESET,

/* Number of stats; also sized the userspace stats vector */
RUSTY_NR_STATS,
};

#endif /* __INTF_H */
5 changes: 4 additions & 1 deletion scheds/rust/scx_rusty/src/bpf/main.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
#include <lib/sdt_task.h>
#endif

#include "intf.h"
#include "types.h"
#include "lb_domain.h"

Expand All @@ -61,6 +60,10 @@ char _license[] SEC("license") = "GPL";

UEI_DEFINE(uei);

const enum ravg_consts __btf_anchor_ravg_consts = RAVG_VAL_BITS;
const enum consts __btf_anchor_consts = LB_DEFAULT_WEIGHT;
const enum stat_idx __btf_anchor_stat_idx = RUSTY_NR_STATS;

/*
* const volatiles are set during initialization and treated as consts by the
* jit compiler.
Expand Down
76 changes: 76 additions & 0 deletions scheds/rust/scx_rusty/src/bpf/types.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,82 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.

// This software may be used and distributed according to the terms of the
// GNU General Public License version 2.
#ifndef __TYPES_H
#define __TYPES_H

#include <stdbool.h>

/*
 * Compile-time constants shared by the BPF scheduler and the userspace
 * load balancer.
 */
enum consts {
MAX_CPUS = 512,
MAX_DOMS = 64, /* limited to avoid complex bitmask ops */
MAX_NUMA_NODES = MAX_DOMS, /* Assume at least 1 domain per NUMA node */
CACHELINE_SIZE = 64,
NO_DOM_FOUND = MAX_DOMS + 1, /* out-of-range value: no domain was found */

/* Load-balancer task weight range and bucketing */
LB_DEFAULT_WEIGHT = 100,
LB_MIN_WEIGHT = 1,
LB_MAX_WEIGHT = 10000,
LB_LOAD_BUCKETS = 100, /* Must be a factor of LB_MAX_WEIGHT */
LB_WEIGHT_PER_BUCKET = LB_MAX_WEIGHT / LB_LOAD_BUCKETS,

/* Time constants */
MSEC_PER_SEC = 1000LLU,
USEC_PER_MSEC = 1000LLU,
NSEC_PER_USEC = 1000LLU,
NSEC_PER_MSEC = USEC_PER_MSEC * NSEC_PER_USEC,
USEC_PER_SEC = USEC_PER_MSEC * MSEC_PER_SEC,
NSEC_PER_SEC = NSEC_PER_USEC * USEC_PER_SEC,

/* Constants used for determining a task's deadline */
DL_RUNTIME_SCALE = 2, /* roughly scales average runtime to */
/* same order of magnitude as waker */
/* and blocked frequencies */
DL_MAX_LATENCY_NS = (50 * NSEC_PER_MSEC),
DL_FREQ_FT_MAX = 100000,
DL_MAX_LAT_PRIO = 39,

/*
 * When userspace load balancer is trying to determine the tasks to push
 * out from an overloaded domain, it looks at the following number
 * of recently active tasks of the domain. While this may lead to
 * spurious migration victim selection failures in pathological cases,
 * this isn't a practical problem as the LB rounds are best-effort
 * anyway and will be retried until loads are balanced.
 */
MAX_DOM_ACTIVE_TPTRS = 1024,
};

/*
 * Statistics counter indices. The BPF side bumps these; userspace reads
 * them back per-CPU and aggregates for reporting.
 */
enum stat_idx {
/* The following fields add up to all dispatched tasks */
RUSTY_STAT_WAKE_SYNC,
RUSTY_STAT_SYNC_PREV_IDLE,
RUSTY_STAT_PREV_IDLE,
RUSTY_STAT_GREEDY_IDLE,
RUSTY_STAT_PINNED,
RUSTY_STAT_DIRECT_DISPATCH,
RUSTY_STAT_DIRECT_GREEDY,
RUSTY_STAT_DIRECT_GREEDY_FAR,
RUSTY_STAT_DSQ_DISPATCH,
RUSTY_STAT_GREEDY_LOCAL,
RUSTY_STAT_GREEDY_XNUMA,

/* Extra stats that don't contribute to total */
RUSTY_STAT_REPATRIATE,
RUSTY_STAT_KICK_GREEDY,
RUSTY_STAT_LOAD_BALANCE,

/* Errors */
RUSTY_STAT_TASK_GET_ERR,

/* Deadline related stats */
RUSTY_STAT_DL_CLAMP,
RUSTY_STAT_DL_PRESET,

/* Number of stats; also sizes the userspace stats vector */
RUSTY_NR_STATS,
};

/*
* XXXETSAL This is convoluted for a reason. We have a three way conflict here:
*
Expand Down
10 changes: 0 additions & 10 deletions scheds/rust/scx_rusty/src/bpf_intf.rs

This file was deleted.

19 changes: 9 additions & 10 deletions scheds/rust/scx_rusty/src/load_balance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,14 +147,13 @@ use scx_utils::LoadAggregator;
use scx_utils::LoadLedger;
use sorted_vec::SortedVec;

use crate::bpf_intf;
use crate::bpf_skel::*;
use crate::stats::DomainStats;
use crate::stats::NodeStats;
use crate::DomainGroup;

const DEFAULT_WEIGHT: f64 = bpf_intf::consts_LB_DEFAULT_WEIGHT as f64;
const RAVG_FRAC_BITS: u32 = bpf_intf::ravg_consts_RAVG_FRAC_BITS;
const DEFAULT_WEIGHT: f64 = types::consts::LB_DEFAULT_WEIGHT.0 as f64;
const RAVG_FRAC_BITS: u32 = types::ravg_consts::RAVG_FRAC_BITS.0;

fn now_monotonic() -> u64 {
let mut time = libc::timespec {
Expand Down Expand Up @@ -465,7 +464,7 @@ pub struct LoadBalancer<'a, 'b> {
// Verify that the number of buckets is a factor of the maximum weight to
// ensure that the range of weight can be split evenly amongst every bucket.
const_assert_eq!(
bpf_intf::consts_LB_MAX_WEIGHT % bpf_intf::consts_LB_LOAD_BUCKETS,
types::consts::LB_MAX_WEIGHT.0 % types::consts::LB_LOAD_BUCKETS.0,
0
);

Expand All @@ -481,7 +480,7 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
skel,
skip_kworkers,

infeas_threshold: bpf_intf::consts_LB_MAX_WEIGHT as f64,
infeas_threshold: types::consts::LB_MAX_WEIGHT.0 as f64,

nodes: SortedVec::new(),

Expand Down Expand Up @@ -559,7 +558,7 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
}

fn calculate_load_avgs(&mut self) -> Result<LoadLedger> {
const NUM_BUCKETS: u64 = bpf_intf::consts_LB_LOAD_BUCKETS as u64;
const NUM_BUCKETS: u64 = types::consts::LB_LOAD_BUCKETS.0 as u64;
let now_mono = now_monotonic();
let load_half_life = self.skel.maps.rodata_data.load_half_life;

Expand Down Expand Up @@ -597,8 +596,8 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
}

fn bucket_range(&self, bucket: u64) -> (f64, f64) {
const MAX_WEIGHT: u64 = bpf_intf::consts_LB_MAX_WEIGHT as u64;
const NUM_BUCKETS: u64 = bpf_intf::consts_LB_LOAD_BUCKETS as u64;
const MAX_WEIGHT: u64 = types::consts::LB_MAX_WEIGHT.0 as u64;
const NUM_BUCKETS: u64 = types::consts::LB_LOAD_BUCKETS.0 as u64;
const WEIGHT_PER_BUCKET: u64 = MAX_WEIGHT / NUM_BUCKETS;

if bucket >= NUM_BUCKETS {
Expand All @@ -613,7 +612,7 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
}

fn bucket_weight(&self, bucket: u64) -> usize {
const WEIGHT_PER_BUCKET: f64 = bpf_intf::consts_LB_WEIGHT_PER_BUCKET as f64;
const WEIGHT_PER_BUCKET: f64 = types::consts::LB_WEIGHT_PER_BUCKET.0 as f64;
let (min_weight, _) = self.bucket_range(bucket);

// Use the mid-point of the bucket when determining weight
Expand All @@ -629,7 +628,7 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
dom.queried_tasks = true;

// Read active_tasks and update read_idx and gen.
const MAX_TPTRS: u64 = bpf_intf::consts_MAX_DOM_ACTIVE_TPTRS as u64;
const MAX_TPTRS: u64 = types::consts::MAX_DOM_ACTIVE_TPTRS.0 as u64;
let dom_ctx = unsafe { &mut *self.skel.maps.bss_data.dom_ctxs[dom.id] };
let active_tasks = &mut dom_ctx.active_tasks;

Expand Down
67 changes: 33 additions & 34 deletions scheds/rust/scx_rusty/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
// GNU General Public License version 2.
mod bpf_skel;
pub use bpf_skel::*;
pub mod bpf_intf;

mod domain;
use domain::DomainGroup;
Expand Down Expand Up @@ -61,8 +60,8 @@ use scx_utils::Topology;
use scx_utils::UserExitInfo;
use scx_utils::NR_CPU_IDS;

const MAX_DOMS: usize = bpf_intf::consts_MAX_DOMS as usize;
const MAX_CPUS: usize = bpf_intf::consts_MAX_CPUS as usize;
const MAX_DOMS: usize = types::consts::MAX_DOMS.0 as usize;
const MAX_CPUS: usize = types::consts::MAX_CPUS.0 as usize;

/// scx_rusty: A multi-domain BPF / userspace hybrid scheduler
///
Expand Down Expand Up @@ -278,7 +277,7 @@ impl StatsCtx {
let stats_map = &skel.maps.stats;
let mut stats: Vec<u64> = Vec::new();

for stat in 0..bpf_intf::stat_idx_RUSTY_NR_STATS {
for stat in 0..types::stat_idx::RUSTY_NR_STATS.0 {
let cpu_stat_vec = stats_map
.lookup_percpu(&stat.to_ne_bytes(), libbpf_rs::MapFlags::ANY)
.with_context(|| format!("Failed to lookup stat {}", stat))?
Expand All @@ -302,7 +301,7 @@ impl StatsCtx {
Self {
cpu_busy: 0,
cpu_total: 0,
bpf_stats: vec![0u64; bpf_intf::stat_idx_RUSTY_NR_STATS as usize],
bpf_stats: vec![0u64; types::stat_idx::RUSTY_NR_STATS.0 as usize],
time_used: Duration::default(),
}
}
Expand Down Expand Up @@ -498,18 +497,18 @@ impl<'a> Scheduler<'a> {
}

fn cluster_stats(&self, sc: &StatsCtx, node_stats: BTreeMap<usize, NodeStats>) -> ClusterStats {
let stat = |idx| sc.bpf_stats[idx as usize];
let total = stat(bpf_intf::stat_idx_RUSTY_STAT_WAKE_SYNC)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_SYNC_PREV_IDLE)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_PREV_IDLE)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_IDLE)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_PINNED)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_DISPATCH)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_GREEDY)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_GREEDY_FAR)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_DSQ_DISPATCH)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_LOCAL)
+ stat(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_XNUMA);
let stat = |idx: types::stat_idx| sc.bpf_stats[idx.0 as usize];
let total = stat(types::stat_idx::RUSTY_STAT_WAKE_SYNC)
+ stat(types::stat_idx::RUSTY_STAT_SYNC_PREV_IDLE)
+ stat(types::stat_idx::RUSTY_STAT_PREV_IDLE)
+ stat(types::stat_idx::RUSTY_STAT_GREEDY_IDLE)
+ stat(types::stat_idx::RUSTY_STAT_PINNED)
+ stat(types::stat_idx::RUSTY_STAT_DIRECT_DISPATCH)
+ stat(types::stat_idx::RUSTY_STAT_DIRECT_GREEDY)
+ stat(types::stat_idx::RUSTY_STAT_DIRECT_GREEDY_FAR)
+ stat(types::stat_idx::RUSTY_STAT_DSQ_DISPATCH)
+ stat(types::stat_idx::RUSTY_STAT_GREEDY_LOCAL)
+ stat(types::stat_idx::RUSTY_STAT_GREEDY_XNUMA);
let stat_pct = |idx| stat(idx) as f64 / total as f64 * 100.0;

let cpu_busy = if sc.cpu_total != 0 {
Expand Down Expand Up @@ -537,26 +536,26 @@ impl<'a> Scheduler<'a> {

cpu_busy,
load: node_stats.iter().map(|(_k, v)| v.load).sum::<f64>(),
nr_migrations: sc.bpf_stats[bpf_intf::stat_idx_RUSTY_STAT_LOAD_BALANCE as usize],
nr_migrations: sc.bpf_stats[types::stat_idx::RUSTY_STAT_LOAD_BALANCE.0 as usize],

task_get_err: sc.bpf_stats[bpf_intf::stat_idx_RUSTY_STAT_TASK_GET_ERR as usize],
task_get_err: sc.bpf_stats[types::stat_idx::RUSTY_STAT_TASK_GET_ERR.0 as usize],
time_used: sc.time_used.as_secs_f64(),

sync_prev_idle: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_SYNC_PREV_IDLE),
wake_sync: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_WAKE_SYNC),
prev_idle: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_PREV_IDLE),
greedy_idle: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_IDLE),
pinned: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_PINNED),
direct: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_DISPATCH),
greedy: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_GREEDY),
greedy_far: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DIRECT_GREEDY_FAR),
dsq_dispatch: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DSQ_DISPATCH),
greedy_local: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_LOCAL),
greedy_xnuma: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_GREEDY_XNUMA),
kick_greedy: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_KICK_GREEDY),
repatriate: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_REPATRIATE),
dl_clamp: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DL_CLAMP),
dl_preset: stat_pct(bpf_intf::stat_idx_RUSTY_STAT_DL_PRESET),
sync_prev_idle: stat_pct(types::stat_idx::RUSTY_STAT_SYNC_PREV_IDLE),
wake_sync: stat_pct(types::stat_idx::RUSTY_STAT_WAKE_SYNC),
prev_idle: stat_pct(types::stat_idx::RUSTY_STAT_PREV_IDLE),
greedy_idle: stat_pct(types::stat_idx::RUSTY_STAT_GREEDY_IDLE),
pinned: stat_pct(types::stat_idx::RUSTY_STAT_PINNED),
direct: stat_pct(types::stat_idx::RUSTY_STAT_DIRECT_DISPATCH),
greedy: stat_pct(types::stat_idx::RUSTY_STAT_DIRECT_GREEDY),
greedy_far: stat_pct(types::stat_idx::RUSTY_STAT_DIRECT_GREEDY_FAR),
dsq_dispatch: stat_pct(types::stat_idx::RUSTY_STAT_DSQ_DISPATCH),
greedy_local: stat_pct(types::stat_idx::RUSTY_STAT_GREEDY_LOCAL),
greedy_xnuma: stat_pct(types::stat_idx::RUSTY_STAT_GREEDY_XNUMA),
kick_greedy: stat_pct(types::stat_idx::RUSTY_STAT_KICK_GREEDY),
repatriate: stat_pct(types::stat_idx::RUSTY_STAT_REPATRIATE),
dl_clamp: stat_pct(types::stat_idx::RUSTY_STAT_DL_CLAMP),
dl_preset: stat_pct(types::stat_idx::RUSTY_STAT_DL_PRESET),

direct_greedy_cpus: self.tuner.direct_greedy_mask.as_raw_slice().to_owned(),
kick_greedy_cpus: self.tuner.kick_greedy_mask.as_raw_slice().to_owned(),
Expand Down

0 comments on commit 745d207

Please sign in to comment.