diff --git a/EXCLUSIONS.md b/EXCLUSIONS.md index 5542c43c..ef983609 100644 --- a/EXCLUSIONS.md +++ b/EXCLUSIONS.md @@ -1,5 +1,5 @@ elastic/ebpf is tested against a matrix of kernels. The code contained in this -repository is intended for use with linux kernel version 5.10.10 or higher, +repository is intended for use with linux kernel version 5.10.16 or higher, with BTF (and other requisite configs) enabled. The following is a list of kernels where the ebpf programs fail to load or diff --git a/GPL/Events/Process/Probe.bpf.c b/GPL/Events/Process/Probe.bpf.c index 211fda6d..18e25c7b 100644 --- a/GPL/Events/Process/Probe.bpf.c +++ b/GPL/Events/Process/Probe.bpf.c @@ -16,12 +16,6 @@ #include "Helpers.h" #include "PathResolver.h" -/* tty_write */ -DECL_FUNC_ARG(redirected_tty_write, iter); -DECL_FUNC_ARG(redirected_tty_write, buf); -DECL_FUNC_ARG(redirected_tty_write, count); -DECL_FUNC_ARG_EXISTS(redirected_tty_write, iter); - SEC("tp_btf/sched_process_fork") int BPF_PROG(sched_process_fork, const struct task_struct *parent, const struct task_struct *child) { @@ -241,18 +235,14 @@ int BPF_KPROBE(kprobe__commit_creds, struct cred *new) return commit_creds__enter(new); } -static int tty_write__enter(const char *buf, ssize_t count, struct file *f) +#define MAX_NR_SEGS 8 + +static int tty_write__enter(struct kiocb *iocb, struct iov_iter *from) { if (is_consumer()) goto out; - if (count <= 0) - goto out; - - struct ebpf_process_tty_write_event *event = bpf_ringbuf_reserve(&ringbuf, sizeof(*event), 0); - if (!event) - goto out; - + struct file *f = BPF_CORE_READ(iocb, ki_filp); struct tty_file_private *tfp = (struct tty_file_private *)BPF_CORE_READ(f, private_data); struct tty_struct *tty = BPF_CORE_READ(tfp, tty); @@ -263,108 +253,75 @@ static int tty_write__enter(const char *buf, ssize_t count, struct file *f) // https://elixir.bootlin.com/linux/v5.19.9/source/drivers/tty/tty_io.c#L2643 bool is_master = false; struct ebpf_tty_dev master = {}; + struct ebpf_tty_dev slave = {}; if (BPF_CORE_READ(tty, driver, type) == TTY_DRIVER_TYPE_PTY && BPF_CORE_READ(tty, driver, subtype) == PTY_TYPE_MASTER) { struct tty_struct *tmp = BPF_CORE_READ(tty, link); ebpf_tty_dev__fill(&master, tty); - ebpf_tty_dev__fill(&event->tty, tmp); + ebpf_tty_dev__fill(&slave, tmp); is_master = true; } else { - ebpf_tty_dev__fill(&event->tty, tty); + ebpf_tty_dev__fill(&slave, tty); } - event->hdr.type = EBPF_EVENT_PROCESS_TTY_WRITE; - event->hdr.ts = bpf_ktime_get_ns(); - - u64 len = count > TTY_OUT_MAX ? TTY_OUT_MAX : count; - event->tty_out_len = len; - event->tty_out_truncated = count > TTY_OUT_MAX ? count - TTY_OUT_MAX : 0; - - const struct task_struct *task = (struct task_struct *)bpf_get_current_task(); - ebpf_pid_info__fill(&event->pids, task); - ebpf_ctty__fill(&event->ctty, task); - bpf_get_current_comm(event->comm, TASK_COMM_LEN); - - if (event->tty.major == 0 && event->tty.minor == 0) { - bpf_ringbuf_discard(event, 0); + if (slave.major == 0 && slave.minor == 0) { goto out; } - if (bpf_probe_read_user(event->tty_out, len, (void *)buf)) { - bpf_printk("tty_write__enter: error reading buf\n"); - bpf_ringbuf_discard(event, 0); + if ((is_master && !(master.termios.c_lflag & ECHO)) && !(slave.termios.c_lflag & ECHO)) { goto out; } - if ((is_master && !(master.termios.c_lflag & ECHO)) && !(event->tty.termios.c_lflag & ECHO)) { - bpf_printk("tty_write__enter: discarding %s\n", event->tty_out); - bpf_ringbuf_discard(event, 0); - goto out; - } + const struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + u64 nr_segs = BPF_CORE_READ(from, nr_segs); + nr_segs = nr_segs > MAX_NR_SEGS ? MAX_NR_SEGS : nr_segs; + const struct iovec *iov = BPF_CORE_READ(from, iov); - bpf_ringbuf_submit(event, 0); + for (u8 seg = 0; seg < nr_segs; seg++) { + struct ebpf_process_tty_write_event *event = + bpf_ringbuf_reserve(&ringbuf, sizeof(*event), 0); + if (!event) + goto out; + + struct iovec *cur_iov = (struct iovec *)&iov[seg]; + const char *base = BPF_CORE_READ(cur_iov, iov_base); + size_t len = BPF_CORE_READ(cur_iov, iov_len); + if (len <= 0) { + bpf_ringbuf_discard(event, 0); + continue; + } + + event->hdr.type = EBPF_EVENT_PROCESS_TTY_WRITE; + event->hdr.ts = bpf_ktime_get_ns(); + u64 len_cap = len > TTY_OUT_MAX ? TTY_OUT_MAX : len; + event->tty_out_len = len_cap; + event->tty_out_truncated = len > TTY_OUT_MAX ? len - TTY_OUT_MAX : 0; + event->tty = slave; + ebpf_pid_info__fill(&event->pids, task); + ebpf_ctty__fill(&event->ctty, task); + bpf_get_current_comm(event->comm, TASK_COMM_LEN); + + if (bpf_probe_read_user(event->tty_out, len_cap, (void *)base)) { + bpf_printk("tty_write__enter: error reading base\n"); + bpf_ringbuf_discard(event, 0); + goto out; + } + + bpf_ringbuf_submit(event, 0); + } out: return 0; } SEC("fentry/tty_write") -int BPF_PROG(fentry__tty_write) +int BPF_PROG(fentry__tty_write, struct kiocb *iocb, struct iov_iter *from) { - const char *buf; - ssize_t count; - struct file *f; - - if (FUNC_ARG_EXISTS(redirected_tty_write, iter)) { - struct iov_iter *ii = FUNC_ARG_READ(___type(ii), redirected_tty_write, iter); - buf = BPF_CORE_READ(ii, iov, iov_base); - count = BPF_CORE_READ(ii, iov, iov_len); - - struct kiocb *iocb = (struct kiocb *)ctx[0]; - f = BPF_CORE_READ(iocb, ki_filp); - } else { - buf = FUNC_ARG_READ(___type(buf), redirected_tty_write, buf); - count = FUNC_ARG_READ(___type(count), redirected_tty_write, count); - - f = (struct file *)ctx[0]; - } - - return tty_write__enter(buf, count, f); + return tty_write__enter(iocb, from); } SEC("kprobe/tty_write") -int BPF_KPROBE(kprobe__tty_write) +int BPF_KPROBE(kprobe__tty_write, struct kiocb *iocb, struct iov_iter *from) { - const char *buf; - ssize_t count; - struct file *f; - - if (FUNC_ARG_EXISTS(redirected_tty_write, iter)) { - struct iov_iter ii; - if (FUNC_ARG_READ_PTREGS(ii, redirected_tty_write, iter)) { - bpf_printk("kprobe__tty_write: error reading iov_iter\n"); - goto out; - } - buf = BPF_CORE_READ(ii.iov, iov_base); - count = BPF_CORE_READ(ii.iov, iov_len); - - struct kiocb *iocb = (struct kiocb *)PT_REGS_PARM1(ctx); - f = BPF_CORE_READ(iocb, ki_filp); - } else { - if (FUNC_ARG_READ_PTREGS(buf, redirected_tty_write, buf)) { - bpf_printk("kprobe__tty_write: error reading buf\n"); - goto out; - } - if (FUNC_ARG_READ_PTREGS(count, redirected_tty_write, count)) { - bpf_printk("kprobe__tty_write: error reading count\n"); - goto out; - } - - f = (struct file *)PT_REGS_PARM1(ctx); - } - - return tty_write__enter(buf, count, f); - -out: - return 0; + return tty_write__enter(iocb, from); } diff --git a/README.md b/README.md index 8369cd7f..ac21740e 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ located under the `GPL/` directory while all non-GPL code is located under the ## Event Sourcing -On newer kernels (5.10.10+), Elastic endpoint uses eBPF to source the various +On newer kernels (5.10.16+), Elastic endpoint uses eBPF to source the various security events it ultimately sends up to an Elasticsearch cluster (e.g. process execution, file creation, file rename). On older kernels, this data is sourced via diff --git a/non-GPL/Events/EventsTrace/EventsTrace.c b/non-GPL/Events/EventsTrace/EventsTrace.c index 959739a8..60d5224f 100644 --- a/non-GPL/Events/EventsTrace/EventsTrace.c +++ b/non-GPL/Events/EventsTrace/EventsTrace.c @@ -18,7 +18,6 @@ #include #include -#include #include #include @@ -54,10 +53,6 @@ enum cmdline_opts { NETWORK_CONNECTION_ATTEMPTED, NETWORK_CONNECTION_ACCEPTED, NETWORK_CONNECTION_CLOSED, - - // Features - BPF_TRAMP, - CMDLINE_MAX }; @@ -78,10 +73,6 @@ static uint64_t cmdline_to_lib[CMDLINE_MAX] = { x(NETWORK_CONNECTION_ACCEPTED) x(NETWORK_CONNECTION_CLOSED) #undef x - -#define x(name) [name] = EBPF_FEATURE_##name, - x(BPF_TRAMP) -#undef x // clang-format on }; @@ -105,17 +96,13 @@ static const struct argp_option opts[] = { "Print network connection closed events", 0}, {"print-features-on-init", 'i', NULL, false, "Print a message with feature information when probes have been successfully loaded", 1}, - {"features-autodetect", 'd', NULL, false, "Autodetect features based on running kernel", 1}, - {"set-bpf-tramp", EBPF_FEATURE_BPF_TRAMP, NULL, false, "Set feature supported: bpf trampoline", - 1}, {"unbuffer-stdout", 'u', NULL, false, "Disable userspace stdout buffering", 2}, {"libbpf-verbose", 'v', NULL, false, "Log verbose libbpf logs to stderr", 2}, {}, }; -uint64_t g_events_env = 0; -uint64_t g_features_env = 0; -uint64_t g_features_autodetect = 0; +uint64_t g_events_env = 0; +uint64_t g_features_env = 0; bool g_print_features_init = 0; bool g_unbuffer_stdout = 0; @@ -136,9 +123,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'a': g_events_env = UINT64_MAX; break; - case 'd': - g_features_autodetect = 1; - break; case FILE_DELETE: case FILE_CREATE: case FILE_RENAME: @@ -154,9 +138,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case NETWORK_CONNECTION_CLOSED: g_events_env |= cmdline_to_lib[key]; break; - case BPF_TRAMP: - g_features_env |= cmdline_to_lib[key]; - break; case ARGP_KEY_ARG: argp_usage(state); break; @@ -218,11 +199,6 @@ static void out_int(const char *name, const long value) printf("\"%s\":%ld", name, value); } -static void out_bool(const char *name, const bool value) -{ - printf("\"%s\":\"%s\"", name, value ? "TRUE" : "FALSE"); -} - static void out_string(const char *name, const char *value) { printf("\"%s\":\"", name); @@ -269,9 +245,6 @@ static void out_tty_dev(const char *name, struct ebpf_tty_dev *tty_dev) out_int("winsize_rows", tty_dev->winsize.rows); out_comma(); out_int("winsize_cols", tty_dev->winsize.cols); - out_comma(); - out_bool("ECHO", tty_dev->termios.c_lflag & ECHO); - out_object_end(); } @@ -720,12 +693,7 @@ int main(int argc, char **argv) if (g_libbpf_verbose) ebpf_set_verbose_logging(); - struct ebpf_event_ctx_opts opts = {.events = g_events_env, .features = g_features_env}; - - if (g_features_autodetect) - ebpf_detect_system_features(&opts.features); - - err = ebpf_event_ctx__new(&ctx, event_ctx_callback, opts); + err = ebpf_event_ctx__new(&ctx, event_ctx_callback, g_events_env); if (err < 0) { fprintf(stderr, "Could not create event context: %d %s\n", err, strerror(-err)); @@ -733,7 +701,7 @@ int main(int argc, char **argv) } if (g_print_features_init) - print_init_msg(opts.features); + print_init_msg(ebpf_event_ctx__get_features(ctx)); while (!exiting) { err = ebpf_event_ctx__next(ctx, 10); diff --git a/non-GPL/Events/Lib/EbpfEvents.c b/non-GPL/Events/Lib/EbpfEvents.c index ded31aec..471d09fe 100644 --- a/non-GPL/Events/Lib/EbpfEvents.c +++ b/non-GPL/Events/Lib/EbpfEvents.c @@ -21,6 +21,9 @@ #include "EventProbe.skel.h" +#define KERNEL_VERSION(maj, min, patch) \ + (((maj) << 16) | ((min) << 8) | (patch > 255 ? 255 : (patch))) + bool log_verbose = false; static int verbose(const char *fmt, ...); @@ -30,6 +33,7 @@ struct ring_buf_cb_ctx { }; struct ebpf_event_ctx { + uint64_t features; struct ring_buffer *ringbuf; struct EventProbe_bpf *probe; struct ring_buf_cb_ctx *cb_ctx; @@ -194,22 +198,6 @@ static int probe_fill_relos(struct btf *btf, struct EventProbe_bpf *obj) } err = err ?: FILL_FUNC_RET_IDX(obj, btf, vfs_rename); - /* From https://github.com/elastic/ebpf/pull/116#issue-1327583872 - * - * tty_write BTF info is not available on ARM64 kernels built - * with pahole < 1.22 due to a bug in pahole. - * Use redirected_tty_write BTF info as function signature check - * since it changes in the exact same version as tty_write (5.10.10-5.10.11) - * and has the same parameters/indexes we need. - * This could break in the future if any of the signature changes. - */ - if (FILL_FUNC_ARG_EXISTS(obj, btf, redirected_tty_write, iter)) { - err = err ?: FILL_FUNC_ARG_IDX(obj, btf, redirected_tty_write, buf); - err = err ?: FILL_FUNC_ARG_IDX(obj, btf, redirected_tty_write, count); - } else { - err = err ?: FILL_FUNC_ARG_IDX(obj, btf, redirected_tty_write, iter); - } - return err; } @@ -239,9 +227,12 @@ static inline int probe_set_autoload(struct btf *btf, struct EventProbe_bpf *obj err = err ?: bpf_program__set_autoload(obj->progs.fexit__tcp_v6_connect, false); } - // tty_write BTF information is not available on all supported kernels - // due to a pahole bug. - // If it is not present we can't attach a fentry/ program to it, so fallback to a kprobe. + // tty_write BTF information is not available on all supported kernels due + // to a pahole bug, see: + // https://rhysre.net/how-an-obscure-arm64-link-option-broke-our-bpf-probe.html + // + // If BTF is not present we can't attach a fentry/ program to it, so + // fallback to a kprobe. if (has_bpf_tramp && BTF_FUNC_EXISTS(btf, tty_write)) { err = err ?: bpf_program__set_autoload(obj->progs.kprobe__tty_write, false); } else { @@ -375,16 +366,122 @@ static bool system_has_bpf_tramp() return ret; } -int ebpf_detect_system_features(uint64_t *features) +static uint64_t detect_system_features() { - if (!features) - return -EINVAL; + uint64_t features = 0; - *features = 0; if (system_has_bpf_tramp()) - *features |= EBPF_FEATURE_BPF_TRAMP; + features |= EBPF_FEATURE_BPF_TRAMP; - return 0; + return features; +} + +static bool system_has_btf(void) +{ + struct btf *btf = btf__load_vmlinux_btf(); + if (libbpf_get_error(btf)) { + verbose("Kernel does not support BTF, bpf events are not supported\n"); + return false; + } else { + btf__free(btf); + return true; + } +} + +static uint64_t get_kernel_version(void) +{ + int maj = 0, min = 0, patch = 0; + + // Ubuntu kernels do not report the true upstream kernel source version in + // utsname.release, they report the "ABI version", which is the upstream + // kernel major.minor with some extra ABI information, e.g.: + // 5.15.0-48-generic. The upstream patch version is always set to 0. + // + // Ubuntu provides a file under procfs that reports the actual upstream + // source version, so we use that instead if it exists. + if (access("/proc/version_signature", R_OK) == 0) { + FILE *f = fopen("/proc/version_signature", "r"); + if (f) { + // Example: Ubuntu 5.15.0-48.54-generic 5.15.53 + if (fscanf(f, "%*s %*s %d.%d.%d\n", &maj, &min, &patch) == 3) { + fclose(f); + return KERNEL_VERSION(maj, min, patch); + } + + fclose(f); + } + + verbose("Ubuntu version file exists but could not be parsed, using uname\n"); + } + + struct utsname un; + if (uname(&un) == -1) { + verbose("uname failed: %d: %s\n", errno, strerror(errno)); + return 0; + } + + char *debian_start = strstr(un.version, "Debian"); + if (debian_start != NULL) { + // We're running on Debian. + // + // Like Ubuntu, what Debian reports in the un.release buffer is the + // "ABI version", which is the major.minor of the upstream, with the + // patch always set to 0 (and some further ABI numbers). e.g.: + // 5.10.0-18-amd64 + // + // See the following docs for more info: + // https://kernel-team.pages.debian.net/kernel-handbook/ch-versions.html + // + // Unlike Ubuntu, Debian does not provide a special procfs file + // indicating the actual upstream source. Instead, it puts the actual + // upstream source version into the un.version field, after the string + // "Debian": + // + // $ uname -a + // Linux bullseye 5.10.0-18-amd64 #1 SMP Debian 5.10.140-1 (2022-09-02) x86_64 GNU/Linux + // + // $ uname -v + // #1 SMP Debian 5.10.140-1 (2022-09-02) + // + // Due to this, we pull the upstream kernel source out of un.version here. + if (sscanf(debian_start, "Debian %d.%d.%d", &maj, &min, &patch) != 3) { + verbose("could not parse uname version string: %s\n", un.version); + return 0; + } + + return KERNEL_VERSION(maj, min, patch); + } + + // We're not on Ubuntu or Debian, un.release should tell us the actual + // upstream source + if (sscanf(un.release, "%d.%d.%d", &maj, &min, &patch) != 3) { + verbose("could not parse uname release string: %d: %s\n", errno, strerror(errno)); + return 0; + } + + return KERNEL_VERSION(maj, min, patch); +} + +static bool kernel_version_is_supported(void) +{ + // We only support Linux 5.10.16+ + // + // Linux commit e114dd64c0071500345439fc79dd5e0f9d106ed (went in in + // 5.11/5.10.16) fixed a verifier bug that (as of 9/28/2022) causes our + // probes to fail to load. + // + // Theoretically, we could push support back to 5.8 without any + // foundational changes (the BPF ringbuffer was added in 5.8, we'd need to + // use per-cpu perfbuffers prior to that), but, for the time being, it's + // been decided that this is more hassle than it's worth. + uint64_t kernel_version = get_kernel_version(); + if (kernel_version < KERNEL_VERSION(5, 10, 16)) { + verbose("kernel version is < 5.10.16 (version code: %x), bpf events are not supported\n", + kernel_version); + return false; + } + + return true; } static int libbpf_verbose_print(enum libbpf_print_level lvl, const char *fmt, va_list args) @@ -410,13 +507,26 @@ int ebpf_set_verbose_logging() return 0; } -int ebpf_event_ctx__new(struct ebpf_event_ctx **ctx, - ebpf_event_handler_fn cb, - struct ebpf_event_ctx_opts opts) +uint64_t ebpf_event_ctx__get_features(struct ebpf_event_ctx *ctx) +{ + return ctx->features; +} + +int ebpf_event_ctx__new(struct ebpf_event_ctx **ctx, ebpf_event_handler_fn cb, uint64_t events) { struct EventProbe_bpf *probe = NULL; struct btf *btf = NULL; + // Our probes aren't 100% guaranteed to load if these two facts are true + // e.g. maybe someone compiled a kernel without kprobes or bpf trampolines. + // However, checking these two things should cover the vast majority of + // failure cases, allowing us to print a more understandable message than + // what you'd get if you just tried to load the probes. + if (!kernel_version_is_supported() || !system_has_btf()) { + verbose("this system does not support BPF events (see logs)\n"); + return -ENOTSUP; + } + // ideally we'd be calling // // ```c @@ -442,6 +552,8 @@ int ebpf_event_ctx__new(struct ebpf_event_ctx **ctx, if (err != 0) goto out_destroy_probe; + uint64_t features = detect_system_features(); + btf = btf__load_vmlinux_btf(); if (libbpf_get_error(btf)) { verbose("could not load system BTF (does the kernel have BTF?)"); @@ -464,7 +576,7 @@ int ebpf_event_ctx__new(struct ebpf_event_ctx **ctx, if (err != 0) goto out_destroy_probe; - err = probe_set_autoload(btf, probe, opts.features); + err = probe_set_autoload(btf, probe, features); if (err != 0) goto out_destroy_probe; @@ -484,8 +596,9 @@ int ebpf_event_ctx__new(struct ebpf_event_ctx **ctx, err = -ENOMEM; goto out_destroy_probe; } - (*ctx)->probe = probe; - probe = NULL; + (*ctx)->probe = probe; + (*ctx)->features = features; + probe = NULL; struct ring_buffer_opts rb_opts; rb_opts.sz = sizeof(rb_opts); @@ -497,7 +610,7 @@ int ebpf_event_ctx__new(struct ebpf_event_ctx **ctx, } (*ctx)->cb_ctx->cb = cb; - (*ctx)->cb_ctx->events_mask = opts.events; + (*ctx)->cb_ctx->events_mask = events; (*ctx)->ringbuf = ring_buffer__new(bpf_map__fd((*ctx)->probe->maps.ringbuf), ring_buf_cb, (*ctx)->cb_ctx, &rb_opts); diff --git a/non-GPL/Events/Lib/EbpfEvents.h b/non-GPL/Events/Lib/EbpfEvents.h index 9aba0160..c9b4a465 100644 --- a/non-GPL/Events/Lib/EbpfEvents.h +++ b/non-GPL/Events/Lib/EbpfEvents.h @@ -24,16 +24,9 @@ struct ebpf_event_ctx; typedef int (*ebpf_event_handler_fn)(struct ebpf_event_header *); -struct ebpf_event_ctx_opts { - uint64_t events; - uint64_t features; -}; - /* Turn on logging of all libbpf debug logs to stderr */ int ebpf_set_verbose_logging(); -int ebpf_detect_system_features(uint64_t *features); - /* Allocates a new context based on requested events and capabilities. * * If ctx is NULL, the function returns right after loading and attaching the @@ -43,9 +36,9 @@ int ebpf_detect_system_features(uint64_t *features); * on success. Returns an error on failure. If ctx is NULL, * returns 0 on success or less than 0 on failure. */ -int ebpf_event_ctx__new(struct ebpf_event_ctx **ctx, - ebpf_event_handler_fn cb, - struct ebpf_event_ctx_opts opts); +int ebpf_event_ctx__new(struct ebpf_event_ctx **ctx, ebpf_event_handler_fn cb, uint64_t events); + +uint64_t ebpf_event_ctx__get_features(struct ebpf_event_ctx *ctx); /* Consumes as many events as possible from the event context and returns the * number consumed. diff --git a/testing/testrunner/eventstrace.go b/testing/testrunner/eventstrace.go index 97b53302..04c81663 100644 --- a/testing/testrunner/eventstrace.go +++ b/testing/testrunner/eventstrace.go @@ -132,7 +132,7 @@ func (et *EventsTraceInstance) Stop() error { func NewEventsTrace(ctx context.Context, args ...string) *EventsTraceInstance { var et EventsTraceInstance - args = append(args, "--print-features-on-init", "--unbuffer-stdout", "--libbpf-verbose", "--features-autodetect") + args = append(args, "--print-features-on-init", "--unbuffer-stdout", "--libbpf-verbose") et.Cmd = exec.CommandContext(ctx, eventsTraceBinPath, args...) stdout, err := et.Cmd.StdoutPipe()