Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

luzer: reserve and handoff ctrs to lf #2

Merged
merged 2 commits into from
Dec 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Documentation with usecases, API etc.
- Support for command line arguments for libfuzzer.
- Environment variable to disable parsing of command line arguments for libfuzzer - `LUZER_NOT_USE_CLI_ARGS_FOR_LF`.
- Two ways to approximate amount of counters for interpreted code.

### Fixed
- Interfering coverage instrumentation of fuzzer internals (#11)
- Interpreted code counter never handed to libfuzzer. (#12)
- Bad lifetime and initialization of `struct sigaction`.
1 change: 1 addition & 0 deletions luzer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ set(LUZER_SOURCES luzer.c
tracer.c
counters.c
luzer_args.c
io.cc
${CMAKE_CURRENT_BINARY_DIR}/version.c)

add_library(${CMAKE_PROJECT_NAME} SHARED ${LUZER_SOURCES})
Expand Down
71 changes: 71 additions & 0 deletions luzer/io.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* SPDX-License-Identifier: ISC
*
*/
#include <vector>
#include <string>
#include <cstdint>
/**
* Okay, we all know this is bad, but unless we want to include third-party
* headers or libs to do crossplatform IO (damn Windows cannot into readdir)
* we better use whatever libfuzzer... shyly gives to us with no guarantees.
* Remember - those things do not have ATTRIBUTE_INTERFACE in LF's codebase.
* Bu-u-u-ut libfuzzer is pretty much in maintenance mode so I think it's
* safe.
* What's worse than using non-public-API is using C++. But this project already
* uses clang++ with 'fuzzed_data_provider.cc'. Hey, libfuzzer IS written in C++.
*/

/*
 * C linkage section: map_over_dir_contents is declared here with C linkage
 * because luzer.c declares it `extern` and calls it.
 */
extern "C" {
/* presumably supplies the NO_SANITIZE macro used on the definition below — TODO confirm */
#include "macros.h"

/* Prototype of the function defined later in this file. */
int map_over_dir_contents(char const *dirpath, int (*user_cb)(uint8_t const *data, size_t length));
}

/**
* See link for source of this
* https://github.com/llvm/llvm-project/blob/493cc71d72c471c841b490f30dd8f26f3a0d89de/compiler-rt/lib/fuzzer/FuzzerDefs.h#L41
*/
/* One fuzzer input: a plain byte buffer. */
using Unit = std::vector<uint8_t>;

/**
* See link for source of this
* https://github.com/llvm/llvm-project/blob/493cc71d72c471c841b490f30dd8f26f3a0d89de/compiler-rt/lib/fuzzer/FuzzerIO.cpp#L101
*/
namespace fuzzer {
/*
 * Forward declarations only; no definitions live in this file. They are
 * expected to be resolved against libFuzzer at link time, so the signatures
 * must stay in sync with the upstream sources linked in the comments above.
 */

/*
 * Fills *V with the contents of files found under Path.
 * NOTE(review): exact semantics of Epoch, MaxSize, ExitOnError and VPaths are
 * defined by libFuzzer, not here — check the upstream source before changing
 * the arguments passed by map_over_dir_contents.
 */
void ReadDirToVectorOfUnits(
const char *Path,
std::vector<Unit> *V, long *Epoch,
size_t MaxSize,
bool ExitOnError,
std::vector<std::string> *VPaths);
/* Used by map_over_dir_contents to reject paths that are not directories. */
bool IsDirectory(const std::string &Path);
}

/**
 * Loads the units that libFuzzer's ReadDirToVectorOfUnits finds under a
 * directory and passes each one to a callback.
 *
 * @param dirpath  path of the directory to read
 * @param user_cb  callback invoked once per loaded unit with a pointer to its
 *                 bytes and its length; the callback's return value is ignored
 * @return 0 after all units were passed to the callback,
 *         -1 if `dirpath` or `user_cb` is null,
 *         -2 if fuzzer::IsDirectory rejects `dirpath`
 */
NO_SANITIZE int
map_over_dir_contents(char const *dirpath, int (*user_cb)(uint8_t const * data, size_t length))
{
    if (nullptr == user_cb || nullptr == dirpath) {
        return -1;
    }

    if (!fuzzer::IsDirectory(dirpath)) {
        return -2;
    }

    std::vector<Unit> seed_corpus;
    fuzzer::ReadDirToVectorOfUnits(
        dirpath,
        &seed_corpus,
        /*Epoch = */nullptr,
        /*MaxSize = */SIZE_MAX,
        /*ExitOnError = */false,
        /*VPaths = */nullptr
    );

    /*
     * Iterate by const reference: a by-value loop variable would deep-copy
     * every unit's byte buffer just to read it.
     */
    for (const auto &unit : seed_corpus) {
        user_cb(unit.data(), unit.size());
    }
    return 0;
}

170 changes: 153 additions & 17 deletions luzer/luzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
#include "luzer_args.h"
#include "luzer.h"

#define GLOBAL_BYTECODE_TO_COUNTERS_SCALE 4

#define TEST_ONE_INPUT_FUNC "luzer_test_one_input"
#define CUSTOM_MUTATOR_FUNC "luzer_custom_mutator"
#define CUSTOM_MUTATOR_LIB "libcustom_mutator.so.1"
Expand Down Expand Up @@ -231,15 +233,6 @@ luaL_test_one_input(lua_State *L)

NO_SANITIZE int
TestOneInput(const uint8_t* data, size_t size) {
const counter_and_pc_table_range alloc = allocate_counters_and_pcs();
if (alloc.counters_start && alloc.counters_end) {
__sanitizer_cov_8bit_counters_init(alloc.counters_start,
alloc.counters_end);
}
if (alloc.pctable_start && alloc.pctable_end) {
__sanitizer_cov_pcs_init(alloc.pctable_start, alloc.pctable_end);
}

lua_State *L = get_global_lua_state();
char *buf = calloc(size + 1, sizeof(char));
memcpy(buf, data, size);
Expand Down Expand Up @@ -325,6 +318,133 @@ load_custom_mutator_lib(void) {
return 0;
}

/**
* Tries to assess how much bytecode is loaded.
*
* I looked into lua's introspection capabilities, could not find anything good.
* There is https://github.com/leegao/see.lua but I don't think it is a good idea
* to make a PR with code for 5 different versions of a library to someone else's lib.
* Their idea is simple - decode bytecode in runtime.
* There is also https://github.com/siffiejoe/lua-getsize but reasoning is the same.
* They take signatures for 'struct Prototype' with them. Having them allow to
* see what Lua interpreter thinks sizes are.
* So here we sit, in quite a pickle, yearning for a lua-native crossplatform solution.
*
* Basically, this is stupid and straightforward - a table tree walk from '_G'.
* '_G' is Lua's special table for global stuff.
* 'string.dump' works even in latest LuaJIT. Bytecode is not crossplatform but we don't
* need that.
* This will count everything in global scope and in proper packages due to 'package.loaded'.
* I found no way to access anything local without a reference to an activation record.
*
* It may be possible to find every stack somehow and walk every frame and do a 'getlocal'
* and 'getupvalue' on them. No 'getstack' from within Lua tho, so one will have to write
* that in C.
* With 'struct Prototype' locals would be a cakewalk.
*
* This also can be written in C, but I see no reason for it. It should run only once.
* And C implementation would require much more time.
*/
/**
 * Runs an embedded Lua chunk that walks the tables reachable from '_G' and
 * sums string.len(string.dump(f)) over every Lua function value it meets.
 *
 * @param L  Lua state to inspect
 * @return the sum converted to int, or -1 if the chunk fails to load or run
 *         (the error message is written to stderr)
 */
NO_SANITIZE static inline __attribute__((unused)) int
lua_approx_global_bytecode_size(lua_State *L)
{
    int error = 0;
    static char const lua_func_source[] = ""
        "function _CountGlobalBytecodeSize()\n"
        "local seen = {}\n"
        "local bytecode_size = 0\n"
        "local function what(x) return debug.getinfo(x, 'S').what end\n"
        "local function recurse(table_to_count, tables_to_recurse)\n"
        "if table_to_count == nil and #tables_to_recurse == 0 then\n"
        "return\n"
        "end\n"
        "seen[table_to_count] = true\n"
        "for k, v in pairs(table_to_count) do\n"
        "if type(v) == 'function' and what(v) == 'Lua' then\n"
        "-- we dont care for already-seen funcs\n"
        "bytecode_size = bytecode_size + string.len(string.dump(v))\n"
        "end\n"
        "if type(v) == 'table' and not seen[v] then\n"
        "tables_to_recurse[#tables_to_recurse+1] = v\n"
        "seen[v]=true\n"
        "end\n"
        "end\n"
        "local next_table = table.remove(tables_to_recurse)\n"
        "-- tail call is expected\n"
        "return recurse(next_table, tables_to_recurse)\n"
        "end\n"
        "recurse(_G, {})\n"
        "return bytecode_size\n"
        "end\n"
        "return _CountGlobalBytecodeSize()\n"
        "";
    /* Compile the chunk, then run it expecting exactly one result. */
    error = luaL_loadbuffer(L, lua_func_source, strlen(lua_func_source), "line") || lua_pcall(L, 0, 1, 0);
    if (error) {
        /*
         * The error object is not guaranteed to be a string; lua_tostring
         * returns NULL in that case and NULL must not reach "%s".
         */
        char const *msg = lua_tostring(L, -1);
        fprintf(stderr, "%s\n", msg ? msg : "(error object is not a string)");
        lua_pop(L, 1); /* pop error message from the stack */
        return -1;
    }
    /* NOTE: there is no guarantees for lua_Number type
     * it is usually 'double', but totally okay for lua install to have it be 'float' or even 'long'.
     * Any C compile-time checks I know would require C11 compiler and even then will just produce warn
     */
    lua_Number inner_lua_retval = lua_tonumber(L, -1);
    lua_pop(L, 1);
    /* conversion to int is deliberate; remember the value can theoretically be too large for int */
    return (int)inner_lua_retval;
}


/*
 * Defined in io.cc with C linkage. Per that definition: returns 0 on success,
 * -1 if either argument is null, -2 if dirpath is not a directory.
 */
extern int map_over_dir_contents(char const *dirpath, int (*user_cb)(uint8_t const * data, size_t length));

/**
* Runs the target over some inputs to assess how many counters we really need.
*
* Now, without interpreter introspection, another way to count how much counters
* we need is to... simply count how much can we trigger. LF doesn't have special run
* modes for this; so we do this hack-y way. Alternative hook just counts trigger times,
* not unique positions, so we should probably need no additional multipliers to get
* less collisions.
*
* All regular files below the path (means recursive walk) would be used as a seed input.
*/
/**
 * Replays the inputs found under `seed_dir_path` through TestOneInput while
 * collector_debug_hook is installed as the Lua call/line hook.
 *
 * @param L              Lua state whose hook is temporarily replaced
 * @param seed_dir_path  directory handed to map_over_dir_contents
 * @return the result of map_over_dir_contents, or -3 if the path could not
 *         be duplicated
 */
NO_SANITIZE static inline int
lua_preseed_counters(lua_State *L, char const * seed_dir_path)
{
    /* Work on a private copy of the path for the duration of the walk. */
    char *dir_dup = strdup(seed_dir_path);
    if (!dir_dup) {
        return -3;
    }

    /* The collecting hook is active only while the seed inputs are replayed. */
    lua_sethook(L, collector_debug_hook, LUA_MASKCALL | LUA_MASKLINE, 0);
    const int status = map_over_dir_contents(dir_dup, TestOneInput);
    lua_sethook(L, NULL, 0, 0);

    free(dir_dup);
    return status;
}

/**
 * Allocates the counter and PC-table ranges on the first call only, then
 * hands them to __sanitizer_cov_8bit_counters_init and
 * __sanitizer_cov_pcs_init.
 *
 * @param L  Lua state used to raise an error when a range is missing
 * @return 0 when both ranges were handed over; on a missing range the
 *         function raises a Lua error via luaL_error instead
 */
NO_SANITIZE static inline int
lua_ctrs_alloc_notify_lf(lua_State *L)
{
    static int is_allocated = 0;
    static counter_and_pc_table_range ranges;

    /* Allocation happens once; later calls reuse the stored ranges. */
    if (!is_allocated) {
        ranges = allocate_counters_and_pcs();
        is_allocated = 1;
    }

    if (!ranges.counters_start || !ranges.counters_end) {
        /* luaL_error does not return; `return` follows the usual Lua C idiom. */
        return luaL_error(L, "counters not allocated");
    }
    __sanitizer_cov_8bit_counters_init(ranges.counters_start, ranges.counters_end);

    if (!ranges.pctable_start || !ranges.pctable_end) {
        return luaL_error(L, "pcs not allocated");
    }
    __sanitizer_cov_pcs_init(ranges.pctable_start, ranges.pctable_end);

    return 0;
}

NO_SANITIZE static int
luaL_fuzz(lua_State *L)
{
Expand All @@ -348,16 +468,10 @@ luaL_fuzz(lua_State *L)
}
lua_setglobal(L, TEST_ONE_INPUT_FUNC);

/**
* Hook is called when the Lua interpreter calls a function and when the
* interpreter is about to start the execution of a new line of code, or
* when it jumps back in the code (even to the same line).
* https://www.lua.org/pil/23.2.html
*/
lua_sethook(L, debug_hook, LUA_MASKCALL | LUA_MASKLINE, 0);
lua_pushboolean(L, 1);

struct sigaction act;
/* this should have a proper lifetime and at least zero-initialization */
static struct sigaction act;
act.sa_handler = sig_handler;
sigaction(SIGINT, &act, NULL);
sigaction(SIGSEGV, &act, NULL);
Expand All @@ -369,6 +483,28 @@ luaL_fuzz(lua_State *L)
lua_pop(L, -1);

set_global_lua_state(L);

/* now we need to allocate counters for interpreted code
* but how much? let us try to approximate */
/* strategy 1: scan lua interpreter from inside, count bytecode */
reserve_counters(lua_approx_global_bytecode_size(L) * GLOBAL_BYTECODE_TO_COUNTERS_SCALE);

/* strategy 2: run the target with select inputs, count how many times the hook even triggers */
if (NULL != corpus_path) {
if (lua_preseed_counters(L, corpus_path)) {
fprintf(stderr, "WARN: luzer tried but failed to preseed counters\n");
}
}
lua_ctrs_alloc_notify_lf(L);

/**
* Hook is called when the Lua interpreter calls a function and when the
* interpreter is about to start the execution of a new line of code, or
* when it jumps back in the code (even to the same line).
* https://www.lua.org/pil/23.2.html
*/
lua_sethook(L, debug_hook, LUA_MASKCALL | LUA_MASKLINE, 0);

int rc = LLVMFuzzerRunDriver(&argc, &argv, &TestOneInput);
luaL_cleanup(L);

Expand Down
20 changes: 18 additions & 2 deletions luzer/tracer.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ _trace_branch(uint64_t idx)
increment_counter(idx);
}

static inline unsigned int lhash(const char *key, size_t offset)
NO_SANITIZE static inline unsigned int
lhash(const char *key, size_t offset)
{
const char *const last = &key[strlen(key) - 1];
uint32_t h = LHASH_INIT;
Expand All @@ -61,11 +62,26 @@ static inline unsigned int lhash(const char *key, size_t offset)
* https://github.com/lunarmodules/luacov/blob/master/src/luacov/runner.lua#L102-L117
* https://github.com/lunarmodules/luacov/blob/78f3d5058c65f9712e6c50a0072ad8160db4d00e/src/luacov/runner.lua#L439-L450
*/
void debug_hook(lua_State *L, lua_Debug *ar)
/**
 * Lua debug hook: hashes the (source, current line) pair of the event with
 * lhash and records the result through _trace_branch.
 *
 * @param L   Lua state that fired the hook
 * @param ar  activation record of the event, filled in by lua_getinfo
 */
NO_SANITIZE void
debug_hook(lua_State *L, lua_Debug *ar)
{
    /*
     * Validate `ar` before lua_getinfo uses it, and stop if lua_getinfo
     * reports failure. Only 'S' (source) and 'l' (currentline) are requested
     * because no name field is read here.
     */
    if (!ar || !lua_getinfo(L, "Sl", ar)) {
        return;
    }
    /*
     * NOTE(review): the Lua manual says currentline is -1 when no line
     * information is available; such events still pass this non-zero test —
     * confirm that is intended.
     */
    if (ar->source && ar->currentline) {
        const unsigned int new_location = lhash(ar->source, ar->currentline);
        _trace_branch(new_location);
    }
}

/**
* this one is used before we allocate counters to get general idea
* about how much of them do we need for interpreted code
*/
/**
 * Lua debug hook that calls reserve_counter() once for every event that has
 * a source and a non-zero current line.
 *
 * @param L   Lua state that fired the hook
 * @param ar  activation record of the event, filled in by lua_getinfo
 */
NO_SANITIZE void
collector_debug_hook(lua_State *L, lua_Debug *ar)
{
    /*
     * Validate `ar` before lua_getinfo uses it, and stop if lua_getinfo
     * reports failure. Only 'S' (source) and 'l' (currentline) are requested
     * because no name field is read here.
     */
    if (!ar || !lua_getinfo(L, "Sl", ar)) {
        return;
    }
    if (ar->source && ar->currentline) {
        reserve_counter();
    }
}

1 change: 1 addition & 0 deletions luzer/tracer.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
#define LUZER_TRACER_H_

void debug_hook(lua_State *L, lua_Debug *ar);
void collector_debug_hook(lua_State *L, lua_Debug *ar);

#endif // LUZER_TRACER_H_
Loading