diff --git a/CHANGELOG.md b/CHANGELOG.md index c40732f..fd18e2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,3 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Integration with libFuzzer's `FuzzedDataProvider`. - Examples with tests. - Documentation with usecases, API etc. +- Two ways to approximate the number of counters needed for interpreted code. + +### Fixed +- Interpreted code counters were never handed to libFuzzer. (#12) +- Bad lifetime and initialization of `struct sigaction`. diff --git a/luzer/CMakeLists.txt b/luzer/CMakeLists.txt index 785a9e2..ca8b7da 100644 --- a/luzer/CMakeLists.txt +++ b/luzer/CMakeLists.txt @@ -23,6 +23,7 @@ set(LUZER_SOURCES luzer.c fuzzed_data_provider.cc tracer.c counters.c + io.cc ${CMAKE_CURRENT_BINARY_DIR}/version.c) add_library(${CMAKE_PROJECT_NAME} SHARED ${LUZER_SOURCES}) diff --git a/luzer/counters.c b/luzer/counters.c index 68d1ebb..3701b64 100644 --- a/luzer/counters.c +++ b/luzer/counters.c @@ -22,15 +22,15 @@ void __sanitizer_cov_pcs_init(uint8_t* pcs_beg, uint8_t* pcs_end); } /* extern "C" */ #endif -static const int kDefaultNumCounters = 1 << 20; +static const size_t kDefaultNumCounters = 1 << 20; // Number of counters requested by Lua instrumentation. -int counter_index = 0; +size_t counter_index = 0; // Number of counters given to Libfuzzer. -int counter_index_registered = 0; +size_t counter_index_registered = 0; // Maximum number of counters and pctable entries that may be reserved and also // the number that are allocated. -int max_counters = 0; +size_t max_counters = 0; // Counter Allocations. These are allocated once, before __sanitize_... are // called and can only be deallocated by test_only_reset_counters.
unsigned char* counters = NULL; @@ -51,32 +51,34 @@ test_only_reset_counters(void) { counter_index_registered = 0; } -NO_SANITIZE int -reserve_counters(int counters) { - int ret = counter_index; - counter_index += counters; +NO_SANITIZE size_t +reserve_counters(size_t amount) { + size_t ret = counter_index; + counter_index += amount; return ret; } -NO_SANITIZE int +NO_SANITIZE size_t reserve_counter(void) { return counter_index++; } NO_SANITIZE void -increment_counter(int counter_index) +increment_counter(size_t index) { - if (counters != NULL && pctable != NULL) { - // `counters` is an allocation of length `max_counters`. If we reserve more - // than the allocated number of counters, we'll wrap around and overload - // old counters, trading away fuzzing quality for limits on memory usage. - counters[counter_index % max_counters]++; + if (counters != NULL && counter_index_registered != 0) { + // Global array `counters` is an allocation of length `max_counters`, but + // only the first `counter_index_registered` entries are used here; that + // value may still be zero, hence the guard against `index % 0`. + // If more counters are reserved than registered, we wrap around and overload + // old counters, trading away fuzzing quality for limits on memory usage. + counters[index % counter_index_registered]++; } } NO_SANITIZE void -set_max_counters(int max) +set_max_counters(size_t max) { if (counters != NULL && pctable != NULL) { fprintf(stderr, "Internal error: attempt to set max number of counters after " @@ -89,7 +91,7 @@ set_max_counters(int max) max_counters = max; } -NO_SANITIZE int +NO_SANITIZE size_t get_max_counters(void) { return max_counters; @@ -122,7 +124,7 @@ allocate_counters_and_pcs(void) { } } - const int next_index = MIN(counter_index, max_counters); + const size_t next_index = MIN(counter_index, max_counters); if (counter_index_registered >= next_index) { // There are no counters to pass.
Perhaps because we've reserved more than // max_counters, or because no counters have been reserved since this was diff --git a/luzer/counters.h b/luzer/counters.h index 4bfa9f4..4786c5c 100644 --- a/luzer/counters.h +++ b/luzer/counters.h @@ -1,5 +1,6 @@ #ifndef LUZER_COUNTERS_H_ #define LUZER_COUNTERS_H_ +#include <stddef.h> struct PCTableEntry { void* pc; @@ -8,21 +9,21 @@ struct PCTableEntry { // Sets the global number of counters. // Must not be called after InitializeCountersWithLLVM is called. -void set_max_counters(int max); +void set_max_counters(size_t max); // Returns the maximum number of allocatable luzer counters. If more than this // many counters are reserved, luzer reuses counters, lowering fuzz quality. -int get_max_counters(void); +size_t get_max_counters(void); // Returns a new counter index. -int reserve_counter(void); +size_t reserve_counter(void); // Reserves a number of counters with contiguous indices, and returns the first // index. -int reserve_counters(int counters); +size_t reserve_counters(size_t amount); // Increments a counter at the given index. If more than the maximum number of // counters has been reserved, reuse counters. -void increment_counter(int counter_index); +void increment_counter(size_t index); typedef struct counter_and_pc_table_range { unsigned char* counters_start; diff --git a/luzer/io.cc b/luzer/io.cc new file mode 100644 index 0000000..69c1b7b --- /dev/null +++ b/luzer/io.cc @@ -0,0 +1,91 @@ +/* + * SPDX-License-Identifier: ISC + * + */ +#include <cstdint> +#include <string> +#include <vector> +/** + * Okay, we all know this is bad, but unless we want to include third-party + * headers or libs to do crossplatform IO (damn Windows cannot into readdir) + * we better use whatever libfuzzer... shyly gives to us with no guarantees. + * Remember - those things do not have ATTRIBUTE_INTERFACE in LF's codebase. + * Bu-u-u-ut libfuzzer is pretty much in maintenance mode so I think it's + * safe. + * What's worse than using non-public-API is using C++.
But this project already + * uses clang++ with 'fuzzed_data_provider.cc'. Hey, libfuzzer IS written in C++. + */ + +extern "C" { +#include "macros.h" + + int map_over_dir_contents(char const *dirpath, int (*user_cb)(uint8_t const *data, size_t length)); +} + +/** + * See links for source of this + * https://github.com/llvm/llvm-project/blob/493cc71d72c471c841b490f30dd8f26f3a0d89de/compiler-rt/lib/fuzzer/FuzzerIO.cpp#L101 + * https://github.com/llvm/llvm-project/blob/493cc71d72c471c841b490f30dd8f26f3a0d89de/compiler-rt/lib/fuzzer/FuzzerDefs.h#L41 + */ +namespace fuzzer { +#if __clang_major__ <= 13 + template<typename T> + class fuzzer_allocator: public std::allocator<T> { + public: + fuzzer_allocator() = default; + + template<class U> + fuzzer_allocator(const fuzzer_allocator<U>&) {} + + template<class Other> + struct rebind { typedef fuzzer_allocator<Other> other; }; + }; + + template<typename T> + using Vector = std::vector<T, fuzzer_allocator<T>>; +#else // __clang_major__ <= 13 + template<typename T> + using Vector = std::vector<T>; +#endif + + typedef Vector<uint8_t> Unit; + + void ReadDirToVectorOfUnits( + const char *Path, + Vector<Unit> *V, + long *Epoch, + size_t MaxSize, + bool ExitOnError, + Vector<std::string> *VPaths = 0 + ); + + bool IsDirectory(const std::string &Path); +} + +NO_SANITIZE int +map_over_dir_contents(char const *dirpath, int (*user_cb)(uint8_t const * data, size_t length)) +{ + if (nullptr == user_cb || nullptr == dirpath) { + return -1; + } + + if (!fuzzer::IsDirectory(dirpath)) { + return -2; + } + + fuzzer::Vector<fuzzer::Unit> seed_corpus; + + fuzzer::ReadDirToVectorOfUnits( + dirpath, + &seed_corpus, + /*Epoch = */nullptr, + /*MaxSize = */SIZE_MAX, + /*ExitOnError = */false, + /*VPaths = */nullptr + ); + + for (const auto &unit : seed_corpus) { + user_cb(unit.data(), unit.size()); + } + return 0; +} diff --git a/luzer/luzer.c b/luzer/luzer.c index e6e94d3..2a059ac 100644 --- a/luzer/luzer.c +++ b/luzer/luzer.c @@ -26,6 +26,8 @@ #include "version.h" #include "luzer.h" +#define GLOBAL_BYTECODE_TO_COUNTERS_SCALE 4 + #define TEST_ONE_INPUT_FUNC "luzer_test_one_input" #define
CUSTOM_MUTATOR_FUNC "luzer_custom_mutator" #define CUSTOM_MUTATOR_LIB "libcustom_mutator.so.1" @@ -230,15 +232,6 @@ luaL_test_one_input(lua_State *L) NO_SANITIZE int TestOneInput(const uint8_t* data, size_t size) { - const counter_and_pc_table_range alloc = allocate_counters_and_pcs(); - if (alloc.counters_start && alloc.counters_end) { - __sanitizer_cov_8bit_counters_init(alloc.counters_start, - alloc.counters_end); - } - if (alloc.pctable_start && alloc.pctable_end) { - __sanitizer_cov_pcs_init(alloc.pctable_start, alloc.pctable_end); - } - lua_State *L = get_global_lua_state(); char *buf = calloc(size + 1, sizeof(char)); memcpy(buf, data, size); @@ -324,6 +317,133 @@ load_custom_mutator_lib(void) { return 0; } +/** + * Tries to assess how much bytecode is loaded. + * + * I looked into Lua's introspection capabilities, could not find anything good. + * There is https://github.com/leegao/see.lua but I don't think it is a good idea + * to make a PR with code for 5 different versions of a library to someone else's lib. + * Their idea is simple - decode bytecode at runtime. + * There is also https://github.com/siffiejoe/lua-getsize but reasoning is the same. + * They take signatures for 'struct Prototype' with them. Having them allows one to + * see what the Lua interpreter thinks the sizes are. + * So here we sit, in quite a pickle, yearning for a Lua-native cross-platform solution. + * + * Basically, this is stupid and straightforward - table tree walk from '_G'. + * '_G' is Lua's special table for global stuff. + * 'string.dump' works even in latest LuaJIT. Bytecode is not cross-platform but we don't + * need that. + * This will count everything in global scope and in proper packages due to 'package.loaded'. + * I found no way to access anything local without a reference to an activation record. + * + * It may be possible to find every stack somehow and walk every frame and do a 'getlocal' + * and 'getupvalue' on them.
No 'getstack' from within Lua tho, so one will have to write + * that in C. + * With 'struct Prototype' locals would be a cakewalk. + * + * This also can be written in C, but I see no reason for it. It should run only once. + * And a C implementation would require much more time. + */ +NO_SANITIZE static inline __attribute__((unused)) int +lua_approx_global_bytecode_size(lua_State *L) +{ + int error = 0; + static char const lua_func_source[] = "" + "local function _CountGlobalBytecodeSize()\n" + "local seen = {}\n" + "local bytecode_size = 0\n" + "local function what(x) return debug.getinfo(x, 'S').what end\n" + "local function recurse(table_to_count, tables_to_recurse)\n" + "if table_to_count == nil and #tables_to_recurse == 0 then\n" + "return\n" + "end\n" + "seen[table_to_count] = true\n" + "for k, v in pairs(table_to_count) do\n" + "if type(v) == 'function' and what(v) == 'Lua' then\n" + "-- we dont care for already-seen funcs\n" + "bytecode_size = bytecode_size + string.len(string.dump(v))\n" + "end\n" + "if type(v) == 'table' and not seen[v] then\n" + "tables_to_recurse[#tables_to_recurse+1] = v\n" + "seen[v]=true\n" + "end\n" + "end\n" + "local next_table = table.remove(tables_to_recurse)\n" + "-- tail call is expected\n" + "return recurse(next_table, tables_to_recurse)\n" + "end\n" + "recurse(_G, {})\n" + "return bytecode_size\n" + "end\n" + "return _CountGlobalBytecodeSize()\n" + ""; + error = luaL_loadbuffer(L, lua_func_source, strlen(lua_func_source), "line") || lua_pcall(L, 0, 1, 0); + if (error) { + fprintf(stderr, "%s\n", lua_tostring(L, -1)); + lua_pop(L, 1); /* pop error message from the stack */ + return -1; + } + /* NOTE: there are no guarantees for the lua_Number type + * it is usually 'double', but totally okay for lua install to have it be 'float' or even 'long'.
+ * Any C compile-time checks I know of would require a C11 compiler and even then would just produce a warning + */ + lua_Number inner_lua_retval = lua_tonumber(L, -1); + lua_pop(L, 1); + /* let the compiler do the implicit conversion; remember the value can theoretically be too large for int */ + return inner_lua_retval; +} + + +extern int map_over_dir_contents(char const *dirpath, int (*user_cb)(uint8_t const * data, size_t length)); + +/** + * Runs the target over some inputs to assess how many counters we really need. + * + * Now, without interpreter introspection, another way to count how many counters + * we need is to... simply count how many we can trigger. LF doesn't have special run + * modes for this; so we do this the hack-y way. The alternative hook just counts trigger times, + * not unique positions, so we should probably need no additional multipliers to get + * fewer collisions. + * + * All regular files below the path (means recursive walk) would be used as a seed input. + */ +NO_SANITIZE static inline int +lua_preseed_counters(lua_State *L, char const * seed_dir_path) +{ + int retval = 0; + char const * path_copy = strdup(seed_dir_path); + if (NULL == path_copy) { + return -3; + } + lua_sethook(L, collector_debug_hook, LUA_MASKCALL | LUA_MASKLINE, 0); + retval = map_over_dir_contents(path_copy, TestOneInput); + free((void*)path_copy); + lua_sethook(L, NULL, 0, 0); + return retval; +} + +NO_SANITIZE static inline int +lua_ctrs_alloc_notify_lf(lua_State *L) +{ + static int init_cntr = 0; + static counter_and_pc_table_range alloc; + if (0 == init_cntr) { + alloc = allocate_counters_and_pcs(); + init_cntr = 1; + } + if (alloc.counters_start && alloc.counters_end) { + __sanitizer_cov_8bit_counters_init(alloc.counters_start, alloc.counters_end); + } else { + luaL_error(L, "counters not allocated"); + } + if (alloc.pctable_start && alloc.pctable_end) { + __sanitizer_cov_pcs_init(alloc.pctable_start, alloc.pctable_end); + } else { + luaL_error(L, "pcs not allocated"); + } + return 0; +}
+ NO_SANITIZE static int luaL_fuzz(lua_State *L) { @@ -394,16 +514,10 @@ luaL_fuzz(lua_State *L) } lua_setglobal(L, TEST_ONE_INPUT_FUNC); - /** - * Hook is called when the Lua interpreter calls a function and when the - * interpreter is about to start the execution of a new line of code, or - * when it jumps back in the code (even to the same line). - * https://www.lua.org/pil/23.2.html - */ - lua_sethook(L, debug_hook, LUA_MASKCALL | LUA_MASKLINE, 0); lua_pushboolean(L, 1); - struct sigaction act; + /* this should have a proper lifetime and at least zero-initialization */ + static struct sigaction act; act.sa_handler = sig_handler; sigaction(SIGINT, &act, NULL); sigaction(SIGSEGV, &act, NULL); @@ -415,6 +529,33 @@ luaL_fuzz(lua_State *L) lua_pop(L, -1); set_global_lua_state(L); + + /* now we need to allocate counters for interpreted code + * but how much? let us try to approximate */ + /* strategy 1: scan lua interpreter from inside, count bytecode. + * A failed scan returns a negative size; converted to size_t it would + * wrap around to a huge reservation, so it is skipped instead. */ + const int bytecode_size = lua_approx_global_bytecode_size(L); + if (bytecode_size > 0) { + reserve_counters((size_t)bytecode_size * GLOBAL_BYTECODE_TO_COUNTERS_SCALE); + } + + /* strategy 2: run the target with select inputs, count how many times the hook even triggers */ + if (NULL != corpus_path) { + if (lua_preseed_counters(L, corpus_path)) { + fprintf(stderr, "WARN: luzer tried but failed to preseed counters\n"); + } + } + lua_ctrs_alloc_notify_lf(L); + + /** + * Hook is called when the Lua interpreter calls a function and when the + * interpreter is about to start the execution of a new line of code, or + * when it jumps back in the code (even to the same line).
+ * https://www.lua.org/pil/23.2.html + */ + lua_sethook(L, debug_hook, LUA_MASKCALL | LUA_MASKLINE, 0); + int rc = LLVMFuzzerRunDriver(&argc, &argv, &TestOneInput); luaL_cleanup(L); diff --git a/luzer/tracer.c b/luzer/tracer.c index 793c5d7..a528421 100644 --- a/luzer/tracer.c +++ b/luzer/tracer.c @@ -41,7 +41,8 @@ _trace_branch(uint64_t idx) increment_counter(idx); } -static inline unsigned int lhash(const char *key, size_t offset) +NO_SANITIZE static inline unsigned int +lhash(const char *key, size_t offset) { const char *const last = &key[strlen(key) - 1]; uint32_t h = LHASH_INIT; @@ -61,7 +62,8 @@ static inline unsigned int lhash(const char *key, size_t offset) * https://github.com/lunarmodules/luacov/blob/master/src/luacov/runner.lua#L102-L117 * https://github.com/lunarmodules/luacov/blob/78f3d5058c65f9712e6c50a0072ad8160db4d00e/src/luacov/runner.lua#L439-L450 */ -void debug_hook(lua_State *L, lua_Debug *ar) +NO_SANITIZE void +debug_hook(lua_State *L, lua_Debug *ar) { lua_getinfo(L, "Sln", ar); if (ar && ar->source && ar->currentline) { @@ -69,3 +71,17 @@ void debug_hook(lua_State *L, lua_Debug *ar) _trace_branch(new_location); } } + +/** + * This hook is used before counters are allocated, to get a general idea + * of how many of them we need for interpreted code. + */ +NO_SANITIZE void +collector_debug_hook(lua_State *L, lua_Debug *ar) +{ + /* check `ar` before lua_getinfo() uses it; a zero return means the query failed */ + if (ar && lua_getinfo(L, "Sln", ar) && ar->source && ar->currentline) { + reserve_counter(); + } +} + diff --git a/luzer/tracer.h b/luzer/tracer.h index 232d4a3..7d649fe 100644 --- a/luzer/tracer.h +++ b/luzer/tracer.h @@ -2,5 +2,6 @@ #define LUZER_TRACER_H_ void debug_hook(lua_State *L, lua_Debug *ar); +void collector_debug_hook(lua_State *L, lua_Debug *ar); #endif // LUZER_TRACER_H_