From cc00253240cbfecad45fbd9870541f1fa4ca4a23 Mon Sep 17 00:00:00 2001 From: AndrewQuijano Date: Fri, 3 Jan 2025 14:58:29 -0500 Subject: [PATCH] Adding dwarf2 and pri_taint debug prints to help with LAVA debugging --- panda/plugins/dwarf2/dwarf2.cpp | 137 ++++++++++++-------- panda/plugins/pri_taint/pri_taint.cpp | 179 +++++++++++++++++++------- 2 files changed, 215 insertions(+), 101 deletions(-) diff --git a/panda/plugins/dwarf2/dwarf2.cpp b/panda/plugins/dwarf2/dwarf2.cpp index 6a72013d465..c81e18c471d 100644 --- a/panda/plugins/dwarf2/dwarf2.cpp +++ b/panda/plugins/dwarf2/dwarf2.cpp @@ -1039,7 +1039,7 @@ void pri_dwarf_plog(const char *file_callee, const char *fn_callee, uint64_t lno Panda__LogEntry ple = PANDA__LOG_ENTRY__INIT; // create a call or ret message - if (isCall){ + if (isCall) { ple.dwarf2_call = dwarf; } else{ @@ -1850,6 +1850,7 @@ bool load_debug_info(const char *dbg_prefix, const char *basename, uint64_t base return true; } +// You need this code to run to fill out TaintQueryPri bool read_debug_info(const char* dbg_prefix, const char *basename, uint64_t base_address, bool needs_reloc) { printf ("read_debug_info %s\n", dbg_prefix); @@ -1879,6 +1880,7 @@ bool looking_for_libc=false; const char *libc_host_path=NULL; std::string libc_name; +// Call back to the loaded plugin void on_library_load(CPUState *cpu, target_ulong pc, char *guest_lib_name, target_ulong base_addr, target_ulong size) { printf ("on_library_load guest_lib_name=%s\n", guest_lib_name); if (!correct_asid(cpu)) { @@ -1890,7 +1892,7 @@ void on_library_load(CPUState *cpu, target_ulong pc, char *guest_lib_name, targe //printf("Trying to load symbols for %s at %#x.\n", lib_name, base_addr); std::string lib = std::string(guest_lib_name); std::size_t found = lib.find(guest_debug_path); - if (found == std::string::npos){ + if (found == std::string::npos) { char *lib_name = strdup((host_mount_path + lib).c_str()); printf("access(%s, F_OK): %x\n", lib_name, access(lib_name, F_OK)); if (access(lib_name, F_OK) == -1) { @@ -1899,9 +1901,7 @@ void on_library_load(CPUState *cpu, target_ulong pc, char *guest_lib_name, targe } if (looking_for_libc && lib.find(libc_name) != std::string::npos) { -// if (lib.find("libc-2.13") != std::string::npos) { lib_name = strdup(libc_host_path); -// lib_name = strdup("/mnt/lava-32-qcow/usr/lib/debug/lib/i386-linux-gnu/i686/cmov/libc-2.13.so"); printf ("actually loading lib_name = %s\n", lib_name); bool needs_reloc = true; // elf_base != base_addr; read_debug_info(lib_name, basename(lib_name), base_addr, needs_reloc); @@ -1931,34 +1931,50 @@ void on_library_load(CPUState *cpu, target_ulong pc, char *guest_lib_name, targe } // We want to catch all loaded modules, but don't want to -// check every single call. This is a compromise -- check -// every 1000 calls. If we had a callback in OSI for -// on_library_load we could do away with this hack. -int mod_check_count = 0; +// check every single call. We use a callback in OSI for +// on_library_load. bool main_exec_initialized = false; -#define MOD_CHECK_FREQ 1000 bool ensure_main_exec_initialized(CPUState *cpu) { //if (!correct_asid(cpu)) return; OsiProc *p = get_current_process(cpu); GArray *libs = NULL; libs = get_mappings(cpu, p); free_osiproc(p); - if (!libs) + if (!libs) { + printf("get_mappings failed\n"); return false; - - //printf("[ensure_main_exec_initialized] looking at libraries\n"); + } + printf("[ensure_main_exec_initialized] looking at libraries for %s\n", proc_to_monitor); for (unsigned i = 0; i < libs->len; i++) { char fname[260] = {}; OsiModule *m = &g_array_index(libs, OsiModule, i); - if (!m->file) continue; - if (!m->name) continue; - std::string lib = std::string(m->file); if (debug) { - printf("[ensure_main_exec_initialized] looking at file %s\n", m->file); + printf("Iteration %d within the for loop of libraries in main_exec_initialized\n", i); + } + if (!m->file) { + if (debug) { + printf("Invalid file from OsiModule\n"); + } + continue; + } + if (!m->name) { + if (debug) { + printf("Invalid name from OsiModule\n"); + } + continue; + } + std::string lib = std::string(m->file); + + if (0 != strncmp(m->name, proc_to_monitor, strlen(m->name))) { + if (debug) { + printf("[ensure_main_exec_initialized] looking at file %s, skip this\n", m->file); + printf("[ensure_main_exec_initialized] looking at name %s, skip this\n", m->name); + } + continue; } - if (0 != strncmp(m->name, proc_to_monitor, strlen(m->name))) continue; - //printf("[ensure_main_exec_initialized] looking at file %s\n", m->file); + printf("[ensure_main_exec_initialized] Found that file, time to try loading... %s\n", m->file); + //std::size_t found = lib.find(guest_debug_path); //if (found == std::string::npos) continue; //std::string host_name = lib.substr(0, found) + @@ -2062,12 +2078,15 @@ void on_call(CPUState *cpu, target_ulong pc) { if (it == line_range_list.end() || pc < it->lowpc ){ auto it_dyn = addr_to_dynl_function.find(pc); if (it_dyn != addr_to_dynl_function.end()){ - if (debug) printf ("CALL: Found line info for 0x" TARGET_FMT_lx "\n", pc); + if (debug) { + printf ("CALL: Found line info for 0x" TARGET_FMT_lx "\n", pc); + } pri_runcb_on_fn_start(cpu, pc, NULL, it_dyn->second.c_str()); } else { - if (debug) + if (debug) { printf("CALL: Could not find line info for 0x" TARGET_FMT_lx "\n", pc); + } } return; } @@ -2129,7 +2148,7 @@ void on_ret(CPUState *cpu, target_ulong pc_func) { std::string file_name = it->filename; std::string funct_name = funcaddrs[cur_function]; cur_line = it->line_number; - //printf("RET: [%s] [0x%llx]-%s(), ln: %4lld, pc @ 0x%x\n",file_name.c_str(),cur_function, funct_name.c_str(),cur_line,pc_func); + //printf("RET: [%s] [0x%llx]-%s(), ln: %4lld, pc @ 0x%x\n",file_name.c_str(),cur_function, funct_name.c_str(),cur_line,pc_func) if (logCallSites) { dwarf_log_callsite(cpu, file_name.c_str(), funct_name.c_str(), cur_line, false); } @@ -2141,10 +2160,10 @@ void __livevar_iter(CPUState *cpu, std::vector vars, liveVarCB f, void *args, - target_ulong fp){ + target_ulong fp) { //printf("size of vars: %ld\n", vars.size()); - for (auto it : vars){ - std::string var_name = it.var_name; + for (auto it : vars) { + std::string var_name = it.var_name; DwarfVarType var_type {type_map[it.fname][it.cu][it.var_type], it.dec_line, var_name}; //enum LocType { LocReg, LocMem, LocConst, LocErr }; target_ulong var_loc; @@ -2179,14 +2198,14 @@ int livevar_find(CPUState *cpu, std::vector vars, liveVarPred pred, void *args, - VarInfo &ret_var){ + VarInfo &ret_var) { target_ulong fp = dwarf2_get_cur_fp(cpu, pc); - if (fp == (target_ulong) -1){ + if (fp == (target_ulong) -1) { printf("Error: was not able to get the Frame Pointer for the function %s at @ 0x" TARGET_FMT_lx "\n", funcaddrs[cur_function].c_str(), pc); return 0; } - for (auto it : vars){ + for (auto it : vars) { target_ulong var_loc; //process_dwarf_locs(locdesc[i]->ld_s, locdesc[i]->ld_cents); //printf("\n"); @@ -2206,7 +2225,7 @@ int livevar_find(CPUState *cpu, * end PPPs ******************************************************************** */ int compare_address(void *var_ty, const char *var_nm, LocType loc_t, target_ulong loc, void *query_address){ - switch (loc_t){ + switch (loc_t) { case LocReg: break; case LocMem: @@ -2238,7 +2257,7 @@ void dwarf_get_vma_symbol (CPUState *cpu, target_ulong pc, target_ulong vma, cha //VarInfo ret_var = VarInfo(NULL, NULL, NULL, 0); VarInfo ret_var; - if (livevar_find(cpu, pc, funcvars[fn_address], compare_address, (void *) &vma, ret_var)){ + if (livevar_find(cpu, pc, funcvars[fn_address], compare_address, (void *) &vma, ret_var)) { *symbol_name = (char *)ret_var.var_name.c_str(); return; } @@ -2257,7 +2276,7 @@ void dwarf_get_pc_source_info(CPUState *cpu, target_ulong pc, SrcInfo *info, int return; } auto it = std::lower_bound(line_range_list.begin(), line_range_list.end(), pc, CompareRangeAndPC()); - if (it == line_range_list.end() || pc < it->lowpc ){ + if (it == line_range_list.end() || pc < it->lowpc) { auto it_dyn = addr_to_dynl_function.find(pc); if (it_dyn != addr_to_dynl_function.end()){ //printf("In a a plt function\n"); @@ -2272,7 +2291,7 @@ void dwarf_get_pc_source_info(CPUState *cpu, target_ulong pc, SrcInfo *info, int return; } - if (it->lowpc == it->highpc){ + if (it->lowpc == it->highpc) { //printf("In a a plt function\n"); *rc = 1; return; @@ -2289,11 +2308,11 @@ void dwarf_get_pc_source_info(CPUState *cpu, target_ulong pc, SrcInfo *info, int void dwarf_all_livevar_iter(CPUState *cpu, target_ulong pc, liveVarCB f, - void *args){ + void *args) { //void (*f)(const char *var_ty, const char *var_nm, LocType loc_t, target_ulong loc)){ - if (inExecutableSource){ + if (inExecutableSource) { target_ulong fp = dwarf2_get_cur_fp(cpu, pc); - if (fp == (target_ulong) -1){ + if (fp == (target_ulong) -1) { printf("Error: was not able to get the Frame Pointer for the function %s at @ 0x" TARGET_FMT_lx "\n", funcaddrs[cur_function].c_str(), pc); return; @@ -2307,8 +2326,10 @@ void dwarf_all_livevar_iter(CPUState *cpu, void dwarf_funct_livevar_iter(CPUState *cpu, target_ulong pc, liveVarCB f, - void *args){ - //printf("iterating through live vars\n"); + void *args) { + if (debug) { + printf("iterating through live vars\n"); + } if (inExecutableSource) { target_ulong fp = dwarf2_get_cur_fp(cpu, pc); if (fp == (target_ulong) -1){ @@ -2328,13 +2349,15 @@ void dwarf_global_livevar_iter(CPUState *cpu, } bool translate_callback_dwarf(CPUState *cpu, target_ulong pc) { - if (!correct_asid(cpu)) return false; - + if (!correct_asid(cpu)) { + return false; + } auto it2 = std::lower_bound(line_range_list.begin(), line_range_list.end(), pc, CompareRangeAndPC()); // after the call to lower_bound the `pc` should be between it2->lowpc and it2->highpc // if it2 == line_range_list.end() we know we definitely didn't find out pc in our line_range_list - if (it2 == line_range_list.end() || pc < it2->lowpc) + if (it2 == line_range_list.end() || pc < it2->lowpc) { return false; + } return true; /* // This is just the linear search to confirm binary search (lower_bound) is @@ -2350,10 +2373,13 @@ bool translate_callback_dwarf(CPUState *cpu, target_ulong pc) { int exec_callback_dwarf(CPUState *cpu, target_ulong pc) { inExecutableSource = false; - if (!correct_asid(cpu)) return 0; + if (!correct_asid(cpu)) { + return 0; + } auto it2 = std::lower_bound(line_range_list.begin(), line_range_list.end(), pc, CompareRangeAndPC()); - if (it2 == line_range_list.end() || pc < it2->lowpc) + if (it2 == line_range_list.end() || pc < it2->lowpc) { return 0; + } inExecutableSource = true; if (it2->lowpc == it2->highpc) { inExecutableSource = false; @@ -2364,15 +2390,17 @@ int exec_callback_dwarf(CPUState *cpu, target_ulong pc) { cur_line = it2->line_number; //printf("[%s] [0x%llx]-%s(), ln: %4lld, pc @ 0x%x\n",file_name.c_str(),cur_function, funct_name.c_str(),cur_line,pc); - if (funcaddrs.find(cur_function) == funcaddrs.end()) + if (funcaddrs.find(cur_function) == funcaddrs.end()) { return 0; - if (cur_function == 0) + } + if (cur_function == 0) { return 0; + } //printf("[%s] [0x%llx]-%s(), ln: %4lld, pc @ 0x%x\n",file_name.c_str(),cur_function, funct_name.c_str(),cur_line,pc); //__livevar_iter(env, pc, funcvars[cur_function], push_var_if_live); //__livevar_iter(env, pc, global_var_list, push_var_if_live); //__livevar_iter(env, pc, global_var_list, print_var_if_live); - if (cur_line != prev_line){ + if (cur_line != prev_line) { //printf("[%s] %s(), ln: %4lld, pc @ 0x%x\n",file_name.c_str(), funct_name.c_str(),cur_line,pc); pri_runcb_on_after_line_change (cpu, pc, prev_file_name.c_str(), prev_funct_name.c_str(), prev_line); pri_runcb_on_before_line_change(cpu, pc, file_name.c_str(), funct_name.c_str(), cur_line); @@ -2385,7 +2413,7 @@ int exec_callback_dwarf(CPUState *cpu, target_ulong pc) { prev_function = cur_function; prev_line = cur_line; } - //if (funcaddrs.find(pc) != funcaddrs.end()){ + //if (funcaddrs.find(pc) != funcaddrs.end()) { // on_call(env, pc); //} return 0; @@ -2412,24 +2440,27 @@ uint32_t guest_strncpy(CPUState *cpu, char *buf, size_t maxlen, target_ulong gue typedef void (* on_proc_change_t)(CPUState *env, target_ulong asid, OsiProc *proc); void handle_asid_change(CPUState *cpu, target_ulong asid, OsiProc *p) { -// printf ("handle_asid_change\n"); if (!p) { return; } if (!p->name) { return; } if (debug) { printf("p-name: %s proc-to-monitor: %s\n", p->name, proc_to_monitor); } -// printf ("...really\n"); - //if (strcmp(p->name, proc_to_monitor) != 0) { if (strncmp(p->name, proc_to_monitor, strlen(p->name)) == 0) { target_ulong current_asid = panda_current_asid(cpu); monitored_asid.insert(current_asid); printf ("monitoring asid " TARGET_FMT_lx "\n", current_asid); } if (correct_asid(cpu) && !main_exec_initialized){ + if (debug) { + printf ("correct_asid, executing main_exec_initialized\n"); + } main_exec_initialized = ensure_main_exec_initialized(cpu); } + if (!main_exec_initialized) { + printf("The ensure_main_exec_intialized function failed on handle_asid_change\n"); + // exit(1); + } //free_osiproc(p); - } // XXX: osi_foo is largetly commented out and basically does nothing // I am keeping it here as a reminder of maybe tracking of a data structure @@ -2502,9 +2533,8 @@ void osi_foo(CPUState *cpu, TranslationBlock *tb) { return; } - - #endif + bool init_plugin(void *self) { #if defined(TARGET_I386) panda_arg_list *args_gen = panda_get_args("general"); @@ -2517,6 +2547,7 @@ bool init_plugin(void *self) { // monitored_asid = 0; } panda_arg_list *args = panda_get_args("dwarf2"); + debug = panda_parse_bool_opt(args, "debug", "enable debug output"); guest_debug_path = panda_parse_string_req(args, "g_debugpath", "path to binary/build dir on guest machine"); host_debug_path = panda_parse_string_req(args, "h_debugpath", "path to binary/build dir on host machine"); host_mount_path = panda_parse_string_opt(args, "host_mount_path", "dbg", "path to mounted guest file system"); @@ -2540,7 +2571,7 @@ bool init_plugin(void *self) { panda_require("pri"); panda_require("asidstory"); - //panda_require("osi_linux"); + // panda_require("osi_linux"); // make available the api for assert(init_callstack_instr_api()); assert(init_osi_linux_api()); @@ -2577,7 +2608,7 @@ bool init_plugin(void *self) { // if debug path actually points to a file, then make host_debug_path the // directory that contains the executable bin_path = std::string(host_debug_path); - //host_debug_path = dirname(strdup(host_debug_path)); + // host_debug_path = dirname(strdup(host_debug_path)); host_debug_path = dirname(strdup(host_debug_path)); } else { printf("Don\'t know what host_debug_path: %s is, but it is not a file or directory\n", host_debug_path); diff --git a/panda/plugins/pri_taint/pri_taint.cpp b/panda/plugins/pri_taint/pri_taint.cpp index 981c235d8f7..ed5b18db449 100644 --- a/panda/plugins/pri_taint/pri_taint.cpp +++ b/panda/plugins/pri_taint/pri_taint.cpp @@ -40,13 +40,11 @@ void uninit_plugin(void *); int get_loglevel() ; void set_loglevel(int new_loglevel); } -bool linechange_taint = true; -bool hypercall_taint = true; -bool chaff_bugs = false; -Panda__SrcInfoPri *si = NULL; + const char *global_src_filename = NULL; uint64_t global_src_linenum; unsigned global_ast_loc_id; +uint64_t global_funcaddr; bool debug = false; #define dprintf(...) if (debug) { printf(__VA_ARGS__); fflush(stdout); } @@ -86,7 +84,7 @@ Addr make_greg(uint64_t r, uint16_t off) { return ra; } void print_membytes(CPUState *env, target_ulong a, target_ulong len) { - unsigned char c = (unsigned char)0; + unsigned char c = (unsigned char) 0; printf("{ "); for (int i = 0; i < len; i++) { if (-1 == panda_virtual_memory_read(env, a+i, (uint8_t *) &c, sizeof(char))) { @@ -102,15 +100,34 @@ void print_membytes(CPUState *env, target_ulong a, target_ulong len) { #define LAVA_TAINT_QUERY_MAX_LEN (target_ulong)64ULL #if defined(TARGET_I386) void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, const char *astnodename) { + if (debug) { + printf("[pri_taint] Attempt to lava_taint_query\n"); + } + // can't do a taint query if it is not a valid register (loc) or if // the buf_len is greater than the register size (assume size of guest pointer) if (loc_t == LocReg && (buf >= CPU_NB_REGS || buf_len >= sizeof(target_ulong) || - buf_len == (target_ulong)-1)) + buf_len == (target_ulong) -1)) { + if (debug) { + printf("[pri_taint] The register is not balid OR buf_len > register size\n"); + } return; - if (loc_t == LocErr || loc_t == LocConst) + } + if (loc_t == LocErr || loc_t == LocConst) { + if (debug) { + printf("[pri_taint] The Location is either error OR constant. Shouldn't happen based on pfun()\n"); + } return; - if (!pandalog || !taint2_enabled() || taint2_num_labels_applied() == 0) + } + if (!pandalog || !taint2_enabled() || taint2_num_labels_applied() == 0) { + if (debug) { + printf("[pri_taint] No Panda log, Taint2 not enabled, or No taint2 num labeled applied\n"); + } return; + } + if (debug) { + printf("[pri_taint] OK, Seems like I can Lava Taint! LFG!\n"); + } CPUState *cpu = first_cpu; CPUArchState *env = (CPUArchState *)cpu->env_ptr; @@ -118,10 +135,13 @@ void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, con hwaddr phys = loc_t == LocMem ? panda_virt_to_phys(cpu, buf) : 0; ram_addr_t RamOffset = RAM_ADDR_INVALID; - if (phys == (hwaddr)-1 || PandaPhysicalAddressToRamOffset(&RamOffset, phys, false) != MEMTX_OK) return; + if (phys == (hwaddr) -1 + || PandaPhysicalAddressToRamOffset(&RamOffset, phys, false) != MEMTX_OK) { + return; + } if (debug) { - printf("Querying \"%s\": " TARGET_FMT_lu " bytes @ 0x" TARGET_FMT_lx " phys 0x" TARGET_FMT_plx ", strnlen=%d", astnodename, buf_len, buf, phys, is_strnlen); + // printf("Querying \"%s\": " TARGET_FMT_lu " bytes @ 0x" TARGET_FMT_lx " phys 0x" TARGET_FMT_plx ", strnlen=%d", astnodename, buf_len, buf, phys, is_strnlen); print_membytes(cpu, buf, is_strnlen? 32 : buf_len); printf("\n"); } @@ -152,11 +172,22 @@ void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, con uint32_t num_tainted = 0; for (uint32_t i = 0; i < len; i++) { Addr a = loc_t == LocMem ? make_maddr(RamOffset + i) : make_greg(buf, i); /* HACK: presumes for the same physical page ram_addr_t(x + i) == ram_addr_t(x) + i */ - if (taint2_query(a)) num_tainted++; + if (taint2_query(a)) { + num_tainted++; + } } // If nothing's tainted and we aren't doing chaff bugs, return. - if (!chaff_bugs && num_tainted == 0) return; + if (num_tainted == 0) { + if (debug) { + printf("[pri_taint] Nothing is tainted!\n"); + } + return; + } + + if (debug) { + printf("[pri_taint] Starting to write the Panda Log now in pri_taint\n"); + } // 1. write the pandalog entry that tells us something was tainted on this extent Panda__TaintQueryPri tqh = PANDA__TAINT_QUERY_PRI__INIT; @@ -180,6 +211,7 @@ void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, con // 2. iterate over the bytes in the extent and pandalog detailed info about taint std::vector tq; for (uint32_t offset = 0; offset < len; offset++) { + // uint32_t pa_indexed = phys + offset; Addr a = loc_t == LocMem ? make_maddr(RamOffset + offset) : make_greg(buf, offset); /* HACK: presumes for the same physical page ram_addr_t(x + i) == ram_addr_t(x) + i */ if (taint2_query(a)) { if (loc_t == LocMem) { @@ -198,7 +230,7 @@ void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, con // 4. write out callstack info tqh.call_stack = pandalog_callstack_create(); - dprintf("num taint queries: %lu\n", tq.size()); + dprintf("[pri_taint] num taint queries: %lu\n", tq.size()); tqh.n_taint_query = tq.size(); tqh.taint_query = tq.data(); Panda__LogEntry ple = PANDA__LOG_ENTRY__INIT; @@ -207,7 +239,9 @@ void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, con pandalog_callstack_free(tqh.call_stack); free(tqh.src_info); - for (Panda__TaintQuery *ptq : tq) pandalog_taint_query_free(ptq); + for (Panda__TaintQuery *ptq : tq) { + pandalog_taint_query_free(ptq); + } } #endif struct args { @@ -215,18 +249,25 @@ struct args { const char *src_filename; uint64_t src_linenum; unsigned ast_loc_id; + uint64_t funcaddr; }; #if defined(TARGET_I386) -void pfun(void *var_ty_void, const char *var_nm, LocType loc_t, target_ulong loc, void *in_args){ - if (!taint2_enabled()) +void pfun(void *var_ty_void, const char *var_nm, LocType loc_t, target_ulong loc, void *in_args) { + if (!taint2_enabled()) { + if (debug) { + printf("[pri_taint] Taint2 was not enabled (pfun called)\n"); + } return; + } // lava autogenerated variables start with this string const char *blacklist[] = {"kbcieiubweuhc", "phs", "phs_addr"} ; size_t i; for (i = 0; i < sizeof(blacklist)/sizeof(blacklist[0]); i++) { if (strncmp(var_nm, blacklist[i], strlen(blacklist[i])) == 0) { - //printf(" Found a lava generated string: %s", var_nm); + if (debug) { + printf("[pri_taint] Found a lava generated string: %s", var_nm); + } return; } } @@ -239,18 +280,20 @@ void pfun(void *var_ty_void, const char *var_nm, LocType loc_t, target_ulong loc global_src_filename = args->src_filename; global_src_linenum = args->src_linenum; global_ast_loc_id = args->ast_loc_id; + global_funcaddr = args->funcaddr; //target_ulong guest_dword; //std::string ty_string = std::string(var_ty); //size_t num_derefs = std::count(ty_string.begin(), ty_string.end(), '*'); //size_t i; - switch (loc_t){ + switch (loc_t) { case LocReg: - dprintf("VAR REG: %s %s in Reg " TARGET_FMT_lu "\n", var_ty, var_nm, loc); + dprintf("[pri_taint] VAR REG: %s %s in Reg " TARGET_FMT_lu "\n", var_ty, var_nm, loc); dwarf2_type_iter(pfun_cpu, loc, loc_t, (DwarfVarType *) var_ty_void, lava_taint_query, 3); break; case LocMem: - if (debug) - printf("VAR MEM: %s %s @ 0x" TARGET_FMT_lx "\n", var_ty, var_nm, loc); + if (debug) { + printf("[pri_taint] VAR MEM: %s %s @ 0x" TARGET_FMT_lx "\n", var_ty, var_nm, loc); + } dwarf2_type_iter(pfun_cpu, loc, loc_t, (DwarfVarType *) var_ty_void, lava_taint_query, 3); break; case LocConst: @@ -263,9 +306,9 @@ void pfun(void *var_ty_void, const char *var_nm, LocType loc_t, target_ulong loc default: assert(1==0); } - free(si); + // free(si); } - +/* void on_line_change(CPUState *cpu, target_ulong pc, const char *file_Name, const char *funct_name, unsigned long long lno){ if (taint2_enabled()){ struct args args = {cpu, file_Name, lno, 0}; @@ -280,21 +323,41 @@ void on_fn_start(CPUState *cpu, target_ulong pc, const char *file_Name, const ch pri_funct_livevar_iter(cpu, pc, (liveVarCB) pfun, (void *)&args); } +// Trace logging in the level of source code +void hypercall_log_trace(unsigned ast_loc_id) { + Panda__LogEntry ple = PANDA__LOG_ENTRY__INIT; + Panda__SourceTraceId stid = PANDA__SOURCE_TRACE_ID__INIT; + stid.ast_loc_id = ast_loc_id; + ple.source_trace_id = &stid; + pandalog_write_entry(&ple); +} +*/ #ifdef TARGET_I386 // Support all features of label and query program -bool i386_hypercall_callback(CPUState *cpu){ +bool i386_hypercall_callback(CPUState *cpu) { + if (debug) { + printf("[pri_taint] Calling i386 hypercall callback!\n"); + } bool ret = false; CPUArchState *env = (CPUArchState*)cpu->env_ptr; - if (taint2_enabled() && pandalog) { + if (taint2_enabled()) { // LAVA Hypercall target_ulong addr = panda_virt_to_phys(cpu, env->regs[R_EAX]); if ((int)addr == -1) { - printf ("panda hypercall with ptr to invalid PandaHypercallStruct: vaddr=0x%x paddr=0x%x\n", + printf ("[pri_taint] panda hypercall with ptr to invalid PandaHypercallStruct: vaddr=0x%x paddr=0x%x\n", (uint32_t) env->regs[R_EAX], (uint32_t) addr); } - else { + else if (pandalog) { + if (debug) { + printf("[pri_taint] Hypercall is OK and Panda Log is set\n"); + } PandaHypercallStruct phs; - panda_virtual_memory_rw(cpu, env->regs[R_EAX], (uint8_t *) &phs, sizeof(phs), false); + panda_virtual_memory_read(cpu, env->regs[R_EAX], (uint8_t *) &phs, sizeof(phs)); + + // To be used for chaff bugs? + uint64_t funcaddr = 0; + panda_virtual_memory_read(cpu, phs.info, (uint8_t*)&funcaddr, sizeof(target_ulong)); + if (phs.magic == 0xabcd) { // if the phs action is a pri_query point, see // lava/include/pirate_mark_lava.h @@ -303,29 +366,49 @@ bool i386_hypercall_callback(CPUState *cpu){ SrcInfo info; int rc = pri_get_pc_source_info(cpu, pc, &info); if (!rc) { - struct args args = {cpu, info.filename, info.line_number, phs.src_filename}; - dprintf("panda hypercall: [%s], " + struct args args = {cpu, info.filename, info.line_number, phs.src_filename, funcaddr}; + dprintf("[pri_taint] panda hypercall: [%s], " "ln: %4ld, pc @ 0x" TARGET_FMT_lx "\n", info.filename, info.line_number,pc); pri_funct_livevar_iter(cpu, pc, (liveVarCB) pfun, (void *)&args); - //pri_all_livevar_iter(cpu, pc, (liveVarCB) pfun, (void *)&args); //lava_attack_point(phs); } + else { + if (debug) { + printf("[pri_taint] pri_get_pc_src_info has failed: %d != 0.\n", rc); + } + } ret = true; + // hypercall_log_trace(phs.src_filename); + } + else { + if (debug) { + printf("[pri_taint] Invalid action value in PHS struct: %d != 13.\n", phs.action); + } } } else { - printf ("Invalid magic value in PHS struct: %x != 0xabcd.\n", phs.magic); + printf("[pri_taint] Invalid magic value in PHS struct: %x != 0xabcd.\n", phs.magic); + } + } + else { + if (debug) { + printf("[pri_taint] No Panda Log even though hypercall seemed OK!\n"); } } } + else { + if (debug) { + printf("[pri_taint] taint2 is not enabled (hypercall)\n"); + } + } return ret; } #endif // TARGET_I386 -bool guest_hypercall_callback(CPUState *cpu){ +bool guest_hypercall_callback(CPUState *cpu) { #ifdef TARGET_I386 return i386_hypercall_callback(cpu); #endif @@ -354,33 +437,33 @@ bool init_plugin(void *self) { #if defined(TARGET_I386) panda_arg_list *args = panda_get_args("pri_taint"); - hypercall_taint = panda_parse_bool_opt(args, "hypercall", "Register tainting on a panda hypercall callback"); - linechange_taint = panda_parse_bool_opt(args, "linechange", "Register tainting on every line change in the source code (default)"); - chaff_bugs = panda_parse_bool_opt(args, "chaff", "Record untainted extents for chaff bugs."); - // default linechange_taint to true if there is no hypercall taint - if (!hypercall_taint) - linechange_taint = true; + debug = panda_parse_bool_opt(args, "debug", "enable debug output"); + panda_require("callstack_instr"); assert(init_callstack_instr_api()); panda_require("pri"); assert(init_pri_api()); panda_require("dwarf2"); assert(init_dwarf2_api()); - panda_require("taint2"); assert(init_taint2_api()); - if (hypercall_taint) { - panda_cb pcb; - pcb.guest_hypercall = guest_hypercall_callback; - panda_register_callback(self, PANDA_CB_GUEST_HYPERCALL, pcb); - } - if (linechange_taint){ - PPP_REG_CB("pri", on_before_line_change, on_line_change); + panda_cb pcb; + pcb.guest_hypercall = guest_hypercall_callback; + panda_register_callback(self, PANDA_CB_GUEST_HYPERCALL, pcb); + printf("[pri_taint] This plugin is activated!\n"); + + // If taint isn't already enabled, turn it on. + if (!taint2_enabled()) { + printf("[pri_taint] enabling taint now!\n"); + taint2_enable_taint(); } + return true; +#else + printf("[pri_taint] This plugin is only supported on x86\n"); + return false; //taint2_track_taint_state(); #endif - return true; }