From 881a05c497bdfcab3aa7480e0dd6ae35fb3e8235 Mon Sep 17 00:00:00 2001 From: AndrewQuijano Date: Sat, 20 Jul 2024 21:35:11 +0000 Subject: [PATCH] [WIP][Scripts] Complete Python3, static linking, PyPanda API working --- dependencies/ubuntu_22.04_build.txt | 3 + init-host.py | 11 +-- install.sh | 6 +- scripts/add_queries.sh | 22 ++--- scripts/bug_mining.py | 143 +++++++++++++++------------- scripts/competition.py | 2 +- scripts/competition.sh | 1 - scripts/docker-shell.sh | 26 +++-- scripts/fninstr.py | 40 ++++---- scripts/funcs.sh | 3 +- scripts/inject.py | 2 +- scripts/inject.sh | 2 +- scripts/lava.py | 2 +- scripts/lava.sh | 27 +++--- scripts/process_compile_commands.py | 34 ++++--- scripts/reset_db.sh | 43 +++++++++ scripts/run-on-fuzzed-input.py | 2 +- scripts/setup_postgres.sh | 34 ++++--- scripts/shell.sh | 3 +- scripts/vars.sh | 20 ++-- setup_container.sh | 0 tools/btrace/sw-btrace-to-compiledb | 3 +- tools/lavaTool/get_c_files.py | 14 ++- 23 files changed, 253 insertions(+), 190 deletions(-) create mode 100644 scripts/reset_db.sh mode change 100644 => 100755 scripts/setup_postgres.sh mode change 100644 => 100755 setup_container.sh diff --git a/dependencies/ubuntu_22.04_build.txt b/dependencies/ubuntu_22.04_build.txt index 02ad8070..db4d73de 100644 --- a/dependencies/ubuntu_22.04_build.txt +++ b/dependencies/ubuntu_22.04_build.txt @@ -41,3 +41,6 @@ libzmq3-dev # I need this for making LavaTool g++-10 + +# Install dwarf dump, you need this for 64-bit bugs +dwarfdump \ No newline at end of file diff --git a/init-host.py b/init-host.py index e3b30c24..d37c8be8 100755 --- a/init-host.py +++ b/init-host.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # import argparse import os @@ -18,9 +18,6 @@ from colorama import Fore from colorama import Style -# if moyix server is down, this image will also work -QCOW_URL = "https://panda.re/qcows/linux/debian/7.3/x86/debian_7.3_x86.qcow" -QCOW_FILE_NAME = "wheezy_panda2.qcow2" TAR_URL = "ftp://ftp.astron.com/pub/file/file-5.22.tar.gz" LAVA_DIR = dirname(abspath(sys.argv[0])) @@ -78,12 +75,6 @@ def main(): else: progress("Found existing target_bins/{}".format(basename(TAR_URL))) - if not isfile(join(LAVA_DIR, basename(QCOW_FILE_NAME))): - progress("Downloading {}".format(basename(QCOW_URL))) - run(["wget", "--no-check-certificate", QCOW_URL, "-O", QCOW_FILE_NAME]) - else: - progress("Found existing {}".format(basename(QCOW_FILE_NAME))) - if not isfile(join(LAVA_DIR, "host.json")): progress("Building host.json") # Build host.json diff --git a/install.sh b/install.sh index ce99bb82..fafa1c81 100755 --- a/install.sh +++ b/install.sh @@ -40,7 +40,9 @@ else fi curl -LJO https://github.com/panda-re/panda/releases/download/v1.8.23/pandare_22.04.deb -$SUDO apt install ./pandare_22.04.deb +mv *.deb /tmp +$SUDO apt-get -y install /tmp/pandare_22.04.deb +rm /tmp/*.deb progress "Installed build dependencies" @@ -48,6 +50,6 @@ pip3 install --upgrade pip pip3 install -r requirements.txt progress "Installed Python requirements" -$SUDO bash ./setup_container.sh +bash ./setup_container.sh progress "Installed LAVA" diff --git a/scripts/add_queries.sh b/scripts/add_queries.sh index cfebcc44..f062ef67 100755 --- a/scripts/add_queries.sh +++ b/scripts/add_queries.sh @@ -38,7 +38,7 @@ USAGE() { } set -e # Exit on error -#set -x # Debug mode +set -x # Debug mode if [ $# -lt 1 ]; then USAGE $0 @@ -86,8 +86,8 @@ progress "queries" 0 "Configuring..." mkdir -p lava-install configure_file=${configure_cmd%% *} if [ -e "$configure_file" ]; then - CC=/usr/lib/llvm-11/bin/clang \ - CXX=/usr/lib/llvm-11/bin/clang++ \ + CC=$llvm/bin/clang \ + CXX=$llvm/bin/clang++ \ CFLAGS="-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. -I../include -I./src/" \ $configure_cmd --prefix=$(pwd)/lava-install fi @@ -103,8 +103,8 @@ for i in ${MAKES[@]}; do IFS=' ' read -ra ARGS <<< $i echo "$lava/tools/btrace/sw-btrace ${ARGS[@]}" - CC=/usr/lib/llvm-11/bin/clang \ - CXX=/usr/lib/llvm-11/bin/clang++ \ + CC=$llvm/bin/clang \ + CXX=$llvm/bin/clang++ \ CFLAGS="-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. -I../include -I./src/" \ $lava/tools/btrace/sw-btrace ${ARGS[@]} IFS='&&' @@ -119,7 +119,7 @@ bash -c $install progress "queries" 0 "Creating compile_commands.json..." # Delete any pre-existing compile commands.json (could be in archive by mistake) rm -f compile_commands.json -$lava/tools/btrace/sw-btrace-to-compiledb /usr/lib/llvm-11/lib/clang/11/include +$lava/tools/btrace/sw-btrace-to-compiledb $llvm/lib/clang/11/include if [ -e "$directory/$name/extra_compile_commands.json" ]; then sed -i '$d' compile_commands.json echo "," >> compile_commands.json @@ -130,7 +130,7 @@ git commit -m 'Add compile_commands.json.' cd .. -c_files=$(python $lava/tools/lavaTool/get_c_files.py $source) +c_files=$($python $lava/tools/lavaTool/get_c_files.py $source) c_dirs=$(for i in $c_files; do dirname $i; done | sort | uniq) progress "queries" 0 "Copying include files..." @@ -151,7 +151,7 @@ done #progress "queries" 0 "Initialize variables..." #for i in $c_files; do -# $lava/src_clang/build/lavaTool -action=init \ +# /src_clang/build/lavaTool -action=init \ # -p="$source/compile_commands.json" \ # -src-prefix=$(readlink -f "$source") \ # $i @@ -167,7 +167,7 @@ fninstr=$directory/$name/fninstr echo "Creating fninstr [$fninstr]" echo -e "\twith command: \"python $lava/scripts/fninstr.py -d -o $fninstr $fnfiles\"" -python $lava/scripts/fninstr.py -d -o $fninstr $fnfiles +$python $lava/scripts/fninstr.py -d -o $fninstr $fnfiles if [[ ! -z "$df_fn_blacklist" ]]; then cmd=$(echo "sed -i /${df_fn_blacklist}/d $fninstr") @@ -209,13 +209,13 @@ fi # Do we need to explicitly apply replacements in the root source directory # This causes clang-apply-replacements to segfault when run a 2nd time #pushd "$directory/$name/$source" -#$llvm_src/Release/bin/clang-apply-replacements . +#/usr/lib/llvm-11/bin/clang-apply-replacements . #popd for i in $c_dirs; do echo "Applying replacements to $i" pushd $i - $llvm_src/Release/bin/clang-apply-replacements . + $llvm/bin/clang-apply-replacements . popd done diff --git a/scripts/bug_mining.py b/scripts/bug_mining.py index 834e2501..89f63d6a 100644 --- a/scripts/bug_mining.py +++ b/scripts/bug_mining.py @@ -34,6 +34,8 @@ from vars import parse_vars from os.path import abspath, join from pandare import Panda +from pandare.extras import dwarfdump + host_json = abspath(sys.argv[1]) project_name = sys.argv[2] @@ -53,24 +55,25 @@ curtail = 0 installdir = None -isoname = None command_args = None # Replace create_recording in first link # https://github.com/panda-re/panda/blob/dev/panda/scripts/run_guest.py#L151-L189 -# https://docs.panda.re/#recordings # https://github.com/panda-re/panda/blob/dev/panda/python/core/pandare/panda.py#L2595-L2645 @panda.queue_blocking def create_recording(): - # I assume qemu_path is just 'panda-system-i386', `panda-system-x86_64`, etc global command_args global installdir - global isoname print("args", command_args) print("install dir", installdir) - print("isoname", isoname) guest_command = subprocess.list2cmdline(command_args) - panda.record_cmd(guest_command=guest_command, copy_directory=installdir, iso_name=isoname) + # Technically the first two steps of record_cmd + # but running executable ONLY works with absolute paths + panda.revert_sync('root') + panda.copy_to_guest(installdir, absolute_paths=True) + + # Pass in None for snap_name since I already did the revert_sync already + panda.record_cmd(guest_command=guest_command, snap_name=None) panda.stop_run() @@ -106,8 +109,6 @@ def progress(msg): tick() - - input_file = abspath(project["config_dir"] + "/" + sys.argv[3]) input_file_base = os.path.basename(input_file) print("bug_mining.py %s %s" % (project_name, input_file)) @@ -116,11 +117,6 @@ def progress(msg): # global curtail curtail = int(sys.argv[4]) -chaff = project.get('chaff', False) - -panda_os_string = project.get('panda_os_string', - 'linux-32-debian:3.2.0-4-686-pae') - lavadir = dirname(dirname(abspath(sys.argv[0]))) progress("Entering {}".format(project['output_dir'])) @@ -135,8 +131,8 @@ def progress(msg): # e.g. file-5.22-true.iso installdir = join(sourcedir, 'lava-install') input_file_guest = join(installdir, input_file_base) -isoname = '{}-{}.iso'.format(sourcedir, input_file_base) -command_args = shlex.split(project['command'].format( +command_args = shlex.split( + project['command'].format( install_dir=pipes.quote(installdir), input_file=input_file_guest)) shutil.copy(input_file, installdir) @@ -159,67 +155,75 @@ def progress(msg): progress("Starting first and only replay, tainting on file open...") # process name - if command_args[0].startswith('LD_PRELOAD'): + cmdpath = command_args[1] proc_name = basename(command_args[1]) else: + cmdpath = command_args[0] proc_name = basename(command_args[0]) -pandalog = "{}/queries-{}.plog".format(project['output_dir'], os.path.basename(isoname)) -pandalog_json = "{}/queries-{}.json".format(project['output_dir'], os.path.basename(isoname)) +binpath = os.path.join(installdir, "bin", proc_name) +if not os.path.exists(binpath): + binpath = os.path.join(installdir, "lib", proc_name) + if not os.path.exists(binpath): + binpath = os.path.join(installdir, proc_name) + +pandalog = "{}/queries-{}.plog".format(project['output_dir'], input_file_base) +pandalog_json = "{}/queries-{}.json".format(project['output_dir'], input_file_base) print("pandalog = [%s] " % pandalog) -panda_args = { - 'pri': {}, - 'pri_dwarf': { - 'proc': proc_name, - 'g_debugpath': installdir, - 'h_debugpath': installdir - }, - 'pri_taint': { - 'hypercall': True, - 'chaff': chaff - }, - 'taint2': {'no_tp': True}, - 'tainted_branch': {}, - 'file_taint': { - 'pos': True, - 'cache_process_details_on_basic_block': True, - } -} +dwarf_cmd = ["dwarfdump", "-dil", cmdpath] +dwarfout = subprocess.check_output(dwarf_cmd) +dwarfdump.parse_dwarfdump(dwarfout, binpath) + +# Based on this example: +# https://github.com/panda-re/panda/blob/dev/panda/python/examples/file_taint/file_taint.py +panda.set_pandalog(pandalog) + +panda.load_plugin("pri") +panda.load_plugin("dwarf2", + args={ + 'proc': proc_name, + 'g_debugpath': installdir, + 'h_debugpath': installdir + }) +# pri_taint is almost same as Zhenghao's hypercall +# Chaffx64 branch says these are needed? +# if panda.arch != 'i386': +# panda.load_plugin('hypercall') +# panda.load_plugin('stackprob') + +panda.load_plugin("taint2", + args={ + 'no_tp': True + }) +panda.load_plugin("tainted_branch") if 'use_stdin' in project and project['use_stdin']: - panda_args['file_taint']['first_instr'] = 1 - panda_args['file_taint']['use_stdin'] = proc_name + panda.load_plugin("file_taint", + args={ + 'filename' : input_file_guest, + 'pos': True, + 'cache_process_details_on_basic_block': True, + 'first_instr' : 1, + 'use_stdin' : proc_name, + 'verbose' : True + }) else: - panda_args['file_taint']['enable_taint_on_open'] = True - -qemu_args = [ - project['qemu'], '-replay', isoname, - '-pandalog', pandalog, '-os', panda_os_string -] - -for plugin, plugin_args in panda_args.items(): - qemu_args.append('-panda') - arg_string = ",".join(["{}={}".format(arg, val) - for arg, val in plugin_args.items()]) - qemu_args.append('{}{}{}'.format(plugin, ':' - if arg_string else '', arg_string)) - -# Use -panda-plugin-arg to account for commas and colons in filename. -qemu_args.extend(['-panda-arg', 'file_taint:filename=' + input_file_guest]) - -dprint("qemu args: [{}]".format(subprocess.list2cmdline(qemu_args))) -sys.stdout.flush() -try: - subprocess.check_call(qemu_args, stderr=subprocess.STDOUT) -except subprocess.CalledProcessError: - if qemu_use_rr: - qemu_args = ['rr', 'record', project['qemu'], '-replay', isoname] - subprocess.check_call(qemu_args) - else: - raise + panda.load_plugin("file_taint", + args={ + 'filename' : input_file_guest, + 'pos': True, + 'cache_process_details_on_basic_block': True, + 'enable_taint_on_open': True, + 'verbose' : True + }) +panda.load_plugin("pri_taint") + +# Default name is 'recording' +# https://github.com/panda-re/panda/blob/dev/panda/python/core/pandare/panda.py#L2595 +panda.run_replay("recording") replay_time = tock() print("taint analysis complete %.2f seconds" % replay_time) @@ -227,9 +231,14 @@ def progress(msg): tick() +# I attempted to upgrade the version, but panda had trouble including something +# for now, we can use the python implementation, although it is slower +# https://github.com/protocolbuffers/protobuf/releases/tag/v21.0 +# https://stackoverflow.com/questions/52040428/how-to-update-protobuf-runtime-library +os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python' progress("Calling the FBI on queries.plog...") convert_json_args = ['python3', '-m', 'pandare.plog_reader', pandalog] -print("panda log JSON invocation: [%s]" % (subprocess.list2cmdline(convert_json_args))) +print("panda log JSON invocation: [%s] > %s" % (subprocess.list2cmdline(convert_json_args), pandalog_json)) try: with open(pandalog_json, 'wb') as fd: subprocess.check_call(convert_json_args, stdout=fd, stderr=sys.stderr) @@ -251,6 +260,8 @@ def progress(msg): dprint("fbi invocation: [%s]" % (subprocess.list2cmdline(fbi_args))) sys.stdout.flush() +import sys +sys.exit(0) try: subprocess.check_call(fbi_args, stdout=sys.stdout, stderr=sys.stderr) except subprocess.CalledProcessError as e: diff --git a/scripts/competition.py b/scripts/competition.py index 7f5fa767..231736c9 100755 --- a/scripts/competition.py +++ b/scripts/competition.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import argparse import datetime diff --git a/scripts/competition.sh b/scripts/competition.sh index 0a8756bf..f5f4df7b 100755 --- a/scripts/competition.sh +++ b/scripts/competition.sh @@ -4,7 +4,6 @@ # Json file required params # # lava: directory of lava repository -# pandahost: what remote host to run panda on trap '' PIPE set -e # Exit on error diff --git a/scripts/docker-shell.sh b/scripts/docker-shell.sh index ffef8cab..91ba3c12 100755 --- a/scripts/docker-shell.sh +++ b/scripts/docker-shell.sh @@ -1,17 +1,22 @@ #!/bin/bash -# Single argument of project name will get container name +# Single argument of project name will get Docker name # from project config. Then 2nd optional argument is command to run # With no arguments, just give us a shell lava="$(dirname $(dirname $(readlink -f $0)))" -if [ "$#" -eq 0 ]; then - container="lava32" -else +# This project_name is a dummy value, we just want shell access +project_name="toy" +. `dirname $0`/vars.sh + +echo "You are connecting to the Docker container: ${dockername}" + +if [ "$#" -ne 0 ]; then project_name=$1 + echo "using project ${project_name}" cmd="${@:2}" -#Container name (lava32 or lava32debug) comes from config + # Docker name (lava32 or lava32debug) comes from config . `dirname $0`/vars.sh docker_map_args="-v $tarfiledir:$tarfiledir" @@ -19,13 +24,14 @@ else docker_map_args="$docker_map_args -v $directory:$directory" fi - if ! ( docker images ${container} | grep -q ${container} ); then - docker build -t ${container} "$(dirname $(dirname $(readlink -f $0)))/docker/debug" + if ! ( docker images ${dockername} | grep -q ${dockername} ); then + docker build -t ${dockername} "$(dirname $(dirname $(readlink -f $0)))/docker/debug" fi - - [ "$extradockerargs" = "null" ] && extradockerargs=""; +else + echo "No extra args" fi +[ "$extradockerargs" = "null" ] && extradockerargs=""; whoami="$(whoami)" path="" cmd="sudo -u $whoami bash -c -- \"$cmd\"" @@ -57,4 +63,4 @@ docker run --rm -it \ --cap-add=SYS_PTRACE \ $docker_map_args \ $extradockerargs \ - ${container} sh -c "trap '' PIPE; $cmd" + ${dockername} sh -c "trap '' PIPE; $cmd" diff --git a/scripts/fninstr.py b/scripts/fninstr.py index 3b37ff7a..926acc7c 100644 --- a/scripts/fninstr.py +++ b/scripts/fninstr.py @@ -19,6 +19,8 @@ # TODO use vars.py to figure this out instead of arguments parser.add_argument('-d', '--dataflow', action="store_true", default=False, help="lava is using dataflow") +parser.add_argument('-r', '--read', action="store_true", default=True, + help="Read the LAVA output YAML file") parser.add_argument('-i', '--input', action="store", default=None, help="name of input yaml file from LavaFnTool") parser.add_argument('-o', '--output', action="store", default=None, @@ -104,10 +106,11 @@ def merge(v, vors): return vors + v -if True: +if args.read: for filename in rest: print("FILE [%s] " % filename) - y = yaml.load(open(filename)) + with open(filename, 'r') as file: + y = yaml.safe_load(file) assert (y is not None), "Missing output file from fninstr" for x in y: # print x @@ -126,19 +129,19 @@ def merge(v, vors): fpa = FnPtrAssign(x['fnPtrAssign']) addtohl(fpas, fpa.name, fpa) - f = open("getfns.pickle", "wb") - pickle.dump(fundefs, f) - pickle.dump(prots, f) - pickle.dump(calls, f) - pickle.dump(fpas, f) - f.close() + with open("getfns.pickle", "wb") as f: + pickle.dump(fundefs, f) + pickle.dump(prots, f) + pickle.dump(calls, f) + pickle.dump(fpas, f) + else: - f = open("getfns.pickle", "rb") - fundefs = pickle.load(f) - prots = pickle.load(f) - calls = pickle.load(f) - fpas = pickle.load(f) - f.close() + with open("getfns.pickle", "rb") as f: + fundefs = pickle.load(f) + prots = pickle.load(f) + calls = pickle.load(f) + fpas = pickle.load(f) + """ @@ -303,8 +306,7 @@ def merge(v, vors): if instr_judgement[name] == OKI: print("Intrumenting fun [%s]" % name) -f = open(args.output, "w") -for name in instr_judgement.keys(): - if instr_judgement[name] == OKI: - f.write("NOFILENAME %s\n" % name) -f.close() +with open(args.output, "w") as f: + for name in instr_judgement.keys(): + if instr_judgement[name] == OKI: + f.write("NOFILENAME %s\n" % name) diff --git a/scripts/funcs.sh b/scripts/funcs.sh index 9ed591bd..0dc546eb 100755 --- a/scripts/funcs.sh +++ b/scripts/funcs.sh @@ -69,6 +69,7 @@ if [ -z "$LAVA_FUNCS_INCLUDED" ]; then bash -c "$command" >> "$logfile" 2>&1 elif [ "$remote_machine" == "docker" ]; then echo docker run $dockername sh -c "$command" + DOCKER_IP=$(ifconfig docker0 | grep 'inet ' | awk '{print $2}') docker run --rm -it \ -e "HTTP_PROXY=$HTTP_PROXY" \ -e "HTTPS_PROXY=$HTTPS_PROXY" \ @@ -81,7 +82,7 @@ if [ -z "$LAVA_FUNCS_INCLUDED" ]; then -v /etc/shadow:/etc/shadow:ro \ -v /etc/gshadow:/etc/gshadow:ro \ -v /home:/home:ro \ - --add-host=database:172.17.0.1 \ + --add-host=database:$DOCKER_IP \ $docker_map_args \ $extradockerargs \ $dockername sh -c "trap '' PIPE; su -l $(whoami) -c \"$command\"" \ diff --git a/scripts/inject.py b/scripts/inject.py index 911e23a7..6e31a156 100755 --- a/scripts/inject.py +++ b/scripts/inject.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import argparse import atexit diff --git a/scripts/inject.sh b/scripts/inject.sh index ae50e1e9..365fef03 100755 --- a/scripts/inject.sh +++ b/scripts/inject.sh @@ -4,7 +4,7 @@ # Json file required params # # lava: directory of lava repository -# pandahost: what remote host to run panda on + trap '' PIPE set -e # Exit on error diff --git a/scripts/lava.py b/scripts/lava.py index cd3b3cf8..970f07c7 100644 --- a/scripts/lava.py +++ b/scripts/lava.py @@ -576,7 +576,7 @@ def __init__(self, project): tar_files = subprocess.check_output(['tar', 'tf', project['tarfile']], stderr=sys.stderr) - self.source_root = tar_files.splitlines()[0].split(os.path.sep)[0] + self.source_root = tar_files.decode().splitlines()[0].split(os.path.sep)[0] self.queries_build = join(self.top_dir, self.source_root) self.bugs_top_dir = join(self.top_dir, 'bugs') diff --git a/scripts/lava.sh b/scripts/lava.sh index d8508db5..cd684ec9 100755 --- a/scripts/lava.sh +++ b/scripts/lava.sh @@ -27,7 +27,6 @@ # name: a name for this project (used to create directories) # inputs: a list of inputs that will be used to find potential bugs (think coverage) # buildhost: what remote host to build source on -# pandahost: what remote host to run panda and postgres on # testinghost: what host to test injected bugs on # fixupscript: script to run after add_query to fix up src before make # @@ -105,7 +104,7 @@ fi if [[ $demo -eq 1 ]] then - gnome-terminal --geometry=90x40 -x bash -c "python $(dirname $0)/demo.py $json; read" & + gnome-terminal --geometry=90x40 -x bash -c "$python $(dirname $0)/demo.py $json; read" & fi progress "everything" 1 "JSON file is $json" @@ -132,9 +131,9 @@ RESET_DB() { lf="$logs/dbwipe.log" truncate "$lf" progress "everything" 1 "Resetting lava db -- logging to $lf" - run_remote "$buildhost" "dropdb -U postgres -h database $db || true" "$lf" - run_remote "$buildhost" "createdb -U postgres -h database $db || true" "$lf" - run_remote "$buildhost" "psql -d $db -h database -f $lava/tools/lavaODB/generated/lava.sql -U postgres" "$lf" + run_remote "$buildhost" "dropdb -U $pguser -h $dbhost $db || true" "$lf" + run_remote "$buildhost" "createdb -U $pguser -h $dbhost $db || true" "$lf" + run_remote "$buildhost" "psql -d $db -h $dbhost -f $lava/tools/lavaODB/generated/lava.sql -U $pguser" "$lf" run_remote "$buildhost" "echo dbwipe complete" "$lf" } @@ -146,6 +145,7 @@ if [ $reset -eq 1 ]; then deldir "$directory/$name/"'*rr-*' # remove all plog files in the directory deldir "$directory/$name/*.plog" + deldir "$directory/$name/*.json" progress "everything" 0 "Truncating logs..." for i in $(ls "$logs" | grep '.log$'); do truncate "$logs/$i" @@ -155,15 +155,12 @@ if [ $reset -eq 1 ]; then echo "reset complete $time_diff seconds" fi - - - if [ $add_queries -eq 1 ]; then tick progress "everything" 1 "Add queries step -- btrace lavatool and fixups" lf="$logs/add_queries.log" truncate "$lf" - progress "everything" 1 "Adding queries to source -- logging to $lf" + progress "everything" 1 "Adding queries to source with type $ATP and $project_name -- logging to $lf" run_remote "$buildhost" "$scripts/add_queries.sh $ATP_TYPE $project_name" "$lf" if [ "$fixupscript" != "null" ]; then lf="$logs/fixups.log" @@ -183,7 +180,8 @@ if [ $make -eq 1 ]; then progress "everything" 1 "Make step -- making 32-bit version with queries" lf="$logs/make.log" truncate "$lf" - run_remote "$buildhost" "cd $sourcedir && CC=/usr/lib/llvm-11/bin/clang CXX=/usr/lib/llvm-11/bin/clang++ CFLAGS='-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. -I../include -I./src/' $makecmd" "$lf" + # Note, adding the static flag is important. We are running the binaries on a PANDA VM, so we have no idea if it will have any libraries we need. + run_remote "$buildhost" "cd $sourcedir && CC=$llvm/bin/clang CXX=$llvm/bin/clang++ CFLAGS='-O0 -m32 -DHAVE_CONFIG_H -g -gdwarf-2 -fno-stack-protector -D_FORTIFY_SOURCE=0 -I. -I.. -I../include -I./src/ -static' $makecmd" "$lf" run_remote "$buildhost" "cd $sourcedir && rm -rf lava-install" "$lf" if [ "$install_simple" == "null" ]; then @@ -211,7 +209,7 @@ if [ $taint -eq 1 ]; then # If we didn't just reset the DB, we need clear out any existing taint labels before running FBI progress "everything" 1 "Clearing taint data from DB" lf="$logs/dbwipe_taint.log" - run_remote "$buildhost" "psql -U postgres -h database -c \"delete from dua_viable_bytes; delete from labelset;\" $db" "$lf" + run_remote "$buildhost" "psql -U $pguser -h $dbhost -c \"delete from dua_viable_bytes; delete from labelset;\" $db" "$lf" fi progress "everything" 1 "Taint step -- running panda and fbi" for input in $inputs @@ -220,16 +218,17 @@ if [ $taint -eq 1 ]; then lf="$logs/bug_mining-$i.log" truncate "$lf" progress "everything" 1 "PANDA taint analysis prospective bug mining -- input $input -- logging to $lf" - run_remote "$buildhost" "$python $scripts/bug_mining.py $hostjson $project_name $input $curtail" "$lf" + run_remote "$buildhost" "$python $scripts/bug_mining.py $hostjson $project_name $input $curtail" "$lf" + exit 0 echo -n "Num Bugs in db: " - bug_count=$(run_remote "$buildhost" "psql -At $db -U postgres -h database -c 'select count(*) from bug'") + bug_count=$(run_remote "$buildhost" "psql -At $db -U $pguser -h $dbhost -c 'select count(*) from bug'") if [ "$bug_count" = "0" ]; then echo "FATAL ERROR: no bugs found" exit 1 fi echo "Found $bug_count bugs" echo - run_remote "$buildhost" "psql $db -U postgres -h database -c 'select count(*), type from bug group by type order by type'" + run_remote "$buildhost" "psql $db -U $pguser -h $dbhost -c 'select count(*), type from bug group by type order by type'" done tock echo "bug_mining complete $time_diff seconds" diff --git a/scripts/process_compile_commands.py b/scripts/process_compile_commands.py index 3d3cc4c6..7b62640e 100644 --- a/scripts/process_compile_commands.py +++ b/scripts/process_compile_commands.py @@ -7,38 +7,36 @@ def process_compile_commands(cc_filename, extra_cc_filename): print('Processing compile_commands') - cc_file = open(cc_filename, 'r') - extra_cc_file = None - if os.path.isfile(extra_cc_filename): - extra_cc_file = open(extra_cc_filename, 'r') - compile_commands = json.load(cc_file) + with open(cc_filename, 'r') as cc_file: + compile_commands = json.load(cc_file) + file_set = set() new_compile_commands = [] for f in compile_commands: if join(f['directory'], f['file']) not in file_set: file_set.add(join(f['directory'], f['file'])) new_compile_commands.append(f) - if extra_cc_file: - extra_compile_commands = json.load(extra_cc_file) + + if os.path.isfile(extra_cc_filename): + with open(extra_cc_filename, 'r') as extra_cc_file: + extra_compile_commands = json.load(extra_cc_file) + for f in extra_compile_commands: new_compile_commands.append(f) - extra_cc_file.close() - cc_file.close() - cc_file = open(cc_filename, 'w') - json.dump(new_compile_commands, cc_file) - cc_file.close() + + with open(cc_filename, 'w') as cc_file: + json.dump(new_compile_commands, cc_file) def get_c_files(bugs_build, cc_filename): - cc_file = open(cc_filename, 'r') - compile_commands = json.load(cc_file) + with open(cc_filename, 'r') as cc_file: + compile_commands = json.load(cc_file) + c_files = set() for f in compile_commands: if not (bugs_build == f['directory']): - c_files.add(os.path.join( - os.path.basename(f['directory']), - f['file'])) + c_files.add(os.path.join(os.path.basename(f['directory']), f['file'])) else: c_files.add(f['file']) - cc_file.close() + return c_files diff --git a/scripts/reset_db.sh b/scripts/reset_db.sh new file mode 100644 index 00000000..1048dda1 --- /dev/null +++ b/scripts/reset_db.sh @@ -0,0 +1,43 @@ + +# Load lava-functions +. `dirname $0`/funcs.sh +lava=$(dirname $(dirname $(readlink -f "$0"))) + +# defaults +ok=0 +reset=0 +reset_db=0 +add_queries=0 +make=0 +taint=0 +inject=0 +num_trials=0 +kt="" +demo=0 +curtail=0 +ATP_TYPE="" +# default bugtypes +bugtypes="ptr_add,rel_write,malloc_off_by_one" +# default # of bugs to be injected at a time +many=50 + +# This is just a dummy values +project_name="toy" + +. `dirname $0`/vars.sh + +sourcedir="$directory/$name/$source" +bugsdir="$directory/$name/bugs" +logs="$directory/$name/logs" + +RESET_DB() { + lf="$logs/dbwipe.log" + truncate "$lf" + progress "everything" 1 "Resetting lava db -- logging to $lf" + run_remote "$buildhost" "dropdb -U $pguser -h $dbhost $db || true" "$lf" + run_remote "$buildhost" "createdb -U $pguser -h $dbhost $db || true" "$lf" + run_remote "$buildhost" "psql -d $db -h $dbhost -f $lava/tools/lavaODB/generated/lava.sql -U $pguser" "$lf" + run_remote "$buildhost" "echo dbwipe complete" "$lf" +} + +RESET_DB diff --git a/scripts/run-on-fuzzed-input.py b/scripts/run-on-fuzzed-input.py index 76bc4213..f14805fd 100755 --- a/scripts/run-on-fuzzed-input.py +++ b/scripts/run-on-fuzzed-input.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import argparse import json diff --git a/scripts/setup_postgres.sh b/scripts/setup_postgres.sh old mode 100644 new mode 100755 index a611cf1c..e5be0396 --- a/scripts/setup_postgres.sh +++ b/scripts/setup_postgres.sh @@ -1,42 +1,46 @@ #!/bin/bash set -ex +# shellcheck disable=SC2034 +sudo="" +if [ $EUID -ne 0 ]; then + SUDO=sudo +fi + PGPASS="${HOME}/.pgpass" +PG_VERSION=$(psql --version | awk '{print $3}' | cut -d '.' -f 1) if [ ! -f "${PGPASS}" ]; then - postgres_depends=$(dpkg-query -W -f='${depends}' 'postgresql') - postgres_pkg=$(echo "${postgres_depends}" | grep -oP 'postgresql-[0-9]+.?[0-9]+') - postgres_version=${postgres_pkg/postgresql-/} - pg_hba="/etc/postgresql/${postgres_version}/main/pg_hba.conf" + pg_hba="/etc/postgresql/${PG_VERSION}/main/pg_hba.conf" postgres_password='postgrespostgres' - sudo sed -i.bak -E 's/^(local\s+all\s+postgres\s+)md5$/\1peer/' "${pg_hba}" - sudo service postgresql reload + $SUDO sed -i.bak -E 's/^(local\s+all\s+postgres\s+)md5$/\1peer/' "${pg_hba}" + $SUDO service postgresql reload password_sql="ALTER USER postgres WITH PASSWORD '${postgres_password}';" - sudo -u postgres psql -c "${password_sql}" + $SUDO -u postgres psql -c "${password_sql}" echo "*:*:*:postgres:${postgres_password}" > "${PGPASS}" chmod 600 "${PGPASS}" - sudo sed -i.bak -E 's/^(local\s+all\s+postgres\s+)peer$/\1md5/' "${pg_hba}" - sudo service postgresql reload + $SUDO sed -i.bak -E 's/^(local\s+all\s+postgres\s+)peer$/\1md5/' "${pg_hba}" + $SUDO service postgresql reload fi # Define the PostgreSQL version -PG_VERSION=$(psql --version | awk '{print $3}' | cut -d '.' -f 1) + # Define the configuration file paths PG_CONF="/etc/postgresql/${PG_VERSION}/main/postgresql.conf" PG_HBA="/etc/postgresql/${PG_VERSION}/main/pg_hba.conf" # Update listen_addresses and password_encryption in postgresql.conf -sed -i "s/#listen_addresses = 'localhost'/listen_addresses = '0.0.0.0, localhost'/g" $PG_CONF -sed -i "s/#password_encryption = scram-sha-256/password_encryption = md5/g" $PG_CONF +$SUDO sed -i "s/#listen_addresses = 'localhost'/listen_addresses = '0.0.0.0, localhost'/g" $PG_CONF +$SUDO sed -i "s/#password_encryption = scram-sha-256/password_encryption = md5/g" $PG_CONF # Update pg_hba.conf -echo "host all all 0.0.0.0/0 md5" >> $PG_HBA -sed -i 's/scram-sha-256/md5/g' $PG_HBA +$SUDO echo "host all all 0.0.0.0/0 md5" >> $PG_HBA +$SUDO sed -i 's/scram-sha-256/md5/g' $PG_HBA # Restart PostgreSQL service -service postgresql restart \ No newline at end of file +$SUDO service postgresql restart diff --git a/scripts/shell.sh b/scripts/shell.sh index ae92ef29..e4ad731b 100755 --- a/scripts/shell.sh +++ b/scripts/shell.sh @@ -21,6 +21,7 @@ fi #docker_map_args="$docker_map_args -v $pb_head_dir:$pb_head_dir -v $google_head_dir:$google_head_dir" command=bash +DOCKER_IP=$(ifconfig docker0 | grep 'inet ' | awk '{print $2}') docker run --rm -it \ -e "HTTP_PROXY=$HTTP_PROXY" \ @@ -34,7 +35,7 @@ docker run --rm -it \ -v /etc/shadow:/etc/shadow:ro \ -v /etc/gshadow:/etc/gshadow:ro \ -v /home:/home:ro \ - --add-host=database:172.17.0.1 \ + --add-host=database:$DOCKER_IP \ $docker_map_args \ $1 sh -c "trap '' PIPE; su -l $(whoami) -c \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/llvm-11/lib; $command\"" \ diff --git a/scripts/vars.sh b/scripts/vars.sh index e96420c2..5182f579 100644 --- a/scripts/vars.sh +++ b/scripts/vars.sh @@ -25,6 +25,16 @@ output_dir="$(jq -r '.output_dir // ""' $hostjson)" config_dir="$(jq -r '.config_dir // ""' $hostjson)/$project_name" tar_dir="$(jq -r '.tar_dir // ""' $hostjson)" db_suffix="$(jq -r '.db_suffix // ""' $hostjson)" +buildhost="$(jq -r '.buildhost // "localhost"' $hostjson)" +testinghost="$(jq -r '.testinghost // "localhost"' $hostjson)" +dockername="$(jq -r '.docker // "lava32"' $hostjson)" +pguser="$(jq -r '.pguser // "postgres"' $hostjson)" +pgpass="$(jq -r '.pgpass // "postgrespostgres"' $hostjson)" +dbhost="$(jq -r '.host // "database"' $hostjson)" + +export PGUSER=$pguser +export PGPASS=$pgpass + json="${config_dir}/$project_name.json" if [ ! -f $json ]; then @@ -38,6 +48,7 @@ db="$(jq -r .db $json)$db_suffix" extradockerargs="$(jq -r .extra_docker_args $json)" exitCode="$(jq -r .expected_exit_code $json)" dataflow="$(jq -r '.dataflow // "false"' $json)" # TODO use everywhere, stop passing as argument +llvm="/usr/lib/llvm-11" # List of function names to blacklist for data_flow injection, merged as fn1\|fn2\|fn3 so we can use sed # Or an empty string if not present @@ -65,9 +76,6 @@ if [ "$(jq -r .injfixupsscript $json)" != "null" ]; then injfixupsscript="${injfixupsscript/\{bug_build\}/$bug_build}" fi -buildhost="$(jq -r '.buildhost // "docker"' $json)" -pandahost="$(jq -r '.pandahost // "docker"' $json)" -testinghost="$(jq -r '.testinghost // "docker"' $json)" logs="$output_dir/$name/logs" makecmd="$(jq -r .make $json)" @@ -76,10 +84,8 @@ install="${install/\{config_dir\}/$config_dir}" # Format string replacement for post_install="$(jq -r .post_install $json)" install_simple=$(jq -r .install_simple $json) configure_cmd=$(jq -r '.configure // "/bin/true"' $json) -container="$(jq -r '.docker // "lava32"' $json)" # Constants scripts="$lava/scripts" -python="/usr/bin/python" -pdb="/usr/bin/python -m pdb " -dockername="lava32" +python="python3" +pdb="python3 -m pdb " diff --git a/setup_container.sh b/setup_container.sh old mode 100644 new mode 100755 diff --git a/tools/btrace/sw-btrace-to-compiledb b/tools/btrace/sw-btrace-to-compiledb index ff914320..1a55ee6c 100755 --- a/tools/btrace/sw-btrace-to-compiledb +++ b/tools/btrace/sw-btrace-to-compiledb @@ -1,5 +1,4 @@ -#!/usr/bin/env python -# Python 2 or 3 -- works on 2.6 and up. +#!/usr/bin/python3 from __future__ import absolute_import, print_function, unicode_literals import json import os diff --git a/tools/lavaTool/get_c_files.py b/tools/lavaTool/get_c_files.py index fd744e39..4c30ebc9 100755 --- a/tools/lavaTool/get_c_files.py +++ b/tools/lavaTool/get_c_files.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python3 import json import os @@ -17,9 +17,9 @@ def processCompileCommands(srcPath): cFiles = [] modificationNeeded = False pathStr = os.path.join(srcPath, 'compile_commands.json') - jsonFile = open(pathStr, 'r') - compileCommands = json.load(jsonFile) - jsonFile.close() + with open(pathStr, 'r') as jsonFile: + compileCommands = json.load(jsonFile) + newCompileCommands = compileCommands[:] for i in compileCommands: if 'Werror' in i['command']: @@ -37,11 +37,9 @@ def processCompileCommands(srcPath): if modificationNeeded: shutil.copyfile(pathStr, os.path.join(srcPath, 'compile_commands_original.json')) - jsonFile = open(pathStr, 'w') - json.dump(newCompileCommands, jsonFile, indent=4) - jsonFile.close() + with open(pathStr, 'w') as jsonFile: + json.dump(newCompileCommands, jsonFile, indent=4) - jsonFile.close() return newCompileCommands