| #!/usr/bin/env python |
| #===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===# |
| # |
| # The LLVM Compiler Infrastructure |
| # |
| # This file is distributed under the University of Illinois Open Source |
| # License. See LICENSE.TXT for details. |
| # |
| #===------------------------------------------------------------------------===# |
| # Runs the data-flow tracer several times on the same input in order to collect |
| # the complete trace for all input bytes (running it on all bytes at once |
| # may fail if DFSan runs out of labels). |
| # Usage: |
| # |
| # # Collect dataflow for one input, store it in OUTPUT (default is stdout) |
| # collect_data_flow.py BINARY INPUT [OUTPUT] |
| # |
| # # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR |
| # collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR |
| #===------------------------------------------------------------------------===# |
| import atexit |
| import hashlib |
| import sys |
| import os |
| import subprocess |
| import tempfile |
| import shutil |
| |
# Module-level placeholder. main() assigns to its own local variable named
# tmpdir (there is no `global` statement), so this value is never updated by
# the code in this file.
tmpdir = ""
| |
def cleanup(d):
    """Announce the removal of directory tree `d`, then delete it.

    d: path of the directory to remove recursively.
    """
    notice = "removing: %s" % d
    print(notice)
    shutil.rmtree(d)
| |
def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
    """Collect data flow for every file found under corpus_dir.

    self:       path of this script; it is re-invoked once per corpus file as
                `self exe <input> <output>`.
    exe:        path of the tracer binary handed on to each re-invocation.
    corpus_dir: directory that is walked recursively for input files.
    output_dir: directory to create; it must not exist yet. Each input's
                result is stored in a file named after the SHA-1 hex digest
                of the input's contents.

    After all inputs are processed, `exe` is run without arguments and its
    standard output is saved as output_dir/functions.txt.
    """
    print("Collecting dataflow for corpus: %s output_dir: %s" % (corpus_dir,
                                                                 output_dir))
    # Even when asserts are stripped (-O), os.mkdir below still refuses to
    # reuse an existing path.
    assert not os.path.exists(output_dir)
    os.mkdir(output_dir)
    for root, _dirs, files in os.walk(corpus_dir):
        for name in files:
            path = os.path.join(root, name)
            # Corpus inputs are arbitrary bytes: read them in binary mode so
            # hashing works on Python 3 and is not affected by text decoding,
            # and close the handle promptly.
            with open(path, "rb") as corpus_file:
                sha1 = hashlib.sha1(corpus_file.read()).hexdigest()
            output = os.path.join(output_dir, sha1)
            subprocess.call([self, exe, path, output])
    with open(os.path.join(output_dir, "functions.txt"), "w") as functions_txt:
        subprocess.call([exe], stdout=functions_txt)
| |
| |
def main(argv):
    """Collect a data-flow trace for one input, or for a whole corpus.

    argv follows the command-line layout:
        argv[0]  path of this script; passed on for corpus mode and used to
                 locate merge_data_flow.py in the same directory
        argv[1]  the tracer binary
        argv[2]  an input file, or a corpus directory
        argv[3]  output file (optional for a single input, stdout when
                 absent) or output directory (required for a corpus)

    For a single input the tracer is first run on the byte range [0, size).
    Whenever a run exits non-zero and the range holds at least two bytes, the
    range is halved and both halves are retried. Each run writes to its own
    file in a temporary directory; merge_data_flow.py then combines them.
    """
    exe = argv[1]
    inp = argv[2]
    if os.path.isdir(inp):
        return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])

    size = os.path.getsize(inp)
    tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
    atexit.register(cleanup, tmpdir)
    # Single-argument print(...) behaves the same on Python 2 and 3; the
    # messages keep the exact spacing of the former print statements.
    print("tmpdir:  %s" % tmpdir)

    pending = [[0, size]]
    outputs = []
    while pending:
        r = pending.pop()
        begin, end = r
        print("******* Trying:  %s" % (r,))
        tmpfile = os.path.join(tmpdir, "%d-%d" % (begin, end))
        ret = subprocess.call([exe, str(begin), str(end), inp, tmpfile])
        if ret and end - begin >= 2:
            # Floor division keeps the bounds integral on Python 3 as well;
            # true division would pass values like "2.5" to the tracer.
            mid = (begin + end) // 2
            pending.append([begin, mid])
            pending.append([mid, end])
        else:
            # Reached on success, and also when a range of fewer than two
            # bytes fails: it cannot be split further, so its output path is
            # still handed to the merge step.
            outputs.append(tmpfile)
            print("******* Success:  %s" % (r,))

    merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")
    if len(argv) >= 4:
        # Close the output file once the merge step has finished writing.
        with open(argv[3], "w") as f:
            subprocess.call([merge] + outputs, stdout=f)
    else:
        # The child writes straight to the underlying descriptor, so flush
        # our buffered progress lines first to keep the output in order.
        sys.stdout.flush()
        subprocess.call([merge] + outputs, stdout=sys.stdout)
| |
# Script entry point: run main() with the full argument vector (including the
# script path in argv[0]) only when this file is executed directly.
if __name__ == '__main__':
    main(sys.argv)