Adding wrapper to get understandable output.

* Saves stdout / stderr to log files. * Prefixes all messages with fuzzer name. * Gives regular status updates. * Successfully passes jobserver details down. * Correctly handles dry-run and question options. Signed-off-by: Tim 'mithro' Ansell <me@mith.ro>
2019-02-01 00:27:56 -08:00 · 2019-02-01 00:27:56 -08:00 · f8c3ada041
parent 378e5f40b1
commit f8c3ada041
3 changed files with 546 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,5 +5,7 @@
 **/specimen_*
 **/output
 run.ok
+stderr.*.log
+stdout.*.log
 __pycache__
 *.pyc
--- a/fuzzers/Makefile
+++ b/fuzzers/Makefile
@ -1,3 +1,5 @@
+SELF_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
+
 # Developer options for quicker runs
 # Run algorithm, but only a small number of iterations
 QUICK=N
@ -11,11 +13,24 @@ BITONLY=N
 all:

 define fuzzer
+
+# $(1) - Fuzzer name/directory
+# $(2) - Space seperated list of dependencies for the fuzzer.
+
+# Make the all target depend on the run.ok file for the fuzzer.
 all: $(1)/run.ok
+
+# Make the clean target run `make clean` in the fuzzer's directory.
 clean::
 	$$(MAKE) -C $(1) clean
+	rm -f $(1)/*.log
+
+# Describe how to create the fuzzer's run.ok file.
+# This command must start with a + to tell make to pass the jobserver
+# parameters downwards.
 $(1)/run.ok: $(addsuffix /run.ok,$(2))
-	$$(MAKE) -C $(1) run
+	+@$(SELF_DIR)/run_fuzzer.py $(1)
+
 endef

 $(eval $(call fuzzer,000-init-db,))
--- a/fuzzers/run_fuzzer.py
+++ b/fuzzers/run_fuzzer.py
@ -0,0 +1,528 @@
+#!/usr/bin/env python3
+
+from datetime import datetime, timedelta
+
+import io
+import os
+import os.path
+import re
+import resource
+import signal
+import subprocess
+import sys
+import traceback
+
+secs_in_min = 60
+secs_in_hour = 60 * secs_in_min
+
+
+def pretty_timedelta_str(d):
+    """Pretty print a time delta object.
+
+    >>> pretty_timedelta_str(timedelta(seconds=2))
+    '2s'
+
+    >>> pretty_timedelta_str(timedelta(seconds=61))
+    '1m01s'
+
+    >>> pretty_timedelta_str(timedelta(seconds=125))
+    '2m05s'
+
+    >>> pretty_timedelta_str(timedelta(seconds=secs_in_hour * 5))
+    '5h'
+
+    Once we get to displaying hours, we don't bother displaying seconds.
+    >>> pretty_timedelta_str(timedelta(seconds=secs_in_hour * 5 + 1))
+    '5h'
+
+    >>> pretty_timedelta_str(timedelta(
+    ...   seconds=secs_in_hour * 5 + secs_in_min * 2 + 1))
+    '5h02m'
+
+    >>> pretty_timedelta_str(timedelta(seconds=secs_in_hour * 5.5))
+    '5h30m'
+    """
+    assert isinstance(d, timedelta)
+    ts = d.total_seconds()
+    assert ts > 0, (d, ts)
+
+    bits = []
+    hours = int(ts / secs_in_hour)
+    if hours > 0:
+        bits.append("{}h".format(hours))
+        ts -= hours * secs_in_hour
+
+    mins = int(ts / secs_in_min)
+    if mins > 0:
+        if hours > 0:
+            bits.append("{:02d}m".format(mins))
+        else:
+            bits.append("{:d}m".format(mins))
+        ts -= mins * secs_in_min
+
+    if hours == 0:
+        if mins > 0:
+            bits.append("{:02d}s".format(int(ts)))
+        else:
+            bits.append("{:d}s".format(int(ts)))
+
+    assert len(bits) > 0, d
+
+    return "".join(bits)
+
+
+class Logger:
+    """Output a bunch of lines with a prefix.
+
+    >>> l = Logger("fuzz", datetime(2001, 11, 1), 6)
+    >>> l._now = lambda: datetime(2001, 11, 1, second=10)
+    >>> l.log("Test!")
+    fuzz   -   10s: Test!
+    >>> l.log("Format {} {t}", [1,], {'t': 2})
+    fuzz   -   10s: Format 1 2
+    >>> l.log('''\\
+    ... Line 1
+    ... Line 2
+    ... Line 3
+    ... ''')
+    fuzz   -   10s: Line 1
+    fuzz   -   10s: Line 2
+    fuzz   -   10s: Line 3
+    """
+
+    def __init__(self, fuzzer, time_start, padding):
+        self.fuzzer = fuzzer + ' ' * (padding - len(fuzzer))
+        self.time_start = time_start
+        self.queue = []
+
+    def _now(self):
+        return datetime.utcnow()
+
+    def log(self, msg, args=None, kw=None, flush=True):
+        """Queue a log message.
+
+        This is safe to do in a signal handler where you can't do io.
+        """
+        if args is None:
+            args = []
+        if kw is None:
+            kw = {}
+
+        time_log = self._now()
+        self.queue.append((time_log, msg, args, kw))
+        if flush:
+            self.flush()
+
+    def flush(self):
+        while len(self.queue) > 0:
+            time_log, msg, args, kw = self.queue.pop(0)
+            self._output(time_log, msg, args, kw)
+
+    def _output(self, time_log, msg, args, kw):
+        running_for = time_log - self.time_start
+        msg = msg.format(*args, **kw)
+
+        log_prefix = "{:s} - {:>5s}: ".format(
+            self.fuzzer, pretty_timedelta_str(running_for))
+
+        msg = "\n".join(log_prefix + x for x in msg.splitlines())
+        print(msg)
+
+
+def last_lines(f, num, blocksize=100):
+    r"""Return n last lines in a file.
+
+    >>> f = io.StringIO("")
+    >>> last_lines(f, 100)
+    ['']
+
+    >>> f = io.StringIO("1\n2")
+    >>> last_lines(f, 100)
+    ['1', '2']
+
+    >>> f = io.StringIO("11\n22\n33\n44\n55")
+    >>> last_lines(f, 100)
+    ['11', '22', '33', '44', '55']
+
+    >>> f = io.StringIO("11\n22\n33\n44\n55")
+    >>> last_lines(f, 100, 2)
+    ['11', '22', '33', '44', '55']
+
+    >>> f = io.StringIO("11\n22\n33\n44\n55")
+    >>> last_lines(f, 100, 3)
+    ['11', '22', '33', '44', '55']
+
+    >>> f = io.StringIO("11\n22\n33\n44\n55")
+    >>> last_lines(f, 2, 2)
+    ['44', '55']
+
+    >>> f = io.StringIO("11\n22\n33\n44\n55")
+    >>> last_lines(f, 2, 3)
+    ['44', '55']
+
+    >>> f = io.StringIO("11\n22\n33\n44\n55")
+    >>> last_lines(f, 2, 4)
+    ['44', '55']
+
+    >>> f = io.StringIO("1\n22\n3333333\n")
+    >>> last_lines(f, 100, 3)
+    ['1', '22', '3333333', '']
+
+    """
+    lines = ['']
+    # Seek to the end of the file
+    f.seek(0, os.SEEK_END)
+    fpos = f.tell()
+
+    while len(lines) < (num + 1) and fpos > 0:
+        bs = min(blocksize, fpos)
+        fpos = fpos - bs
+        f.seek(fpos)
+        data = f.read(bs)
+
+        while True:
+            lpos = data.rfind('\n')
+            lines[0] = data[lpos + 1:] + lines[0]
+            if lpos == -1:
+                break
+            lines.insert(0, '')
+            data = data[:lpos]
+
+    if fpos == 0:
+        lines.insert(0, '')
+
+    return lines[1:]
+
+
+def get_usage():
+    # This function only works if you have a signal handler for the
+    # signal.SIGCHLD signal.
+    raw_usage = resource.getrusage(resource.RUSAGE_CHILDREN)
+    # 0   ru_utime    time in user mode (float)
+    # 1   ru_stime    time in system mode (float)
+    # 2   ru_maxrss   maximum resident set size
+    #
+    # These fields are always zero on Linux
+    # 3   ru_ixrss    shared memory size
+    # 4   ru_idrss    unshared memory size
+    # 5   ru_isrss    unshared stack size
+    return "User:{}s System:{}s".format(
+        int(raw_usage.ru_utime),
+        int(raw_usage.ru_stime),
+    )
+
+
+def get_load():
+    """Return current load average.
+
+    Values is average perecntage of CPU used over 1 minute, 5 minutes, 15
+    minutes.
+    """
+    a1, a5, a15 = os.getloadavg()
+    cpus = os.cpu_count()
+    return a1 / cpus * 100.0, a5 / cpus * 100.0, a15 / cpus * 100.0
+
+
+_pstree_features = None
+
+
+def get_pstree_features():
+    global _pstree_features
+    if _pstree_features is None:
+        _pstree_features = []
+        for f in ['-T', '-l']:
+            try:
+                subprocess.check_output(
+                    "pstree {}".format(f),
+                    stderr=subprocess.STDOUT,
+                    shell=True,
+                )
+                _pstree_features.append(f)
+            except subprocess.CalledProcessError:
+                continue
+    return _pstree_features
+
+
+def get_pstree(pid):
+    """Get processes under current one.
+
+    Requires the pstree until be installed, otherwise returns empty string.
+    """
+    p = subprocess.Popen(
+        "pstree {} {}".format(" ".join(get_pstree_features()), pid),
+        shell=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        stdin=subprocess.DEVNULL,
+        universal_newlines=True,
+    )
+    stdout, stderr = p.communicate()
+
+    # Special case segfaults.
+    if 'Segmentation fault' in stderr:
+        stderr = ''
+        stdout = '(result to big, pstree segfaulted.)'
+
+    # If no error, just return stdout
+    if not stderr:
+        return stdout
+
+    return "{}\n{}".format(stdout, stderr)
+
+
+def run_submake(make_flags):
+    """Check if make_flags indicate that we should execute things.
+
+    See https://www.gnu.org/software/make/manual/html_node/Instead-of-Execution.html#Instead-of-Execution  # noqa
+
+    If this is a dry run or question then we shouldn't execute or output
+    anything.
+
+    The flags end up as single letter versions in the MAKEFLAGS environment
+    variable.
+
+    >>> run_submake('')
+    True
+
+    The following flags are important;
+
+     -n == --dry-run
+
+    >>> run_submake('n')
+    False
+    >>> run_submake('n --blah')
+    False
+    >>> run_submake('--blah n')
+    False
+    >>> run_submake('--blah')
+    True
+    >>> run_submake('--random')
+    True
+
+     -q == --question
+
+    >>> run_submake('q')
+    False
+    >>> run_submake('q --blah')
+    False
+    >>> run_submake('--blah q')
+    False
+    >>> run_submake('--blah')
+    True
+    >>> run_submake('--random')
+    True
+
+      Both --dry-run and --question
+
+    >>> run_submake('qn')
+    False
+    >>> run_submake('nq')
+    False
+    >>> run_submake('--quiant')
+    True
+    """
+    r = re.search(r'(?:^|\s)[^-]*(n|q)[^\s]*(\s|$)', make_flags)
+    if not r:
+        return True
+    return not bool(r.groups()[0])
+
+
+def main(argv):
+    fuzzers = os.path.abspath(os.path.dirname(__file__))
+
+    assert len(argv) == 2, argv
+    fuzzer = argv[1]
+
+    make_cmd = os.environ.get('MAKE', 'make')
+    make_flags = os.environ.get('MAKEFLAGS', '')
+
+    # Should run things?
+    if not run_submake(make_flags):
+        return 0
+
+    time_start = datetime.utcnow()
+
+    fuzzer_length = max(len(f) for f in os.listdir(fuzzers))
+
+    logger = Logger(fuzzer, time_start, fuzzer_length)
+
+    def log(msg, *a, **k):
+        logger.log(msg, a, k, flush=True)
+
+    # Need a signal handler on SIGCHLD otherwise get_resource doesn't return
+    # anything.
+    signal.signal(signal.SIGCHLD, lambda sig, frame: None)
+
+    fuzzer_dir = os.path.join(fuzzers, fuzzer)
+    assert os.path.exists(fuzzer_dir), fuzzer_dir
+    # FIXME: The fuzzer output directory should be different from the top level
+    # fuzzer directory.
+    fuzzer_out = fuzzer_dir
+    if not os.path.exists(fuzzer_out):
+        os.makedirs(fuzzer_out)
+    assert os.path.exists(fuzzer_out)
+
+    fuzzer_runok = os.path.join(fuzzer_out, "run.ok")
+
+    if os.path.exists(fuzzer_runok):
+        last_modified = datetime.fromtimestamp(os.stat(fuzzer_runok).st_mtime)
+
+        log(
+            "Skipping as run.ok exists (updated @ {})",
+            last_modified.isoformat())
+    else:
+        log("Starting @ {}", time_start.isoformat())
+
+    log_suffix = ".{}.log".format(time_start.isoformat())
+    fuzzer_stdout = os.path.join(fuzzer_out, "stdout" + log_suffix)
+    fuzzer_stderr = os.path.join(fuzzer_out, "stderr" + log_suffix)
+
+    # Make sure the stdout file exists
+    stdout_fd = open(fuzzer_stdout, "w")
+    stdout_fd.write("Build started {}\n".format(time_start.isoformat()))
+    stdout_fd.write("-" * 75)
+    stdout_fd.write("\n")
+    stdout_fd.flush()
+    os.fsync(stdout_fd)
+    stdout_fd.close()
+
+    stdout_fd = open(fuzzer_stdout, "w+")
+    stdout_fd.seek(0, os.SEEK_END)
+
+    # Make sure the stderr file exists
+    stderr_fd = open(fuzzer_stderr, "w")
+    stderr_fd.write("Build started {}\n".format(time_start.isoformat()))
+    stderr_fd.write("-" * 75)
+    stderr_fd.write("\n")
+    stderr_fd.flush()
+    os.fsync(stderr_fd)
+    stderr_fd.close()
+
+    stderr_fd = open(fuzzer_stderr, "w+")
+    stderr_fd.seek(0, os.SEEK_END)
+
+    # Play nice with make's jobserver.
+    # See https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver  # noqa
+    if '--jobserver-fds' in make_flags:
+        job_re = re.search('--jobserver-fds=([0-9]+),([0-9]+)', make_flags)
+        assert job_re, make_flags
+        job_rd, job_wr = job_re.groups()
+
+        job_rd = int(job_rd)
+        job_wr = int(job_wr)
+        assert job_rd > 2, (job_rd, job_wr, make_flags)
+        assert job_wr > 2, (job_rd, job_wr, make_flags)
+
+        # Make sure the file descriptors exist..
+        job_rd_fd = os.fdopen(int(job_rd), 'rb', 0)
+        job_wr_fd = os.fdopen(int(job_wr), 'rb', 0)
+    else:
+        job_rd_fd = None
+        job_wr_fd = None
+
+    p = None
+    try:
+        log(
+            "Running {} -C {} run (with MAKEFLAGS='{}')",
+            make_cmd,
+            fuzzer_dir,
+            make_flags,
+        )
+        p = subprocess.Popen(
+            [make_cmd, '-C', fuzzer_dir, 'run'],
+            stdin=None,
+            stdout=stdout_fd,
+            stderr=stderr_fd,
+            # Make sure not to close the fds as make uses fd=(3,4) for process
+            # control.
+            close_fds=False)
+
+        # Close the jobserver files if they were found.
+        if job_rd_fd:
+            job_rd_fd.close()
+        if job_wr_fd:
+            job_wr_fd.close()
+
+        while True:
+            try:
+                retcode = p.wait(timeout=10)
+                p = None
+            except subprocess.TimeoutExpired:
+                retcode = None
+
+            if retcode is not None:
+                break
+            log(
+                "Still running (1m:{:0.2f}%, 5m:{:0.2f}%, 15m:{:0.2f}%).\n{}",
+                *get_load(),
+                get_pstree(p.pid),
+            )
+    except (Exception, KeyboardInterrupt, SystemExit):
+        retcode = -1
+        tb = io.StringIO()
+        traceback.print_exc(file=tb)
+        log(tb.getvalue())
+
+    # Prevent Ctrl-C so we exit properly...
+    signal.signal(signal.SIGINT, lambda sig, frame: logger.log("Dieing!"))
+
+    try:
+        if p is not None:
+            try:
+                retcode = p.wait(1)
+            except subprocess.TimeoutExpired:
+                retcode = -1
+                p.kill()
+                p.wait()
+    except Exception:
+        tb = io.StringIO()
+        traceback.print_exc(file=tb)
+        log(tb.getvalue())
+
+    # Wait for all children to finish.
+    try:
+        while True:
+            log("Child finished: {}", os.waitpid(-1, 0))
+    except ChildProcessError:
+        pass
+
+    log("Finishing ({}).", get_usage())
+
+    time_end = datetime.utcnow()
+    if retcode != 0:
+        # Log the last 100 lines of stderr on a failure
+        error_log = "\n".join(last_lines(open(fuzzer_stderr), 100))
+        log(
+            """\
+Failed @ {time_end} with exit code: {retcode}
+--------------------------------------------------------------------------
+{error_log}
+--------------------------------------------------------------------------
+Failed @ {time_end} with exit code: {retcode}
+""",
+            retcode=retcode,
+            error_log=error_log,
+            time_end=time_end.isoformat())
+
+    else:
+
+        # Log the last 100 lines of a successful run
+        success_log = "\n".join(last_lines(open(fuzzer_stdout), 100))
+        log(
+            """\
+Succeeded! @ {}
+--------------------------------------------------------------------------
+{}
+--------------------------------------------------------------------------
+""", time_end.isoformat(), success_log)
+
+    logger.flush()
+    return retcode
+
+
+if __name__ == "__main__":
+    if "--test" in sys.argv or len(sys.argv) == 1:
+        import doctest
+        doctest.testmod()
+    else:
+        sys.exit(main(sys.argv))