Convert verilator_gantt to Python

2021-09-08 08:16:31 -04:00 · 2021-09-08 08:16:31 -04:00 · 4b274a8d4d
parent c678e7ec3e
commit 4b274a8d4d
4 changed files with 419 additions and 492 deletions
--- a/Makefile.in
+++ b/Makefile.in
@@ -353,6 +353,7 @@ clang-format:
 PY_PROGRAMS = \
 	bin/verilator_ccache_report \
 	bin/verilator_difftree \
 	bin/verilator_gantt \
 	bin/verilator_profcfunc \
 	examples/xml_py/vl_file_copy \
 	examples/xml_py/vl_hier_graph \
--- a/bin/verilator_gantt
+++ b/bin/verilator_gantt
@@ -1,163 +1,135 @@
-#!/usr/bin/env perl
+#!/usr/bin/env python3
-# See copyright, etc in below POD section.
+# pylint: disable=C0103,C0114,C0116,C0301,R0914,R0912,R0915,W0511,eval-used
 ######################################################################
-use warnings;
+import argparse
-use strict;
+import collections
-use Getopt::Long;
+import math
-use IO::File;
+import re
-use Pod::Usage;
+import statistics
-use vars qw($Debug);
+# from pprint import pprint
-$Debug = 0;
+Threads = collections.defaultdict(lambda: {})
-my $Opt_File;
+Mtasks = collections.defaultdict(lambda: {})
-my $Opt_Time_Per_Char = 0;  # rdtsc ticks per char in gantt chart, 0=auto
+Global = {
-my $opt_vcd = "profile_threads.vcd";
+    'args': {},
-
+    'cpuinfo': collections.defaultdict(lambda: {}),
-our %Threads;
+    'rdtsc_cycle_time': 0,
-our %Mtasks;
+    'stats': {}
 our %Global;
 autoflush STDOUT 1;
 autoflush STDERR 1;
 Getopt::Long::config("no_auto_abbrev");
 if (! GetOptions(
          "help"        => \&usage,
          "scale=i"     => \$Opt_Time_Per_Char,
          "debug"       => sub { $Debug = 1; },
          "vcd=s"       => \$opt_vcd,
          "no-vcd!"     => sub { $opt_vcd = undef; },
          "<>"          => \&parameter,
    )) {
    die "%Error: Bad usage, try 'verilator_gantt --help'\n";
 }
-$Opt_File = "profile_threads.dat" if !defined $Opt_File;
+######################################################################
 process($Opt_File);
 write_vcd($opt_vcd) if defined $opt_vcd;
 exit(0);
-#######################################################################
+def process(filename):
    read_data(filename)
    report()
 sub usage {
    pod2usage(-verbose=>2, -exitval=>0, -output=>\*STDOUT);
    exit(1);  # Unreachable
 }
-sub parameter {
+def read_data(filename):
-    my $param = shift;
+    with open(filename) as fh:
-    if (!defined $Opt_File) {
+        re_prof = re.compile(
-        $Opt_File = $param;
+            r'^VLPROF mtask\s(\d+)\sstart\s(\d+)\send\s(\d+)\selapsed\s(\d+)\spredict_time\s(\d+)\scpu\s(\d+)\son thread (\d+)'
-    } else {
+        )
-        die "%Error: Unknown parameter: $param\n";
+        re_arg1 = re.compile(r'VLPROF arg\s+(\S+)\+([0-9.])\s*')
-    }
+        re_arg2 = re.compile(r'VLPROF arg\s+(\S+)\s+([0-9.])\s*$')
-}
+        re_stat = re.compile(r'VLPROF stat\s+(\S+)\s+([0-9.]+)')
        re_time = re.compile(r'rdtsc time = (\d+) ticks')
        re_proc_cpu = re.compile(r'VLPROFPROC processor\s*:\s*(\d+)\s*$')
        re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$')
        cpu = None
-#######################################################################
+        for line in fh:
            if re_prof.match(line):
                match = re_prof.match(line)
                mtask = int(match.group(1))
                start = int(match.group(2))
                end = int(match.group(3))
                elapsed_time = int(match.group(4))
                predict_time = int(match.group(5))
                cpu = int(match.group(6))
                thread = int(match.group(7))
                if start not in Threads[thread]:
                    Threads[thread][start] = {}
                Threads[thread][start]['mtask'] = mtask
                Threads[thread][start]['end'] = end
                Threads[thread][start]['cpu'] = cpu
-sub process {
+                if 'elapsed' not in Mtasks[mtask]:
-    my $filename = shift;
+                    Mtasks[mtask] = {'end': 0, 'elapsed': 0}
                Mtasks[mtask]['elapsed'] += elapsed_time
                Mtasks[mtask]['predict'] = predict_time
                Mtasks[mtask]['end'] = max(Mtasks[mtask]['end'], end)
            elif re.match(r'^VLPROFTHREAD', line):
                None  # pylint: disable=pointless-statement
            elif re_arg1.match(line):
                match = re_arg1.match(line)
                Global['args'][match.group(1)] = match.group(2)
            elif re_arg2.match(line):
                match = re_arg2.match(line)
                Global['args'][match.group(1)] = match.group(2)
            elif re_stat.match(line):
                match = re_stat.match(line)
                Global['stats'][match.group(1)] = match.group(2)
            elif re_proc_cpu.match(line):
                match = re_proc_cpu.match(line)
                cpu = int(match.group(1))
            elif cpu and re_proc_dat.match(line):
                match = re_proc_dat.match(line)
                term = match.group(1)
                value = match.group(2)
                term = re.sub(r'\s+$', '', term)
                term = re.sub(r'\s+', '_', term)
                value = re.sub(r'\s+$', '', value)
                Global['cpuinfo'][cpu][term] = value
            elif re.match(r'^#', line):
                None  # pylint: disable=pointless-statement
            elif Args.debug:
                print("-Unk: %s" % line)
            # TODO -- this is parsing text printed by a client.
            # Really, verilator proper should generate this
            # if it's useful...
            if re_time.match(line):
                Global['rdtsc_cycle_time'] = re_time.group(1)
    read_data($filename);
    report();
 }
-#######################################################################
+def re_match_result(regexp, line, result_to):
    result_to = re.match(regexp, line)
    return result_to
 sub read_data {
    my $filename = shift;
-    %Global = (rdtsc_cycle_time => 0);
+######################################################################
    my $fh = IO::File->new("<$filename") or die "%Error: $! $filename,";
-    my $cpu;
+def report():
-    while (my $line = $fh->getline) {
+    print("Verilator Gantt report")
        if ($line =~ m/VLPROF mtask\s(\d+)\sstart\s(\d+)\send\s(\d+)\selapsed\s(\d+)\spredict_time\s(\d+)\scpu\s(\d+)\son thread (\d+)/) {
            my $mtask = $1;
            my $start = $2;
            my $end = $3;
            my $elapsed_time = $4;
            my $predict_time = $5;
            my $cpu = $6;
            my $thread = $7;
            $Threads{$thread}{$start}{mtask} = $mtask;
            $Threads{$thread}{$start}{end} = $end;
            $Threads{$thread}{$start}{cpu} = $cpu;
-            if (!exists $Mtasks{$mtask}{elapsed}) {
+    print("\nArgument settings:")
-                $Mtasks{$mtask}{elapsed} = 0;
+    for arg in sorted(Global['args'].keys()):
-            }
+        plus = "+" if re.match(r'^\+', arg) else " "
-            $Mtasks{$mtask}{elapsed} += $elapsed_time;
+        print("  %s%s%s" % (arg, plus, Global['args'][arg]))
            $Mtasks{$mtask}{predict} = $predict_time;
            $Mtasks{$mtask}{end} = max($Mtasks{$mtask}{end}, $end);
        }
        elsif ($line =~ /^VLPROFTHREAD/) {}
        elsif ($line =~ m/VLPROF arg\s+(\S+)\+([0-9.])\s*$/
               || $line =~ m/VLPROF arg\s+(\S+)\s+([0-9.])\s*$/) {
            $Global{args}{$1} = $2;
        }
        elsif ($line =~ m/VLPROF stat\s+(\S+)\s+([0-9.]+)/) {
            $Global{stats}{$1} = $2;
        }
        elsif ($line =~ m/^VLPROFPROC processor\s*:\s*(\d+)\s*$/) {
            $cpu = $1;
        }
        elsif (defined $cpu && $line =~ m/^VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$/) {
            my ($term, $value) = ($1, $2);
            $term =~ s/\s+$//;
            $term =~ s/\s+/_/;
            $value =~ s/\s+$//;
            $Global{cpuinfo}{$cpu}{$term} = $value;
        }
        elsif ($line =~ /^#/) {}
        elsif ($Debug) {
            chomp $line;
            print "Unk: $line\n";
        }
        # TODO -- this is parsing text printed by a client.
        # Really, verilator proper should generate this
        # if it's useful...
        if ($line =~ m/rdtsc time = (\d+) ticks/) {
            $Global{rdtsc_cycle_time} = $1;
        }
    }
 }
-#######################################################################
+    nthreads = len(Threads)
-
+    Global['cpus'] = {}
-sub report {
+    for thread in Threads:
    print "Verilator Gantt report\n";
    print "\nArgument settings:\n";
    foreach my $arg (sort keys %{$Global{args}}) {
        my $plus = ($arg =~ /^\+/) ? "+" : " ";
        printf "  %s%s%d\n", $arg, $plus, $Global{args}{$arg};
    }
    my $nthreads = scalar keys %Threads;
    $Global{cpus} = {};
    foreach my $thread (keys %Threads) {
        # Make potentially multiple characters per column
-        foreach my $start (keys %{$Threads{$thread}}) {
+        for start in Threads[thread]:
-            my $cpu = $Threads{$thread}{$start}{cpu};
+            cpu = Threads[thread][start]['cpu']
-            my $elapsed = $Threads{$thread}{$start}{end} - $start;
+            elapsed = Threads[thread][start]['end'] - start
-            $Global{cpus}{$cpu}{cpu_time} += $elapsed;
+            if cpu not in Global['cpus']:
-        }
+                Global['cpus'][cpu] = {'cpu_time': 0}
-    }
+            Global['cpus'][cpu]['cpu_time'] += elapsed
-    my $mt_mtask_time = 0;
+    mt_mtask_time = 0
-    my $long_mtask_time = 0;
+    long_mtask_time = 0
-    my $last_end = 0;
+    last_end = 0
-    foreach my $mtask (keys %Mtasks) {
+    for mtask in Mtasks:
-        $mt_mtask_time += $Mtasks{$mtask}{elapsed};
+        mt_mtask_time += Mtasks[mtask]['elapsed']
-        $last_end = max($last_end, $Mtasks{$mtask}{end});
+        last_end = max(last_end, Mtasks[mtask]['end'])
-        $long_mtask_time = max($long_mtask_time, $Mtasks{$mtask}{elapsed});
+        long_mtask_time = max(long_mtask_time, Mtasks[mtask]['elapsed'])
-    }
+    Global['last_end'] = last_end
    $Global{last_end} = $last_end;
-    report_graph();
+    report_graph()
    # If we know cycle time in the same (rdtsc) units,
    # this will give us an actual utilization number,
@@ -167,402 +139,356 @@ sub report {
    # serial mode, to estimate the overhead of data sharing,
    # which will show up in the total elapsed time. (Overhead
    # of synchronization and scheduling should not.)
-    print "\nAnalysis:\n";
+    print("\nAnalysis:")
-    printf "  Total threads             = %d\n", $nthreads;
+    print("  Total threads             = %d" % nthreads)
-    printf "  Total mtasks              = %d\n", scalar(keys %Mtasks);
+    print("  Total mtasks              = %d" % len(Mtasks))
-    my $ncpus = scalar(keys %{$Global{cpus}});
+    ncpus = len(Global['cpus'])
-    printf "  Total cpus used           = %d\n", $ncpus;
+    print("  Total cpus used           = %d" % ncpus)
-    printf "  Total yields              = %d\n", $Global{stats}{yields};
+    print("  Total yields              = %d" % int(Global['stats']['yields']))
-    printf "  Total eval time           = %d rdtsc ticks\n", $Global{last_end};
+    print("  Total eval time           = %d rdtsc ticks" % Global['last_end'])
-    printf "  Longest mtask time        = %d rdtsc ticks\n", $long_mtask_time;
+    print("  Longest mtask time        = %d rdtsc ticks" % long_mtask_time)
-    printf "  All-thread mtask time     = %d rdtsc ticks\n", $mt_mtask_time;
+    print("  All-thread mtask time     = %d rdtsc ticks" % mt_mtask_time)
-    my $long_efficiency = $long_mtask_time/($Global{last_end} || 1);
+    long_efficiency = long_mtask_time / (Global.get('last_end', 1))
-    printf "  Longest-thread efficiency = %0.1f%%\n", $long_efficiency*100;
+    print("  Longest-thread efficiency = %0.1f%%" % (long_efficiency * 100.0))
-    my $mt_efficiency = $mt_mtask_time/($Global{last_end}*$nthreads || 1);
+    mt_efficiency = mt_mtask_time / (Global.get('last_end', 1) * nthreads or 1)
-    printf "  All-thread efficiency     = %0.1f%%\n", $mt_efficiency*100;
+    print("  All-thread efficiency     = %0.1f%%" % (mt_efficiency * 100.0))
-    printf "  All-thread speedup        = %0.1f\n", $mt_efficiency*$nthreads;
+    print("  All-thread speedup        = %0.1f" % (mt_efficiency * nthreads))
-    if ($Global{rdtsc_cycle_time} > 0) {
+    if Global['rdtsc_cycle_time'] > 0:
-        my $ut = $mt_mtask_time / $Global{rdtsc_cycle_time};
+        ut = mt_mtask_time / Global['rdtsc_cycle_time']
-        print "tot_mtask_cpu=$mt_mtask_time cyc=$Global{rdtsc_cycle_time} ut=$ut\n";
+        print("tot_mtask_cpu=" + mt_mtask_time + " cyc=" +
-    }
+              Global['rdtsc_cycle_time'] + " ut=" + ut)
-    my @p2e_ratios;
+    p2e_ratios = []
-    my $min_p2e = 1000000;
+    min_p2e = 1000000
-    my $min_mtask;
+    min_mtask = None
-    my $max_p2e = -1000000;
+    max_p2e = -1000000
-    my $max_mtask;
+    max_mtask = None
    foreach my $mtask (sort keys %Mtasks) {
        if ($Mtasks{$mtask}{elapsed} > 0) {
            if ($Mtasks{$mtask}{predict} == 0) {
                $Mtasks{$mtask}{predict} = 1;  # don't log(0) below
            }
            my $p2e_ratio = log( $Mtasks{$mtask}{predict} / $Mtasks{$mtask}{elapsed} );
            #print "log(p2e $mtask) = $p2e_ratio   (predict $Mtasks{$mtask}{predict}, elapsed $Mtasks{$mtask}{elapsed})\n";
            push @p2e_ratios, $p2e_ratio;
-            if ($p2e_ratio > $max_p2e) {
+    for mtask in sorted(Mtasks.keys()):
-                $max_p2e = $p2e_ratio;
+        if Mtasks[mtask]['elapsed'] > 0:
-                $max_mtask = $mtask;
+            if Mtasks[mtask]['predict'] == 0:
-            }
+                Mtasks[mtask]['predict'] = 1  # don't log(0) below
-            if ($p2e_ratio < $min_p2e) {
+            p2e_ratio = math.log(Mtasks[mtask]['predict'] /
-                $min_p2e = $p2e_ratio;
+                                 Mtasks[mtask]['elapsed'])
-                $min_mtask = $mtask;
+            p2e_ratios.append(p2e_ratio)
            }
        }
    }
-    print "\nStatistics:\n";
+            if p2e_ratio > max_p2e:
-    printf "  min log(p2e) = %0.3f", $min_p2e;
+                max_p2e = p2e_ratio
-    print "  from mtask $min_mtask (predict $Mtasks{$min_mtask}{predict},";
+                max_mtask = mtask
-    print " elapsed $Mtasks{$min_mtask}{elapsed})\n";
+            if p2e_ratio < min_p2e:
-    printf "  max log(p2e) = %0.3f", $max_p2e;
+                min_p2e = p2e_ratio
-    print "  from mtask $max_mtask (predict $Mtasks{$max_mtask}{predict},";
+                min_mtask = mtask
    print " elapsed $Mtasks{$max_mtask}{elapsed})\n";
-    my $stddev = stddev(\@p2e_ratios);
+    print("\nStatistics:")
-    my $mean = mean(\@p2e_ratios);
+    print("  min log(p2e) = %0.3f" % min_p2e, end="")
-    printf "  mean = %0.3f\n", $mean;
+    print("  from mtask %d (predict %d," %
-    printf "  stddev = %0.3f\n", $stddev;
+          (min_mtask, Mtasks[min_mtask]['predict']),
-    printf "  e ^ stddev = %0.3f\n", exp($stddev);
+          end="")
    print(" elapsed %d)" % Mtasks[min_mtask]['elapsed'])
    print("  max log(p2e) = %0.3f" % max_p2e, end="")
    print("  from mtask %d (predict %d," %
          (max_mtask, Mtasks[max_mtask]['predict']),
          end="")
    print(" elapsed %d)" % Mtasks[max_mtask]['elapsed'])
-    report_cpus();
+    stddev = statistics.pstdev(p2e_ratios)
    mean = statistics.mean(p2e_ratios)
    print("  mean = %0.3f" % mean)
    print("  stddev = %0.3f" % stddev)
    print("  e ^ stddev = %0.3f" % math.exp(stddev))
-    if ($nthreads > $ncpus) {
+    report_cpus()
        print "\n";
        print "%Warning: There were fewer CPUs ($ncpus) then threads ($nthreads).\n";
        print "        : See docs on use of numactl.\n";
    } else {
        if ($Global{cpu_socket_cores_warning}) {
            print "\n";
            print "%Warning: Multiple threads scheduled on same hyperthreaded core.\n";
            print "        : See docs on use of numactl.\n";
        }
        if ($Global{cpu_sockets_warning}) {
            print "\n";
            print "%Warning: Threads scheduled on multiple sockets.\n";
            print "        : See docs on use of numactl.\n";
        }
    }
    print "\n";
 }
-sub report_cpus {
+    if nthreads > ncpus:
-    print "\nCPUs:\n";
+        print()
-    # Test - show all cores
+        print("%%Warning: There were fewer CPUs (%d) then threads (%d)." %
-    # for (my $i=0; $i<73; ++$i) { $Global{cpus}{$i} ||= {cpu_time => 0}; }
+              (ncpus, nthreads))
        print("        : See docs on use of numactl.")
    else:
        if 'cpu_socket_cores_warning' in Global:
            print()
            print(
                "%Warning: Multiple threads scheduled on same hyperthreaded core."
            )
            print("        : See docs on use of numactl.")
        if 'cpu_sockets_warning' in Global:
            print()
            print("%Warning: Threads scheduled on multiple sockets.")
            print("        : See docs on use of numactl.")
    print()
    $Global{cpu_sockets} ||= {};
    $Global{cpu_socket_cores} ||= {};
-    foreach my $cpu (sort {$a <=> $b} keys %{$Global{cpus}}) {
+def report_cpus():
-        printf "  cpu %d: ", $cpu;
+    print("\nCPUs:")
        printf "cpu_time=%d", $Global{cpus}{$cpu}{cpu_time};
-        my $socket = $Global{cpuinfo}{$cpu}{physical_id};
+    Global['cpu_sockets'] = collections.defaultdict(lambda: 0)
-        $Global{cpu_sockets}{$socket}++ if defined $socket;
+    Global['cpu_socket_cores'] = collections.defaultdict(lambda: 0)
        printf " socket=%d", $socket if defined $socket;
-        my $core = $Global{cpuinfo}{$cpu}{core_id};
+    for cpu in sorted(Global['cpus'].keys()):
-        $Global{cpu_socket_cores}{$socket."__".$core}++ if defined $socket && defined $core;
+        print("  cpu %d: " % cpu, end='')
-        printf " core=%d", $core if defined $core;
+        print("cpu_time=%d" % Global['cpus'][cpu]['cpu_time'], end='')
-        my $model = $Global{cpuinfo}{$cpu}{model_name};
+        socket = None
-        printf "  %s", $model if defined $model;
+        if cpu in Global['cpuinfo']:
-        print "\n";
+            socket = int(Global['cpuinfo'][cpu]['physical_id'])
-    }
+            Global['cpu_sockets'][socket] += 1
            print(" socket=%d" % socket, end='')
-    $Global{cpu_sockets_warning} = 1
+            core = int(Global['cpuinfo'][cpu]['core_id'])
-        if (scalar keys %{$Global{cpu_sockets}} > 1);
+            Global['cpu_socket_cores'][str(socket) + "__" + str(core)] += 1
-    foreach my $scn (values %{$Global{cpu_socket_cores}}) {
+            print(" core=%d" % core, end='')
        $Global{cpu_socket_cores_warning} = 1 if $scn > 1;
    }
 }
-sub report_graph {
+            model = Global['cpuinfo'][cpu]['model_name']
-    my $time_per = $Opt_Time_Per_Char;
+            if model:
-    if ($time_per == 0) {
+                print("  %s" % model, end='')
-        $time_per = ($Global{last_end} / 40);  # Start with 40 columns
+        print()
-        while ($time_per > 10) {
+
-            my ($graph, $conflicts) = _make_graph($time_per);
+    if len(Global['cpu_sockets']) > 1:
-            last if !$conflicts;
+        Global['cpu_sockets_warning'] = True
-            $time_per = int($time_per/2);
+        for scn in Global['cpu_socket_cores'].values():
-        }
+            if scn > 1:
                Global['cpu_socket_cores_warning'] = True
 def report_graph():
    time_per = Args.scale
    if time_per == 0:
        time_per = Global['last_end'] / 40  # Start with 40 columns
        while time_per > 10:
            (graph, conflicts) = _make_graph(time_per)
            if not conflicts:
                break
            time_per = int(time_per / 2)
        # One more step so we can fit more labels
-        $time_per = int($time_per/2);
+        time_per = int(time_per / 2)
-        $time_per ||= 1;
+        if time_per <= 0:
-    }
+            time_per = 1
-    my ($graph, $conflicts) = _make_graph($time_per);
+    (graph, conflicts) = _make_graph(time_per)
-    print "\nThread gantt graph:\n";
+    print("\nThread gantt graph:")
-    print "  Legend: One character width = $time_per rdtsc ticks\n";
+    print("  Legend: One character width = %s rdtsc ticks" % time_per)
-    print "  Legend: '&' = multiple mtasks in this period (character width)\n";
+    print("  Legend: '&' = multiple mtasks in this period (character width)")
-    my $scale = "   <-".$Global{last_end}." rdtsc total";
+    scale = "   <-%d rdtsc total" % Global['last_end']
-    for (my $col = length($scale);  # -2 for '->' below
+    for col in range(len(scale), int(0.99 + (Global['last_end'] / time_per))):  # pylint: disable=unused-variable
-         $col < ($Global{last_end}/$time_per); ++$col) {
+        scale += "-"
-        $scale .= "-";
+    print("  " + scale + "->")
    }
    print "  $scale->\n";
-    foreach my $thread (sort keys %{$graph}) {
+    for thread in sorted(graph.keys()):
-        print "  t: ";
+        print("  t: ", end="")
-        _print_graph_line($graph->{$thread}, '');
+        _print_graph_line(graph[thread], '')
    }
 }
 sub _make_graph {
    my $time_per = shift;
-    my $graph = {};  # {thread}{column}{char=>'x' or chars=>#}
+def _make_graph(time_per):
-    my $conflicts = 0;
+
-    foreach my $thread (keys %Threads) {
+    # [thread][column] = char or #
    graph = collections.defaultdict(
        lambda: collections.defaultdict(lambda: ''))
    conflicts = 0
    for thread in Threads:
        # Make potentially multiple characters per column
-        foreach my $start (sort {$a <=> $b} keys %{$Threads{$thread}}) {
+        multi_at_col = collections.defaultdict(lambda: '')
-            my $end = $Threads{$thread}{$start}{end};
+        for start in sorted(Threads[thread].keys()):
-            my $mtask = $Threads{$thread}{$start}{mtask};
+            end = Threads[thread][start]['end']
-            my $cpu = $Threads{$thread}{$start}{cpu};
+            # mtask = Threads[thread][start]['mtask']
            cpu = Threads[thread][start]['cpu']
-            my $startcol = _time_col($time_per, $start);
+            startcol = _time_col(time_per, start)
-            my $endcol = _time_col($time_per, $end);
+            endcol = _time_col(time_per, end)
            label = "["
            label += str(cpu)  # Maybe make optional in future
            width = endcol - startcol + 1
            while len(label) < (width - 1):  # -1 for ']'
                label += "-"
            label += "]"
            multi_at_col[startcol] += label
            my $label = "[";
            $label .= "$cpu";  # Maybe make optional in future
            my $width = $endcol - $startcol + 1;
            while (length($label) < ($width-1)) {  # -1 for ']'
                $label .= "-";
            }
            $label .= "]";
            $graph->{$thread}[$startcol]{char} .= $label;
        }
        if ($Debug) {
            print "# Multicol: "; _print_graph_line($graph->{$thread}, '|');
        }
        # Expand line to one char per column
-        for (my $col = 0; $col <= $#{$graph->{$thread}}; ++$col) {
+        for col in multi_at_col:
-            if (my $chars = $graph->{$thread}[$col]{char}) {
+            chars = multi_at_col[col]
-                my $ok = 1;
+            ok = True
-                for (my $coladd = 1; $coladd<length($chars); ++$coladd) {
+            for coladd in range(0, len(chars)):
-                    if ($graph->{$thread}[$col + $coladd]{char}) {
+                if col + coladd in graph[thread]:
-                        $ok = 0; last;
+                    ok = False
-                    }
+                    break
-                }
+            if not ok:
-                if (!$ok) {
+                if re.search(r'\[.*\[', chars):  # Two begins or more
-                    if ($chars =~ /\[.*\[/) {  # Two begins or more
+                    conflicts += 1
-                        $conflicts++;
+                    graph[thread][col] = "&"
-                        $graph->{$thread}[$col]{char} = "&";
+                else:
-                    } else {
+                    graph[thread][col] = "["
-                        $graph->{$thread}[$col]{char} = "[";
+                for coladd in range(1, len(chars)):
-                    }
+                    if col + coladd in graph[thread]:
-                    for (my $coladd = 1; $coladd<length($chars); ++$coladd) {
+                        break
-                        if ($graph->{$thread}[$col + $coladd]{char}) {
+                    graph[thread][col + coladd] = 'x'
-                            last;
+            else:
-                        } else {
+                coladd = 0
-                            $graph->{$thread}[$col + $coladd]{char} = 'x';
+                for char in chars:
-                        }
+                    graph[thread][col + coladd] = char
-                    }
+                    coladd += 1
-                } else {
+
-                    my $coladd = 0;
+    if Args.debug:
-                    foreach my $char (split //, $chars) {
+        print("# Conflicts %d" % conflicts)
-                        $graph->{$thread}[$col+$coladd]{char} = $char;
+    return (graph, conflicts)
-                        ++$coladd;
+
-                    }
+
-                }
+def _print_graph_line(graph_thread, sep):
-            }
+    at = 0
    for col in sorted(graph_thread.keys()):
        while at < col:
            print(' ', end="")
            at += 1
        c = graph_thread[col]
        print(c + sep, end="")
        at += len(c)
    print()
 def _time_col(time_per, time):
    return int(time / time_per)
 ######################################################################
 def write_vcd(filename):
    print("Writing %s" % filename)
    with open(filename, "w") as fh:
        vcd = {
            'values':
            collections.defaultdict(lambda: {}),  # {<time>}{<code>} = value
            'sigs': {
                'threads': {},
                'cpus': {},
                'mtasks': {},
                'Stats': {}
            }  # {<module>}{<sig}} = code
        }
-        if ($Debug) {
+        code = 0
            print "# Singlcol: "; _print_graph_line($graph->{$thread}, '|');
        }
    }
    print "# Conflicts $conflicts\n" if $Debug;
    return ($graph, $conflicts);
 }
-sub _print_graph_line {
+        parallelism = collections.defaultdict(lambda: 0)
-    my $graph_thread = shift;
+        for thread in sorted(Threads.keys()):
-    my $sep = shift;
+            sig = "thread%d_mtask" % thread
-    for (my $col = 0; $col <= $#{$graph_thread}; ++$col) {
+            if sig not in vcd['sigs']['threads']:
-        my $c = $graph_thread->[$col]{char}; $c=' ' if !defined $c;
+                vcd['sigs']['threads'][sig] = code
-        print $c, $sep;
+                code += 1
-    }
+            mcode = vcd['sigs']['threads'][sig]
    print "\n";
 }
-sub _time_col {
+            for start in sorted(Threads[thread]):
-    my $time_per = shift;
+                end = Threads[thread][start]['end']
-    my $time = shift;
+                mtask = Threads[thread][start]['mtask']
-    return int($time/$time_per);
+                cpu = Threads[thread][start]['cpu']
-}
+                vcd['values'][start][mcode] = mtask
                vcd['values'][end][mcode] = None
                parallelism[start] += 1
                parallelism[end] -= 1
-#######################################################################
+                sig = "cpu%d_thread" % cpu
                if sig not in vcd['sigs']['cpus']:
                    vcd['sigs']['cpus'][sig] = code
                    code += 1
                ccode = vcd['sigs']['cpus'][sig]
                vcd['values'][start][ccode] = thread
                vcd['values'][end][ccode] = None
-sub write_vcd {
+                sig = "mtask%d_cpu" % mtask
-    my $filename = shift;
+                if sig not in vcd['sigs']['mtasks']:
-    print "Writing $filename\n";
+                    vcd['sigs']['mtasks'][sig] = code
-    my $fh = IO::File->new(">$filename") or die "%Error: $! $filename,";
+                    code += 1
-    my $vcd = {values => {},  # {<time>}{<code>} = value
+                ccode = vcd['sigs']['mtasks'][sig]
-               sigs => {},  # {<module>}{<sig}} = code
+                vcd['values'][start][ccode] = cpu
-               code => 0,
+                vcd['values'][end][ccode] = None
    };
-    my %parallelism;
+        # Parallelism graph
-    foreach my $thread (keys %Threads) {
+        vcd['sigs']['Stats']["parallelism"] = code
-        my $mcode = ($vcd->{sigs}{threads}{"thread${thread}_mtask"} ||= $vcd->{code}++);
+        pcode = code
-        foreach my $start (sort {$a <=> $b} keys %{$Threads{$thread}}) {
+        code += 1
            my $end = $Threads{$thread}{$start}{end};
            my $mtask = $Threads{$thread}{$start}{mtask};
            my $cpu = $Threads{$thread}{$start}{cpu};
            $vcd->{values}{$start}{$mcode} = $mtask;
            $vcd->{values}{$end}{$mcode} = undef;
            $parallelism{$start}++;
            $parallelism{$end}--;
-            my $ccode = $vcd->{sigs}{cpus}{"cpu${cpu}_thread"} ||= $vcd->{code}++;
+        value = 0
-            $vcd->{values}{$start}{$ccode} = $thread;
+        for time in sorted(parallelism.keys()):
-            $vcd->{values}{$end}{$ccode} = undef;
+            value += parallelism[time]
            vcd['values'][time][pcode] = value
-            my $mcode = $vcd->{sigs}{mtasks}{"mtask${mtask}_cpu"} ||= $vcd->{code}++;
+        fh.write("$version Generated by verilator_gantt $end\n")
-            $vcd->{values}{$start}{$mcode} = $cpu;
+        fh.write("$timescale 1ns $end\n")
-            $vcd->{values}{$end}{$mcode} = undef;
+        fh.write("\n")
        }
    }
    {
        my $pcode = ($vcd->{sigs}{Stats}{"parallelism"} ||= $vcd->{code}++);
        my $value = 0;
        foreach my $time (sort {$a<=>$b} keys %parallelism) {
            $value += $parallelism{$time};
            $vcd->{values}{$time}{$pcode} = $value;
        }
    }
-    $fh->print('$version Generated by verilator_gantt $end'."\n");
+        all_codes = {}
-    $fh->print('$timescale 1ns $end'."\n");
+        fh.write(" $scope module gantt $end\n")
-    $fh->print("\n");
+        for module in sorted(vcd['sigs'].keys()):
            fh.write("  $scope module %s $end\n" % module)
            for sig in sorted(vcd['sigs'][module].keys()):
                code = vcd['sigs'][module][sig]
                fh.write("   $var wire 32 v%x %s [31:0] $end\n" % (code, sig))
                all_codes[code] = 1
            fh.write("  $upscope $end\n")
        fh.write(" $upscope $end\n")
        fh.write("$enddefinitions $end\n")
        fh.write("\n")
-    my %all_codes;
+        first = True
-    $fh->print(' $scope module gantt $end'."\n");
+        for time in sorted(vcd['values']):
-    foreach my $module (sort keys %{$vcd->{sigs}}) {
+            if first:
-        $fh->printf('  $scope module %s $end'."\n", $module);
+                first = False
-        foreach my $sig (sort keys %{$vcd->{sigs}{$module}}) {
+                # Start with Z for any signals without time zero data
-            my $code = $vcd->{sigs}{$module}{$sig};
+                for code in sorted(all_codes.keys()):
-            $fh->printf('   $var wire 32 v%x %s [31:0] $end'."\n",
+                    if code not in vcd['values'][time]:
-                        $code, $sig);
+                        vcd['values'][time][code] = None
-            $all_codes{$code} = 1;
+            fh.write("#%d\n" % time)
-        }
+            for code in sorted(vcd['values'][time].keys()):
-        $fh->print('  $upscope $end'."\n");
+                value = vcd['values'][time][code]
-    }
+                if value is None:
-    $fh->print(' $upscope $end'."\n");
+                    fh.write("bz v%x\n" % code)
-    $fh->print('$enddefinitions $end'."\n");
+                else:
-    $fh->print("\n");
+                    fh.write("b%s v%x\n" % (format(value, 'b'), code))
    my $first = 1;
    foreach my $time (sort {$a <=> $b} keys %{$vcd->{values}}) {
        if ($first) {
            $first = 0;
            # Start with Z for any signals without time zero data
            foreach my $code (keys %all_codes) {
                if (!defined $vcd->{values}{$time}{$code}) {
                    $vcd->{values}{$time}{$code} = undef;
                }
            }
        }
        $fh->printf("#%d\n", $time);
        foreach my $code (sort keys %{$vcd->{values}{$time}}) {
            my $value = $vcd->{values}{$time}{$code};
            if (defined $value) {
                $fh->printf("b%b v%x\n", $value, $code);
            } else {
                $fh->printf("bz v%x\n", $code);
            }
        }
    }
 }
-#######################################################################
+######################################################################
 # Similar to Statistics::Basic functions, but avoid a package dependency
-sub max {
+parser = argparse.ArgumentParser(
-    my $n = $_[0]; shift;
+    allow_abbrev=False,
-    while (defined $_[0]) {
+    formatter_class=argparse.RawDescriptionHelpFormatter,
-        $n = $_[0] if !defined $n || $_[0] > $n;
+    description="""Create Gantt chart of multi-threaded execution""",
-        shift;
+    epilog=
-    }
+    """Verilator_gantt creates a visual representation to help analyze Verilator
-    return $n;
+multithreaded simulation performance, by showing when each macro-task
-}
+starts and ends, and showing when each thread is busy or idle.
 # Arithmetic mean of the numbers held in an array reference.
 # Returns undef when the array has no elements.
 sub mean {
    my ($values) = @_;
    my ($total, $count) = (0, 0);
    for my $item (@{$values}) {
        $total += $item;
        ++$count;
    }
    # An empty input has no mean; report it as undef rather than dividing by 0.
    return $count ? $total / $count : undef;
 }
 # Population standard deviation of the numbers held in an array reference,
 # computed as sqrt(E[x^2] - E[x]^2).
 # Returns undef when the array has no elements.
 sub stddev {
    my $arrayref = shift;
    my $n = 0;
    my $sum = 0;
    my $sumsq = 0;
    foreach my $v (@$arrayref) {
        $sum += $v;
        $sumsq += $v**2;
        $n++;
    }
    return undef if !$n;
    # With (nearly) identical values, floating-point rounding can make
    # E[x^2] - E[x]^2 a tiny negative number; sqrt() of a negative value is
    # a fatal error in Perl, so clamp the variance at zero first.
    my $variance = ($sumsq/$n) - ($sum/$n)**2;
    $variance = 0 if $variance < 0;
    return sqrt($variance);
 }
 #######################################################################
 __END__
 =pod
 =head1 NAME
 verilator_gantt - Create Gantt chart of multi-threaded execution
 =head1 SYNOPSIS
 Verilator_gantt creates a visual representation to help analyze Verilator
 multithreaded simulation performance, by showing when each macro-task
 starts and ends, and showing when each thread is busy or idle.
 For documentation see
-L<https://verilator.org/guide/latest/exe_verilator_gantt.html>.
+https://verilator.org/guide/latest/exe_verilator_gantt.html
 =head1 ARGUMENT SUMMARY
    <filename>    Filename to read data from, default "profile_threads.dat".
    --help        Displays this message and program version and exits.
    --scale I<n>  Number of rdtsc ticks per character in the chart, 0 for automatic.
    --no-vcd      Do not create a VCD file.
    --vcd <filename>   Set output filename for vcd dump, default "profile_threads.vcd".
 =head1 DISTRIBUTION
 The latest version is available from L<https://verilator.org>.
 Copyright 2018-2021 by Wilson Snyder. This program is free software; you
 can redistribute it and/or modify it under the terms of either the GNU
 Lesser General Public License Version 3 or the Perl Artistic License
 Version 2.0.
-SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0""")
-=head1 SEE ALSO
+parser.add_argument('--debug', action='store_true', help='enable debug')
 # Remaining command-line options; a --scale of 0 is the default.
 parser.add_argument('--scale',
                     help='number of time units per character in graph',
                     type=int,
                     default=0)
 parser.add_argument('--no-vcd',
                     help='disable creating vcd',
                     action='store_true')
 parser.add_argument('--vcd',
                     help='filename for vcd output',
                     default='profile_threads.vcd')
 # nargs='?' makes the positional optional. Without it argparse treats the
 # argument as required and the default below is never applied.
 parser.add_argument('filename',
                     help='input profile_threads.dat filename to process',
                     nargs='?',
                     default='profile_threads.dat')
-C<verilator>
+Args = parser.parse_args()
-and L<https://verilator.org/guide/latest/exe_verilator_gantt.html> for
+process(Args.filename)
-detailed documentation.
+if not Args.no_vcd:
-
+    write_vcd(Args.vcd)
 =cut
 ######################################################################
-### Local Variables:
+# Local Variables:
-### compile-command: "$V4/bin/verilator_gantt $V4/test_regress/obj_vltmt/t_gantt/vlt_sim.log"
+# compile-command: "./verilator_gantt ../test_regress/t/t_gantt_io.dat"
-### End:
+# End:
--- a/docs/guide/conf.py
+++ b/docs/guide/conf.py
@ -1,4 +1,4 @@
-# pylint: disable=C0103,C0114,C0116,E0402,W0622
+# pylint: disable=C0103,C0114,C0116,C0301,E0402,W0622
 #
 # Configuration file for Verilator's Sphinx documentation builder.
 # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
--- a/test_regress/t/t_gantt_io.out
+++ b/test_regress/t/t_gantt_io.out
@ -6,11 +6,11 @@ Argument settings:
  --threads 2
 Thread gantt graph:
-  Legend: One character width = 200 rdtsc ticks
+  Legend: One character width = 100 rdtsc ticks
  Legend: '&' = multiple mtasks in this period (character width)
-     <-16065 rdtsc total------------------------------------------------------------>
+     <-16065 rdtsc total-------------------------------------------------------------------------------------------------------------------------------------------->
-  t:   [1]                                   [1]                [1]                 [1]
+  t:      [1]                                                                         [1]                                   [1]                                     [1]
-  t:                          [xx[x[16--]        [16] [16]                 [16] [[x[[16]
+  t:                                                    [16-] [16][16-------]                [16--]     [16]                                      [16-]     [1[] [16[xxx
 Analysis:
  Total threads             = 2
@ -26,7 +26,7 @@ Analysis:
 Statistics:
  min log(p2e) = -3.332  from mtask 5 (predict 30, elapsed 840)
-  max log(p2e) = -1.764  from mtask 11 (predict 30, elapsed 175)
+  max log(p2e) = -1.764  from mtask 7 (predict 30, elapsed 175)
  mean = -2.365
  stddev = 0.562
  e ^ stddev = 1.754