diff --git a/Changes b/Changes
index 82fe61b54..f1bc12cc2 100644
--- a/Changes
+++ b/Changes
@@ -8,6 +8,28 @@ The changes in each Verilator version are described below.  The
 contributors that suggested a given feature are shown in []. Thanks!
 
 
+Verilator 4.214 2021-10-17
+==========================
+
+* Add profile-guided optimization of mtasks (#3150).
+* Verilator_gantt no longer produces ASCII graphics; use the VCD output instead.
+* Verilator_gantt now shows the predicted mtask times, eval times, and additional statistics.
+* Verilator_gantt data files now include processor information, to allow later processing.
+* Support displaying x and z in $display task (#3107) (#3109). [Iru Cai]
+* Fix verilator_profcfunc profile accounting (#3115).
+* Fix display has no time units on class function (#3116). [Damien Pretet]
+* Fix removing if statement with side effect in condition (#3131). [Alexander Grobman]
+* Fix --waiver-output for multiline warnings (#2429) (#3141). [Keith Colbert]
+* Fix internal error on bad widths (#3140) (#3145). [Zhanglei Wang]
+* Fix crash on clang 12/13 (#3148). [Kouping Hsu]
+* Fix cygwin compile error due to missing -std=gnu++14 (#3149). [Sun Kim]
+* Fix $urandom_range when the range is 0 ... UINT_MAX (#3161). [Iru Cai]
+* Fix constructor-parameter argument comma-separation in C++ (#3162). [Matthew Ballance]
+* Fix missing install of vl_file_copy/vl_hier_graph (#3165). [Popolon]
+* Fix calling new with arguments in same class (#3166). [Matthew Ballance]
+* Fix false EOFNEWLINE warning when DOS carriage returns present (#3171).
+
+
 Verilator 4.212 2021-09-01
 ==========================
 
diff --git a/Makefile.in b/Makefile.in
index 7b09873c8..c381eda44 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -118,23 +118,6 @@ INFOS = verilator.html verilator.pdf
 
 INFOS_OLD = README README.html README.pdf
 
-INST_PROJ_FILES = \
-	bin/verilator \
-	bin/verilator_ccache_report \
-	bin/verilator_coverage \
-	bin/verilator_gantt \
-	bin/verilator_includer \
-	bin/verilator_profcfunc \
-	include/verilated.mk \
-	include/*.[chv]* \
-	include/gtkwave/*.[chv]* \
-	include/vltstd/*.[chv]* \
-
-INST_PROJ_BIN_FILES = \
-	bin/verilator_bin$(EXEEXT) \
-	bin/verilator_bin_dbg$(EXEEXT) \
-	bin/verilator_coverage_bin_dbg$(EXEEXT) \
-
 EXAMPLES_FIRST = \
 	examples/make_hello_c \
 	examples/make_hello_sc \
@@ -142,7 +125,7 @@ EXAMPLES_FIRST = \
 EXAMPLES = $(EXAMPLES_FIRST) $(filter-out $(EXAMPLES_FIRST), $(sort $(wildcard examples/*)))
 
 # See uninstall also - don't put wildcards in this variable, it might uninstall other stuff
-VL_INST_MAN_FILES = verilator.1 verilator_coverage.1 verilator_gantt.1 verilator_profcfunc.1
+VL_INST_MAN_FILES = verilator.1 verilator_coverage.1
 
 default: all
 all: all_nomsg msg_test
@@ -224,8 +207,10 @@ VL_INST_INC_SRCDIR_FILES = \
 	include/vltstd/*.[chv]* \
 
 VL_INST_DATA_SRCDIR_FILES = \
-	examples/*/*.[chv]*  examples/*/Makefile* \
-	examples/*/CMakeLists.txt
+	examples/*/*.[chv]* \
+	examples/*/CMakeLists.txt \
+	examples/*/Makefile* \
+	examples/*/vl_* \
 
 installbin:
 	$(MKINSTALLDIRS) $(DESTDIR)$(bindir)
@@ -369,6 +354,9 @@ clang-format:
 
 PY_PROGRAMS = \
 	bin/verilator_ccache_report \
+	bin/verilator_difftree \
+	bin/verilator_gantt \
+	bin/verilator_profcfunc \
 	examples/xml_py/vl_file_copy \
 	examples/xml_py/vl_hier_graph \
 	docs/guide/conf.py \
diff --git a/bin/verilator b/bin/verilator
index 0a9ceaf71..fbd06550a 100755
--- a/bin/verilator
+++ b/bin/verilator
@@ -34,11 +34,11 @@ my $opt_quiet_exit;
 
 # No arguments can't do anything useful.  Give help
 if ($#ARGV < 0) {
-    pod2usage(-exitstatus=>2, -verbose=>0);
+    pod2usage(-exitstatus => 2, -verbose => 0);
 }
 
 # Insert debugging options up front
-push @ARGV, (split ' ',$ENV{VERILATOR_TEST_FLAGS}||"");
+push @ARGV, (split ' ', $ENV{VERILATOR_TEST_FLAGS} || "");
 
 # We sneak a look at the flags so we can do some pre-environment checks
 # All flags will hit verilator...
@@ -46,7 +46,7 @@ foreach my $sw (@ARGV) {
     push @Opt_Verilator_Sw, $sw;
 }
 
-Getopt::Long::config("no_auto_abbrev","pass_through");
+Getopt::Long::config("no_auto_abbrev", "pass_through");
 if (! GetOptions(
           # Major operating modes
           "help"        => \&usage,
@@ -60,7 +60,7 @@ if (! GetOptions(
           # Additional parameters
           "<>"          => sub {},      # Ignored
     )) {
-    pod2usage(-exitstatus=>2, -verbose=>0);
+    pod2usage(-exitstatus => 2, -verbose => 0);
 }
 
 if ($opt_gdbbt && !gdb_works()) {
@@ -73,12 +73,12 @@ if ($opt_gdbbt && !gdb_works()) {
 # Starting with that, escape all special chars for the shell;
 # The shell will undo the escapes and the verilator binary should
 # then see exactly the contents of @Opt_Verilator_Sw.
-my @quoted_sw = map {sh_escape($_)} @Opt_Verilator_Sw;
+my @quoted_sw = map { sh_escape($_) } @Opt_Verilator_Sw;
 if ($opt_gdb) {
     # Generic GDB interactive
     run (aslr_off()
-         .($ENV{VERILATOR_GDB}||"gdb")
-         ." ".verilator_bin()
+         . ($ENV{VERILATOR_GDB} || "gdb")
+         . " " . verilator_bin()
          # Note, uncomment to set breakpoints before running:
          #  ." -ex 'break main'"
 
@@ -87,42 +87,41 @@ if ($opt_gdb) {
          # escapes as you would expect in a double-quoted string.
          # That's not true for a single-quoted string, where \'
          # actually terminates the string -- not what we want!
-         ." -ex \"run ".join(' ', @quoted_sw)."\""
-         ." -ex 'set width 0'"
-         ." -ex 'bt'");
+         . " -ex \"run " . join(' ', @quoted_sw) . "\""
+         . " -ex 'set width 0'"
+         . " -ex 'bt'");
 } elsif ($opt_rr) {
     # Record with rr
     run (aslr_off()
-         ."rr record ".verilator_bin()
-         ." ".join(' ', @quoted_sw));
+         . "rr record " . verilator_bin()
+         . " " . join(' ', @quoted_sw));
 } elsif ($opt_gdbbt && $Debug) {
     # Run under GDB to get gdbbt
     run (aslr_off()
-         ."gdb"
-         ." ".verilator_bin()
-         ." --batch --quiet --return-child-result"
-         ." -ex \"run ".join(' ', @quoted_sw)."\""
-         ." -ex 'set width 0'"
-         ." -ex 'bt' -ex 'quit'");
+         . "gdb"
+         . " " . verilator_bin()
+         . " --batch --quiet --return-child-result"
+         . " -ex \"run " . join(' ', @quoted_sw)."\""
+         . " -ex 'set width 0'"
+         . " -ex 'bt' -ex 'quit'");
 } elsif ($Debug) {
     # Debug
-    run(aslr_off()
-        .verilator_bin()." ".join(' ',@quoted_sw));
+    run(aslr_off() . verilator_bin() . " " . join(' ', @quoted_sw));
 } else {
     # Normal, non gdb
-    run(verilator_bin()." ".join(' ',@quoted_sw));
+    run(verilator_bin() . " " . join(' ', @quoted_sw));
 }
 
 #----------------------------------------------------------------------
 
 sub usage {
-    pod2usage(-verbose=>2, -exitval=>0, -output=>\*STDOUT);
+    pod2usage(-verbose => 2, -exitval => 0, -output => \*STDOUT);
 }
 
 sub debug {
     shift;
     my $level = shift;
-    $Debug = $level||3;
+    $Debug = $level || 3;
 }
 
 #######################################################################
@@ -163,13 +162,13 @@ sub verilator_bin {
 sub gdb_works {
     $! = undef;  # Cleanup -x
     system("gdb /bin/echo"
-           ." --batch-silent --quiet --return-child-result"
-           ." -ex 'run -n'"  # `echo -n`
-           ." -ex 'set width 0'"
-           ." -ex 'bt'"
-           ." -ex 'quit'");
+           . " --batch-silent --quiet --return-child-result"
+           . " -ex 'run -n'"  # `echo -n`
+           . " -ex 'set width 0'"
+           . " -ex 'bt'"
+           . " -ex 'quit'");
     my $status = $?;
-    return $status==0;
+    return $status == 0;
 }
 
 sub aslr_off {
@@ -185,7 +184,7 @@ sub run {
     # Run command, check errors
     my $command = shift;
     $! = undef;  # Cleanup -x
-    print "\t$command\n" if $Debug>=3;
+    print "\t$command\n" if $Debug >= 3;
     system($command);
     my $status = $?;
     if ($status) {
@@ -193,7 +192,7 @@ sub run {
             warn "%Error: verilator: Misinstalled, or VERILATOR_ROOT might need to be in environment\n";
         }
         if ($Debug) {  # For easy rerunning
-            warn "%Error: export VERILATOR_ROOT=".($ENV{VERILATOR_ROOT}||"")."\n";
+            warn "%Error: export VERILATOR_ROOT=" . ($ENV{VERILATOR_ROOT} || "") . "\n";
             warn "%Error: $command\n";
         }
         if ($status & 127) {
@@ -201,13 +200,13 @@ sub run {
                 || ($status & 127) == 8  # SIGFPA
                 || ($status & 127) == 11) {  # SIGSEGV
                 warn "%Error: Verilator internal fault, sorry. "
-                    ."Suggest trying --debug --gdbbt\n" if !$Debug;
+                    . "Suggest trying --debug --gdbbt\n" if !$Debug;
             } elsif (($status & 127) == 6) {  # SIGABRT
                 warn "%Error: Verilator aborted. "
-                    ."Suggest trying --debug --gdbbt\n" if !$Debug;
+                    . "Suggest trying --debug --gdbbt\n" if !$Debug;
             } else {
                 warn "%Error: Verilator threw signal $status. "
-                    ."Suggest trying --debug --gdbbt\n" if !$Debug;
+                    . "Suggest trying --debug --gdbbt\n" if !$Debug;
             }
         }
         if (!$opt_quiet_exit && ($status != 256 || $Debug)) {  # i.e. not normal exit(1)
@@ -448,6 +447,7 @@ description of these arguments.
      +verilator+prof+threads+file+<filename>  Set profile filename
      +verilator+prof+threads+start+<value>    Set profile starting point
      +verilator+prof+threads+window+<value>   Set profile duration
+     +verilator+prof+vlt+file+<filename>      Set profile guided filename
      +verilator+rand+reset+<value>     Set random reset technique
      +verilator+seed+<value>           Set random seed
      +verilator+V                      Verbose version and config
diff --git a/bin/verilator_coverage b/bin/verilator_coverage
index ab76d15ba..0ec1f1bac 100755
--- a/bin/verilator_coverage
+++ b/bin/verilator_coverage
@@ -30,7 +30,7 @@ $Debug = 0;
 
 # No arguments can't do anything useful.  Give help
 if ($#ARGV < 0) {
-    pod2usage(-exitstatus=>2, -verbose=>0);
+    pod2usage(-exitstatus => 2, -verbose => 0);
 }
 
 # We sneak a look at the flags so we can do some pre-environment checks
@@ -40,7 +40,7 @@ foreach my $sw (@ARGV) {
     push @Opt_Verilator_Sw, $sw;
 }
 
-Getopt::Long::config("no_auto_abbrev","pass_through");
+Getopt::Long::config("no_auto_abbrev", "pass_through");
 if (! GetOptions (
           # Major operating modes
           "help"        => \&usage,
@@ -49,23 +49,23 @@ if (! GetOptions (
           # Additional parameters
           "<>"          => sub {},      # Ignored
     )) {
-    pod2usage(-exitstatus=>2, -verbose=>0);
+    pod2usage(-exitstatus => 2, -verbose => 0);
 }
 
 # Normal, non gdb
 run(verilator_coverage_bin()
-    ." ".join(' ',@Opt_Verilator_Sw));
+    . " " . join(' ', @Opt_Verilator_Sw));
 
 #----------------------------------------------------------------------
 
 sub usage {
-    pod2usage(-verbose=>2, -exitval=>0, -output=>\*STDOUT);
+    pod2usage(-verbose => 2, -exitval => 0, -output => \*STDOUT);
 }
 
 sub debug {
     shift;
     my $level = shift;
-    $Debug = $level||3;
+    $Debug = $level || 3;
 }
 
 #######################################################################
@@ -107,7 +107,7 @@ sub run {
     # Run command, check errors
     my $command = shift;
     $! = undef;  # Cleanup -x
-    print "\t$command\n" if $Debug>=3;
+    print "\t$command\n" if $Debug >= 3;
     system($command);
     my $status = $?;
     if ($status) {
@@ -115,11 +115,11 @@ sub run {
             warn "%Error: verilator_coverage: Misinstalled, or VERILATOR_ROOT might need to be in environment\n";
         }
         if ($Debug) {  # For easy rerunning
-            warn "%Error: export VERILATOR_ROOT=".($ENV{VERILATOR_ROOT}||"")."\n";
+            warn "%Error: export VERILATOR_ROOT=" . ($ENV{VERILATOR_ROOT} || "") . "\n";
             warn "%Error: $command\n";
         }
         if ($status & 127) {
-            if (($status & 127) == 8 || ($status & 127) == 11) { # SIGFPA or SIGSEGV
+            if (($status & 127) == 8 || ($status & 127) == 11) {  # SIGFPA or SIGSEGV
                 warn "%Error: Verilator_coverage internal fault, sorry.\n" if !$Debug;
             } elsif (($status & 127) == 6) {  # SIGABRT
                 warn "%Error: Verilator_coverage aborted.\n" if !$Debug;
diff --git a/bin/verilator_difftree b/bin/verilator_difftree
index 52b03d6bb..ab612bc5c 100755
--- a/bin/verilator_difftree
+++ b/bin/verilator_difftree
@@ -1,233 +1,139 @@
-#!/usr/bin/env perl
-# See copyright, etc in below POD section.
+#!/usr/bin/env python3
+# pylint: disable=C0103,C0114,C0116
 ######################################################################
 
-use warnings;
-use Getopt::Long;
-use IO::File;
-use Pod::Usage;
-use strict;
-use vars qw($Debug);
+import argparse
+import collections
+import glob
+import os.path
+import re
+import sys
 
-#======================================================================
-# main
 
-$Debug = 0;
-my $Opt_A;
-my $Opt_B;
-my $Opt_Lineno = 1;
-autoflush STDOUT 1;
-autoflush STDERR 1;
-Getopt::Long::config("no_auto_abbrev");
-if (! GetOptions(
-          "help"        => \&usage,
-          "debug"       => \&debug,
-          "<>"          => \&parameter,
-          "lineno!"     => \$Opt_Lineno,
-    )) {
-    die "%Error: Bad usage, try 'verilator_difftree --help'\n";
-}
+def diff(a, b):
 
-defined $Opt_A or die "%Error: No old diff filename\n";
-defined $Opt_B or die "%Error: No new diff filename\n";
+    if not os.path.exists(a):
+        sys.exit("%Error: No old diff filename found: " + a)
+    if not os.path.exists(b):
+        sys.exit("%Error: No new diff filename found: " + b)
 
--e $Opt_A or die "%Error: No old diff filename found: $Opt_A\n";
--e $Opt_B or die "%Error: No new diff filename found: $Opt_B\n";
+    if os.path.isdir(a) and os.path.isdir(b):
+        diff_dir(a, b)
+    elif os.path.isfile(a) and os.path.isfile(b):
+        diff_file(a, b)
+    else:
+        sys.exit("%Error: Mix of files and dirs")
 
-if (-d $Opt_A && -d $Opt_B) {
-    diff_dir($Opt_A, $Opt_B);
-} elsif (-f $Opt_A && -f $Opt_B) {
-    diff_file($Opt_A, $Opt_B);
-} else {
-    die "%Error: Mix of files and dirs\n";
-}
 
-sub diff_dir {
-    my $a = shift;
-    my $b = shift;
+def diff_dir(a, b):  # a, b: directories whose *.tree files are paired by base name
     # Diff all files under two directories
-    my %files;
+    files = collections.defaultdict(lambda: {})  # base name -> {'a': path, 'b': path}
 
-    foreach my $fn (glob("$a/*.tree")) {
-        (my $base = $fn) =~ s!.*/!!;
-        $files{$base}{a} = $fn;
-    }
-    foreach my $fn (glob("$b/*.tree")) {
-        (my $base = $fn) =~ s!.*/!!;
-        $files{$base}{b} = $fn;
-    }
-    my $any;
-    foreach my $base (sort (keys %files)) {
-        my $a = $files{$base}{a};
-        my $b = $files{$base}{b};
-        next if !$a || !$b;
-        print "="x70,"\n";
-        print "= $a <-> $b\n";
-        diff_file($a,$b);
-        $any = 1;
-    }
-    $any or warn("%Warning: No .tree files found that have similar base names:\n    "
-                 .join("\n    ", sort keys %files),"\n");
-}
+    for fn in glob.glob(a + "/*.tree"):
+        base = re.sub(r'.*/', '', fn)
+        files[base]['a'] = fn
+    for fn in glob.glob(b + "/*.tree"):
+        base = re.sub(r'.*/', '', fn)
+        files[base]['b'] = fn
 
-sub diff_file {
-    my $a = shift;
-    my $b = shift;
+    anyfile = False
+    for base in sorted(files.keys()):
+        a = files[base].get('a')  # None when this base exists only under b
+        b = files[base].get('b')  # None when this base exists only under a
+        if not a or not b:  # Skip files that have no counterpart
+            continue
+        print("=" * 70)
+        print("= %s <-> %s" % (a, b))
+        diff_file(a, b)
+        anyfile = True
+    if not anyfile:
+        sys.stderr.write(
+            "%Warning: No .tree files found that have similar base names\n")
+
+
+def diff_file(a, b):
     # Compare the two tree files
-    (my $short_a = $a) =~ s/[^a-zA-Z0-9.]+/_/g;
-    (my $short_b = $b) =~ s/[^a-zA-Z0-9.]+/_/g;
-    my $tmp_a = "/tmp/${$}_${short_a}.a";
-    my $tmp_b = "/tmp/${$}_${short_b}.b";
+    short_a = re.sub(r'[^a-zA-Z0-9.]+', '_', a)
+    short_b = re.sub(r'[^a-zA-Z0-9.]+', '_', b)
+    tmp_a = "/tmp/%s_%s.a" % (os.getpid(), short_a)
+    tmp_b = "/tmp/%s_%s.b" % (os.getpid(), short_b)
 
-    my $vera = version_from($a);
-    my $verb = version_from($b);
-    my $verCvt = (($vera < 0x3900 && $verb >= 0x3900)
-                  || ($vera >= 0x3900 && $verb < 0x3900));
+    # Version conversion deprecated, but for future...
+    # vera = version_from(a)
+    # verb = version_from(b)
+    # verCvt = ((vera < 0x3900 and verb >= 0x3900)
+    #            or (vera >= 0x3900 and verb < 0x3900))
 
-    filter($a, $tmp_a, $verCvt);
-    filter($b, $tmp_b, $verCvt);
-    system("diff -u $tmp_a $tmp_b");
-    unlink $tmp_a;
-    unlink $tmp_b;
-}
+    filterf(a, tmp_a)
+    filterf(b, tmp_b)
+    os.system("diff -u " + tmp_a + " " + tmp_b)
+    os.unlink(tmp_a)
+    os.unlink(tmp_b)
 
-sub version_from {
-    my $fn = shift;
+
+def version_from(filename):  # Returns the dump format as an int, or 1.0 if no header found
     # Return dump format
-    my $f1 = IO::File->new ($fn) or die "%Error: $! $fn,";
-    while (defined (my $line=$f1->getline())) {
-        last if $. > 10;
-        return hex $1 if $line =~ /\(format (0x[0-9.]+)\)/;
-    }
-    return 1.0;
-}
+    with open(filename) as fh:
+        # Only the first 10 lines are scanned for the format header
+        for lineno, line in enumerate(fh, 1):
+            if lineno > 10:
+                break
+            match = re.search(r'format (0x[0-9a-fA-F]+)', line)
+            if match:
+                return int(match.group(1), 16)  # hex() would need an int, not a str
+    return 1.0
 
-sub filter {
-    my $fn1 = shift;
-    my $fn2 = shift;
-    my $verCvt = shift;
+
+def filterf(fn1, fn2):
     # Remove hex numbers before diffing
-    my $f1 = IO::File->new ($fn1) or die "%Error: $! $fn1,";
-    my $f2 = IO::File->new ($fn2,"w") or die "%Error: $! $fn2,";
-    while (defined (my $line=$f1->getline())) {
-      same_line:
-        next if $line =~ / This=/;
-        $line =~ s/0x[a-f0-9]+/0x/g;
-        $line =~ s/<e[0-9]+\#?>/<e>/g;
-        $line =~ s/{[a-z]*\d+}/{}/g if !$Opt_Lineno;
-        if ($verCvt) {
-            next if $line =~ /^     NETLIST/;
-            if ($line =~ /: ([A-Z]+) /) {
-                my $type = $1;
-                next if $type =~ 'DTYPE';
-                if ($type eq 'TYPETABLE' || $type eq 'RANGE') {
-                    $line =~ /^(\s+\S+:) /; my $prefix = $1;
-                    while (defined ($line=$f1->getline())) {
-                        next if $line =~ /^\s+[a-z]/;  # Table body
-                        next if $line =~ /^${prefix}[0-9]:/;
-                        goto same_line;
-                    }
-                    next;
-                }
-            }
-        }
-        print $f2 $line;
-    }
-    $f1->close;
-    $f2->close;
-}
+    with open(fn1) as fh1:
+        with open(fn2, "w") as fh2:
+            for line in fh1:
+                if re.search(r' This=', line):
+                    continue
+                line = re.sub(r'0x[a-f0-9]+', '0x', line)
+                line = re.sub(r'<e[0-9]+\#?>', '<e>', line)
+                if not Args.no_lineno:
+                    line = re.sub(r'{[a-z]*\d+}', '{}', line)
+                fh2.write(line)
 
-#----------------------------------------------------------------------
 
-sub usage {
-    pod2usage(-verbose=>2, -exitval=>0, -output=>\*STDOUT);
-    exit(1);  # Unreachable
-}
+######################################################################
+######################################################################
 
-sub debug {
-    $Debug = 1;
-}
-
-sub parameter {
-    my $param = shift;
-    if (!defined $Opt_A) {
-        $Opt_A = $param;
-    } elsif (!defined $Opt_B) {
-        $Opt_B = $param;
-    } else {
-        die "%Error: Unknown parameter: $param\n";
-    }
-}
-
-#######################################################################
-
-sub run {
-    # Run a system command, check errors
-    my $command = shift;
-    print "\t$command\n";
-    system "$command";
-    my $status = $?;
-    ($status == 0) or die "%Error: Command Failed $command, $status, stopped";
-}
-
-#######################################################################
-__END__
-
-=pod
-
-=head1 NAME
-
-verilator_difftree - Compare two Verilator debugging trees
-
-=head1 SYNOPSIS
-
-  verilator_difftree .../a/a.tree  .../b/a.tree
-  verilator_difftree .../a         .../b
-
-=head1 DESCRIPTION
-
-Verilator_difftree is used for debugging Verilator tree output files.  It
-performs a diff between two files, or all files common between two
+parser = argparse.ArgumentParser(
+    allow_abbrev=False,
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    description="""Compare two Verilator debugging trees""",
+    epilog=
+    """Verilator_difftree is used for debugging Verilator tree output files.
+It performs a diff between two files, or all files common between two
 directories, ignoring irrelevant pointer differences.
 
-=head1 ARGUMENTS
-
-=over 4
-
-=item --help
-
-Displays this message and program version and exits.
-
-=item --nolineno
-
-Do not show differences in line numbering.
-
-=back
-
-=head1 DISTRIBUTION
-
-The latest version is available from L<https://verilator.org>.
+For documentation see
+https://verilator.org/guide/latest/exe_verilator_difftree.html
 
 Copyright 2005-2021 by Wilson Snyder. This program is free software; you
 can redistribute it and/or modify it under the terms of either the GNU
 Lesser General Public License Version 3 or the Perl Artistic License
 Version 2.0.
 
-SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0""")
 
-=head1 AUTHORS
+parser.add_argument('--debug',
+                    action='store_const',
+                    const=9,
+                    help='enable debug')
+parser.add_argument('--no-lineno',
+                    action='store_false',
+                    help='do not show differences in line numbering')
+parser.add_argument('filea', help='input file a to diff')
+parser.add_argument('fileb', help='input file b to diff')
 
-Wilson Snyder <wsnyder@wsnyder.org>
-
-=head1 SEE ALSO
-
-C<verilator>
-
-and L<https://verilator.org/verilator_doc.html> for detailed documentation.
-
-=cut
+Args = parser.parse_args()
+diff(Args.filea, Args.fileb)
 
 ######################################################################
-### Local Variables:
-### compile-command: "$V4/bin/verilator_difftree  {$V4D,$V4}/test_regress/obj_dir/t_EXAMPLE/V*_03_*.tree"
-### End:
+# Local Variables:
+# compile-command: "./verilator_difftree ../test_regress/t/t_difftree.{a,b}.tree"
+# End:
diff --git a/bin/verilator_gantt b/bin/verilator_gantt
index 8d043670d..17d90e8e4 100755
--- a/bin/verilator_gantt
+++ b/bin/verilator_gantt
@@ -1,171 +1,162 @@
-#!/usr/bin/env perl
-# See copyright, etc in below POD section.
+#!/usr/bin/env python3
+# pylint: disable=C0103,C0114,C0116,C0301,R0914,R0912,R0915,W0511,eval-used
 ######################################################################
 
-use warnings;
-use strict;
-use Getopt::Long;
-use IO::File;
-use Pod::Usage;
-use vars qw($Debug);
+import argparse
+import collections
+import math
+import re
+import statistics
+# from pprint import pprint
 
-$Debug = 0;
-my $Opt_File;
-my $Opt_Time_Per_Char = 0;  # rdtsc ticks per char in gantt chart, 0=auto
-my $opt_vcd = "profile_threads.vcd";
-
-our %Threads;
-our %Mtasks;
-our %Global;
-
-autoflush STDOUT 1;
-autoflush STDERR 1;
-Getopt::Long::config("no_auto_abbrev");
-if (! GetOptions(
-          "help"        => \&usage,
-          "scale=i"     => \$Opt_Time_Per_Char,
-          "debug"       => sub { $Debug = 1; },
-          "vcd=s"       => \$opt_vcd,
-          "no-vcd!"     => sub { $opt_vcd = undef; },
-          "<>"          => \&parameter,
-    )) {
-    die "%Error: Bad usage, try 'verilator_gantt --help'\n";
+Threads = collections.defaultdict(lambda: {})
+Mtasks = collections.defaultdict(lambda: {})
+Evals = collections.defaultdict(lambda: {})
+EvalLoops = collections.defaultdict(lambda: {})
+Global = {
+    'args': {},
+    'cpuinfo': collections.defaultdict(lambda: {}),
+    'rdtsc_cycle_time': 0,
+    'stats': {}
 }
 
-$Opt_File = "profile_threads.dat" if !defined $Opt_File;
+######################################################################
 
-process($Opt_File);
-write_vcd($opt_vcd) if defined $opt_vcd;
-exit(0);
 
-#######################################################################
+def process(filename):
+    read_data(filename)
+    report()
 
-sub usage {
-    pod2usage(-verbose=>2, -exitval=>0, -output=>\*STDOUT);
-    exit(1);  # Unreachable
-}
 
-sub parameter {
-    my $param = shift;
-    if (!defined $Opt_File) {
-        $Opt_File = $param;
-    } else {
-        die "%Error: Unknown parameter: $param\n";
-    }
-}
+def read_data(filename):  # Parse VLPROF* records from filename into the module-level tables
+    with open(filename) as fh:
+        re_prof = re.compile(
+            r'^VLPROF mtask\s(\d+)\sstart\s(\d+)\selapsed\s(\d+)\spredict_start\s(\d+)\spredict_cost\s(\d+)\scpu\s(\d+)\son thread (\d+)'
+        )
+        re_eval = re.compile(r'^VLPROF eval\sstart\s(\d+)\selapsed\s(\d+)')
+        re_loop = re.compile(
+            r'^VLPROF eval_loop\sstart\s(\d+)\selapsed\s(\d+)')
+        re_arg1 = re.compile(r'VLPROF arg\s+(\S+)\+([0-9.]*)\s*')
+        re_arg2 = re.compile(r'VLPROF arg\s+(\S+)\s+([0-9.]*)\s*$')
+        re_stat = re.compile(r'VLPROF stat\s+(\S+)\s+([0-9.]+)')
+        re_time = re.compile(r'rdtsc time = (\d+) ticks')
+        re_proc_cpu = re.compile(r'VLPROFPROC processor\s*:\s*(\d+)\s*$')
+        re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$')
+        proc_cpu = None  # Processor whose VLPROFPROC lines are being read; kept apart from the mtask cpu
 
-#######################################################################
+        for line in fh:
+            if re_prof.match(line):
+                match = re_prof.match(line)
+                mtask = int(match.group(1))
+                start = int(match.group(2))
+                elapsed_time = int(match.group(3))
+                end = start + elapsed_time
+                predict_start = int(match.group(4))
+                predict_cost = int(match.group(5))
+                cpu = int(match.group(6))
+                thread = int(match.group(7))
+                if start not in Threads[thread]:
+                    Threads[thread][start] = {}
+                Threads[thread][start]['mtask'] = mtask
+                Threads[thread][start]['end'] = end
+                Threads[thread][start]['cpu'] = cpu
+                Threads[thread][start]['predict_start'] = predict_start
+                Threads[thread][start]['predict_cost'] = predict_cost
 
-sub process {
-    my $filename = shift;
+                if 'elapsed' not in Mtasks[mtask]:
+                    Mtasks[mtask] = {'end': 0, 'elapsed': 0}
+                Mtasks[mtask]['thread'] = thread
+                Mtasks[mtask]['elapsed'] += elapsed_time
+                Mtasks[mtask]['predict_start'] = predict_start
+                Mtasks[mtask]['predict_cost'] = predict_cost
+                Mtasks[mtask]['end'] = max(Mtasks[mtask]['end'], end)
+            elif re_eval.match(line):
+                match = re_eval.match(line)
+                start = int(match.group(1))
+                elapsed_time = int(match.group(2))
+                Evals[start]['start'] = start
+                Evals[start]['end'] = start + elapsed_time
+            elif re_loop.match(line):
+                match = re_loop.match(line)
+                start = int(match.group(1))
+                elapsed_time = int(match.group(2))
+                EvalLoops[start]['start'] = start
+                EvalLoops[start]['end'] = start + elapsed_time
+            elif re.match(r'^VLPROFTHREAD', line):
+                pass  # Recognized but ignored
+            elif re_arg1.match(line):
+                match = re_arg1.match(line)
+                Global['args'][match.group(1)] = match.group(2)
+            elif re_arg2.match(line):
+                match = re_arg2.match(line)
+                Global['args'][match.group(1)] = match.group(2)
+            elif re_stat.match(line):
+                match = re_stat.match(line)
+                Global['stats'][match.group(1)] = match.group(2)
+            elif re_proc_cpu.match(line):
+                match = re_proc_cpu.match(line)
+                proc_cpu = int(match.group(1))
+            elif proc_cpu is not None and re_proc_dat.match(line):  # "is not None" so processor 0 is kept
+                match = re_proc_dat.match(line)
+                term = match.group(1)
+                value = match.group(2)
+                term = re.sub(r'\s+$', '', term)
+                term = re.sub(r'\s+', '_', term)
+                value = re.sub(r'\s+$', '', value)
+                Global['cpuinfo'][proc_cpu][term] = value
+            elif re.match(r'^#', line):
+                pass  # Comment line; ignored
+            elif Args.debug:
+                print("-Unk: %s" % line)
+            # TODO -- this is parsing text printed by a client.
+            # Really, verilator proper should generate this
+            # if it's useful...
+            if re_time.search(line):  # search, not match: the text may appear anywhere in the line
+                Global['rdtsc_cycle_time'] = int(re_time.search(line).group(1))
 
-    read_data($filename);
-    read_cpuinfo();
-    report();
-}
 
-#######################################################################
+def re_match_result(regexp, line, result_to):  # Returns re.match(regexp, line); None when there is no match
+    result_to = re.match(regexp, line)  # Rebinds the local name only; the caller's variable is not updated
+    return result_to
 
-sub read_data {
-    my $filename = shift;
 
-    %Global = (rdtsc_cycle_time => 0);
+######################################################################
 
-    my $fh = IO::File->new("<$filename") or die "%Error: $! $filename,";
-    while (my $line = $fh->getline) {
-        if ($line =~ m/VLPROF mtask\s(\d+)\sstart\s(\d+)\send\s(\d+)\selapsed\s(\d+)\spredict_time\s(\d+)\scpu\s(\d+)\son thread (\d+)/) {
-            my $mtask = $1;
-            my $start = $2;
-            my $end = $3;
-            my $elapsed_time = $4;
-            my $predict_time = $5;
-            my $cpu = $6;
-            my $thread = $7;
-            $Threads{$thread}{$start}{mtask} = $mtask;
-            $Threads{$thread}{$start}{end} = $end;
-            $Threads{$thread}{$start}{cpu} = $cpu;
 
-            if (!exists $Mtasks{$mtask}{elapsed}) {
-                $Mtasks{$mtask}{elapsed} = 0;
-            }
-            $Mtasks{$mtask}{elapsed} += $elapsed_time;
-            $Mtasks{$mtask}{predict} = $predict_time;
-            $Mtasks{$mtask}{end} = max($Mtasks{$mtask}{end}, $end);
-        }
-        elsif ($line =~ /^VLPROFTHREAD/) {}
-        elsif ($line =~ m/VLPROF arg\s+(\S+)\+([0-9.])\s*$/
-               || $line =~ m/VLPROF arg\s+(\S+)\s+([0-9.])\s*$/) {
-            $Global{args}{$1} = $2;
-        }
-        elsif ($line =~ m/VLPROF stat\s+(\S+)\s+([0-9.]+)/) {
-            $Global{stats}{$1} = $2;
-        }
-        elsif ($line =~ /^#/) {}
-        elsif ($Debug) {
-            chomp $line;
-            print "Unk: $line\n";
-        }
-        # TODO -- this is parsing text printed by a client.
-        # Really, verilator proper should generate this
-        # if it's useful...
-        if ($line =~ m/rdtsc time = (\d+) ticks/) {
-            $Global{rdtsc_cycle_time} = $1;
-        }
-    }
-}
+def report():
+    print("Verilator Gantt report")
 
-sub read_cpuinfo {
-    my $filename = "/proc/cpuinfo";
-    my $fh = IO::File->new("<$filename") or return;
-    my $cpu;
-    while (my $line = $fh->getline) {
-        chomp $line;
-        if ($line =~ m/^processor\s*:\s*(\d+)\s*$/) {
-            $cpu = $1;
-        }
-        if ($cpu && $line =~ m/^([a-z_ ]+)\s*:\s*(.*)$/) {
-            my ($term, $value) = ($1, $2);
-            $term =~ s/\s+$//;
-            $term =~ s/\s+/_/;
-            $value =~ s/\s+$//;
-            $Global{cpuinfo}{$cpu}{$term} = $value;
-        }
-    }
-}
+    print("\nArgument settings:")
+    for arg in sorted(Global['args'].keys()):
+        plus = "+" if re.match(r'^\+', arg) else " "
+        print("  %s%s%s" % (arg, plus, Global['args'][arg]))
 
-#######################################################################
-
-sub report {
-    print "Verilator Gantt report\n";
-
-    print "\nArgument settings:\n";
-    foreach my $arg (sort keys %{$Global{args}}) {
-        my $plus = ($arg =~ /^\+/) ? "+" : " ";
-        printf "  %s%s%d\n", $arg, $plus, $Global{args}{$arg};
-    }
-
-    my $nthreads = scalar keys %Threads;
-    $Global{cpus} = {};
-    foreach my $thread (keys %Threads) {
+    nthreads = len(Threads)
+    Global['cpus'] = {}
+    for thread in Threads:
         # Make potentially multiple characters per column
-        foreach my $start (keys %{$Threads{$thread}}) {
-            my $cpu = $Threads{$thread}{$start}{cpu};
-            my $elapsed = $Threads{$thread}{$start}{end} - $start;
-            $Global{cpus}{$cpu}{cpu_time} += $elapsed;
-        }
-    }
+        for start in Threads[thread]:
+            cpu = Threads[thread][start]['cpu']
+            elapsed = Threads[thread][start]['end'] - start
+            if cpu not in Global['cpus']:
+                Global['cpus'][cpu] = {'cpu_time': 0}
+            Global['cpus'][cpu]['cpu_time'] += elapsed
 
-    my $mt_mtask_time = 0;
-    my $long_mtask_time = 0;
-    my $last_end = 0;
-    foreach my $mtask (keys %Mtasks) {
-        $mt_mtask_time += $Mtasks{$mtask}{elapsed};
-        $last_end = max($last_end, $Mtasks{$mtask}{end});
-        $long_mtask_time = max($long_mtask_time, $Mtasks{$mtask}{elapsed});
-    }
-    $Global{last_end} = $last_end;
-
-    report_graph();
+    measured_mt_mtask_time = 0
+    predict_mt_mtask_time = 0
+    long_mtask_time = 0
+    measured_last_end = 0
+    predict_last_end = 0
+    for mtask in Mtasks:
+        measured_mt_mtask_time += Mtasks[mtask]['elapsed']
+        predict_mt_mtask_time += Mtasks[mtask]['predict_cost']
+        measured_last_end = max(measured_last_end, Mtasks[mtask]['end'])
+        predict_last_end = max(
+            predict_last_end,
+            Mtasks[mtask]['predict_start'] + Mtasks[mtask]['predict_cost'])
+        long_mtask_time = max(long_mtask_time, Mtasks[mtask]['elapsed'])
+    Global['measured_last_end'] = measured_last_end
+    Global['predict_last_end'] = predict_last_end
 
     # If we know cycle time in the same (rdtsc) units,
     # this will give us an actual utilization number,
@@ -175,402 +166,328 @@ sub report {
     # serial mode, to estimate the overhead of data sharing,
     # which will show up in the total elapsed time. (Overhead
     # of synchronization and scheduling should not.)
-    print "\nAnalysis:\n";
-    printf "  Total threads             = %d\n", $nthreads;
-    printf "  Total mtasks              = %d\n", scalar(keys %Mtasks);
-    my $ncpus = scalar(keys %{$Global{cpus}});
-    printf "  Total cpus used           = %d\n", $ncpus;
-    printf "  Total yields              = %d\n", $Global{stats}{yields};
-    printf "  Total eval time           = %d rdtsc ticks\n", $Global{last_end};
-    printf "  Longest mtask time        = %d rdtsc ticks\n", $long_mtask_time;
-    printf "  All-thread mtask time     = %d rdtsc ticks\n", $mt_mtask_time;
-    my $long_efficiency = $long_mtask_time/($Global{last_end} || 1);
-    printf "  Longest-thread efficiency = %0.1f%%\n", $long_efficiency*100;
-    my $mt_efficiency = $mt_mtask_time/($Global{last_end}*$nthreads || 1);
-    printf "  All-thread efficiency     = %0.1f%%\n", $mt_efficiency*100;
-    printf "  All-thread speedup        = %0.1f\n", $mt_efficiency*$nthreads;
-    if ($Global{rdtsc_cycle_time} > 0) {
-        my $ut = $mt_mtask_time / $Global{rdtsc_cycle_time};
-        print "tot_mtask_cpu=$mt_mtask_time cyc=$Global{rdtsc_cycle_time} ut=$ut\n";
-    }
+    print("\nAnalysis:")
+    print("  Total threads             = %d" % nthreads)
+    print("  Total mtasks              = %d" % len(Mtasks))
+    ncpus = len(Global['cpus'])
+    print("  Total cpus used           = %d" % ncpus)
+    print("  Total yields              = %d" % int(Global['stats']['yields']))
+    print("  Total evals               = %d" % len(Evals))
+    print("  Total eval loops          = %d" % len(EvalLoops))
+    print("  Total eval time           = %d rdtsc ticks" %
+          Global['measured_last_end'])
+    print("  Longest mtask time        = %d rdtsc ticks" % long_mtask_time)
+    print("  All-thread mtask time     = %d rdtsc ticks" %
+          measured_mt_mtask_time)
+    long_efficiency = long_mtask_time / (Global.get('measured_last_end', 1)
+                                         or 1)
+    print("  Longest-thread efficiency = %0.1f%%" % (long_efficiency * 100.0))
+    mt_efficiency = measured_mt_mtask_time / (
+        Global.get('measured_last_end', 1) * nthreads or 1)
+    print("  All-thread efficiency     = %0.1f%%" % (mt_efficiency * 100.0))
+    print("  All-thread speedup        = %0.1f" % (mt_efficiency * nthreads))
+    if Global['rdtsc_cycle_time'] > 0:
+        ut = measured_mt_mtask_time / Global['rdtsc_cycle_time']
+        print("tot_mtask_cpu=" + str(measured_mt_mtask_time) + " cyc=" +
+              str(Global['rdtsc_cycle_time']) + " ut=" + str(ut))
 
-    my @p2e_ratios;
-    my $min_p2e = 1000000;
-    my $min_mtask;
-    my $max_p2e = -1000000;
-    my $max_mtask;
-    foreach my $mtask (sort keys %Mtasks) {
-        if ($Mtasks{$mtask}{elapsed} > 0) {
-            if ($Mtasks{$mtask}{predict} == 0) {
-                $Mtasks{$mtask}{predict} = 1;  # don't log(0) below
-            }
-            my $p2e_ratio = log( $Mtasks{$mtask}{predict} / $Mtasks{$mtask}{elapsed} );
-            #print "log(p2e $mtask) = $p2e_ratio   (predict $Mtasks{$mtask}{predict}, elapsed $Mtasks{$mtask}{elapsed})\n";
-            push @p2e_ratios, $p2e_ratio;
+    predict_mt_efficiency = predict_mt_mtask_time / (
+        Global.get('predict_last_end', 1) * nthreads or 1)
+    print("\nPrediction (what Verilator used for scheduling):")
+    print("  All-thread efficiency     = %0.1f%%" %
+          (predict_mt_efficiency * 100.0))
+    print("  All-thread speedup        = %0.1f" %
+          (predict_mt_efficiency * nthreads))
 
-            if ($p2e_ratio > $max_p2e) {
-                $max_p2e = $p2e_ratio;
-                $max_mtask = $mtask;
-            }
-            if ($p2e_ratio < $min_p2e) {
-                $min_p2e = $p2e_ratio;
-                $min_mtask = $mtask;
-            }
+    p2e_ratios = []
+    min_p2e = 1000000
+    min_mtask = None
+    max_p2e = -1000000
+    max_mtask = None
+
+    for mtask in sorted(Mtasks.keys()):
+        if Mtasks[mtask]['elapsed'] > 0:
+            if Mtasks[mtask]['predict_cost'] == 0:
+                Mtasks[mtask]['predict_cost'] = 1  # don't log(0) below
+            p2e_ratio = math.log(Mtasks[mtask]['predict_cost'] /
+                                 Mtasks[mtask]['elapsed'])
+            p2e_ratios.append(p2e_ratio)
+
+            if p2e_ratio > max_p2e:
+                max_p2e = p2e_ratio
+                max_mtask = mtask
+            if p2e_ratio < min_p2e:
+                min_p2e = p2e_ratio
+                min_mtask = mtask
+
+    print("\nStatistics:")
+    print("  min log(p2e) = %0.3f" % min_p2e, end="")
+    print("  from mtask %d (predict %d," %
+          (min_mtask, Mtasks[min_mtask]['predict_cost']),
+          end="")
+    print(" elapsed %d)" % Mtasks[min_mtask]['elapsed'])
+    print("  max log(p2e) = %0.3f" % max_p2e, end="")
+    print("  from mtask %d (predict %d," %
+          (max_mtask, Mtasks[max_mtask]['predict_cost']),
+          end="")
+    print(" elapsed %d)" % Mtasks[max_mtask]['elapsed'])
+
+    stddev = statistics.pstdev(p2e_ratios)
+    mean = statistics.mean(p2e_ratios)
+    print("  mean = %0.3f" % mean)
+    print("  stddev = %0.3f" % stddev)
+    print("  e ^ stddev = %0.3f" % math.exp(stddev))
+
+    report_cpus()
+
+    if nthreads > ncpus:
+        print()
+        print("%%Warning: There were fewer CPUs (%d) than threads (%d)." %
+              (ncpus, nthreads))
+        print("        : See docs on use of numactl.")
+    else:
+        if 'cpu_socket_cores_warning' in Global:
+            print()
+            print(
+                "%Warning: Multiple threads scheduled on same hyperthreaded core."
+            )
+            print("        : See docs on use of numactl.")
+        if 'cpu_sockets_warning' in Global:
+            print()
+            print("%Warning: Threads scheduled on multiple sockets.")
+            print("        : See docs on use of numactl.")
+    print()
+
+
+def report_cpus():
+    """Print per-CPU time/topology and record socket/core sharing warnings."""
+    print("\nCPUs:")
+    Global['cpu_sockets'] = collections.defaultdict(lambda: 0)
+    Global['cpu_socket_cores'] = collections.defaultdict(lambda: 0)
+
+    for cpu in sorted(Global['cpus'].keys()):
+        print("  cpu %d: " % cpu, end='')
+        print("cpu_time=%d" % Global['cpus'][cpu]['cpu_time'], end='')
+
+        if cpu in Global['cpuinfo']:
+            cpuinfo = Global['cpuinfo'][cpu]
+            if 'physical_id' in cpuinfo and 'core_id' in cpuinfo:
+                socket = int(cpuinfo['physical_id'])
+                Global['cpu_sockets'][socket] += 1
+                print(" socket=%d" % socket, end='')
+
+                core = int(cpuinfo['core_id'])
+                Global['cpu_socket_cores'][str(socket) + "__" + str(core)] += 1
+                print(" core=%d" % core, end='')
+
+            if 'model_name' in cpuinfo:
+                model = cpuinfo['model_name']
+                print("  %s" % model, end='')
+        print()
+
+    if len(Global['cpu_sockets']) > 1:
+        Global['cpu_sockets_warning'] = True
+    # Core sharing matters even on a single socket, so check unconditionally
+    for scn in Global['cpu_socket_cores'].values():
+        if scn > 1:
+            Global['cpu_socket_cores_warning'] = True
+
+
+######################################################################
+
+
+def write_vcd(filename):
+    print("Writing %s" % filename)
+    with open(filename, "w") as fh:
+        vcd = {
+            'values':
+            collections.defaultdict(lambda: {}),  # {<time>}{<code>} = value
+            'sigs': {
+                'predicted_threads': {},
+                'measured_threads': {},
+                'cpus': {},
+                'evals': {},
+                'mtasks': {},
+                'Stats': {}
+            }  # {<module>}{<sig}} = code
         }
-    }
+        code = 0
 
-    print "\nStatistics:\n";
-    printf "  min log(p2e) = %0.3f", $min_p2e;
-    print "  from mtask $min_mtask (predict $Mtasks{$min_mtask}{predict},";
-    print " elapsed $Mtasks{$min_mtask}{elapsed})\n";
-    printf "  max log(p2e) = %0.3f", $max_p2e;
-    print "  from mtask $max_mtask (predict $Mtasks{$max_mtask}{predict},";
-    print " elapsed $Mtasks{$max_mtask}{elapsed})\n";
-
-    my $stddev = stddev(\@p2e_ratios);
-    my $mean = mean(\@p2e_ratios);
-    printf "  mean = %0.3f\n", $mean;
-    printf "  stddev = %0.3f\n", $stddev;
-    printf "  e ^ stddev = %0.3f\n", exp($stddev);
-
-    report_cpus();
-
-    if ($nthreads > $ncpus) {
-        print "\n";
-        print "%Warning: There were fewer CPUs ($ncpus) then threads ($nthreads).\n";
-        print "        : See docs on use of numactl.\n";
-    } else {
-        if ($Global{cpu_socket_cores_warning}) {
-            print "\n";
-            print "%Warning: Multiple threads scheduled on same hyperthreaded core.\n";
-            print "        : See docs on use of numactl.\n";
+        parallelism = {
+            'measured': collections.defaultdict(lambda: 0),
+            'predicted': collections.defaultdict(lambda: 0)
         }
-        if ($Global{cpu_sockets_warning}) {
-            print "\n";
-            print "%Warning: Threads scheduled on multiple sockets.\n";
-            print "        : See docs on use of numactl.\n";
-        }
-    }
-    print "\n";
-}
+        parallelism['measured'][0] = 0
+        parallelism['predicted'][0] = 0
 
-sub report_cpus {
-    print "\nCPUs:\n";
-    # Test - show all cores
-    # for (my $i=0; $i<73; ++$i) { $Global{cpus}{$i} ||= {cpu_time => 0}; }
+        # Measured graph
+        for thread in sorted(Threads.keys()):
+            sig = "thread%d_mtask" % thread
+            if sig not in vcd['sigs']['measured_threads']:
+                vcd['sigs']['measured_threads'][sig] = code
+                code += 1
+            mcode = vcd['sigs']['measured_threads'][sig]
 
-    $Global{cpu_sockets} ||= {};
-    $Global{cpu_socket_cores} ||= {};
+            for start in sorted(Threads[thread]):
+                mtask = Threads[thread][start]['mtask']
+                end = Threads[thread][start]['end']
+                cpu = Threads[thread][start]['cpu']
+                vcd['values'][start][mcode] = mtask
+                vcd['values'][end][mcode] = None
+                parallelism['measured'][start] += 1
+                parallelism['measured'][end] -= 1
 
-    foreach my $cpu (sort {$a <=> $b} keys %{$Global{cpus}}) {
-        printf "  cpu %d: ", $cpu;
-        printf "cpu_time=%d", $Global{cpus}{$cpu}{cpu_time};
+                sig = "cpu%d_thread" % cpu
+                if sig not in vcd['sigs']['cpus']:
+                    vcd['sigs']['cpus'][sig] = code
+                    code += 1
+                ccode = vcd['sigs']['cpus'][sig]
+                vcd['values'][start][ccode] = thread
+                vcd['values'][end][ccode] = None
 
-        my $socket = $Global{cpuinfo}{$cpu}{physical_id};
-        $Global{cpu_sockets}{$socket}++ if defined $socket;
-        printf " socket=%d", $socket if defined $socket;
+                sig = "mtask%d_cpu" % mtask
+                if sig not in vcd['sigs']['mtasks']:
+                    vcd['sigs']['mtasks'][sig] = code
+                    code += 1
+                ccode = vcd['sigs']['mtasks'][sig]
+                vcd['values'][start][ccode] = cpu
+                vcd['values'][end][ccode] = None
 
-        my $core = $Global{cpuinfo}{$cpu}{core_id};
-        $Global{cpu_socket_cores}{$socket."__".$core}++ if defined $socket && defined $core;
-        printf " core=%d", $core if defined $core;
+        # Eval graph
+        vcd['sigs']['evals']["eval"] = code
+        elcode = code
+        code += 1
+        n = 0
+        for eval_start in Evals:
+            eval_end = Evals[eval_start]['end']
+            n += 1
+            vcd['values'][eval_start][elcode] = n
+            vcd['values'][eval_end][elcode] = None
 
-        my $model = $Global{cpuinfo}{$cpu}{model_name};
-        printf "  %s", $model if defined $model;
-        print "\n";
-    }
+        # Eval_loop graph
+        vcd['sigs']['evals']["eval_loop"] = code
+        elcode = code
+        code += 1
+        n = 0
+        for eval_start in EvalLoops:
+            eval_end = EvalLoops[eval_start]['end']
+            n += 1
+            vcd['values'][eval_start][elcode] = n
+            vcd['values'][eval_end][elcode] = None
 
-    $Global{cpu_sockets_warning} = 1
-        if (scalar keys %{$Global{cpu_sockets}} > 1);
-    foreach my $scn (values %{$Global{cpu_socket_cores}}) {
-        $Global{cpu_socket_cores_warning} = 1 if $scn > 1;
-    }
-}
+        # Predicted graph
+        for eval_start in EvalLoops:
+            eval_end = EvalLoops[eval_start]['end']
+            # Compute scale so predicted graph is of same width as eval
+            measured_scaling = (eval_end - eval_start) / (
+                Global['predict_last_end'] or 1)  # avoid divide-by-zero
+            # Predict mtasks that fill the time the eval occupied
+            for mtask in Mtasks:
+                thread = Mtasks[mtask]['thread']
+                pred_scaled_start = eval_start + int(
+                    Mtasks[mtask]['predict_start'] * measured_scaling)
+                pred_scaled_end = eval_start + int(
+                    (Mtasks[mtask]['predict_start'] +
+                     Mtasks[mtask]['predict_cost']) * measured_scaling)
+                if pred_scaled_start == pred_scaled_end:
+                    continue
 
-sub report_graph {
-    my $time_per = $Opt_Time_Per_Char;
-    if ($time_per == 0) {
-        $time_per = ($Global{last_end} / 40);  # Start with 40 columns
-        while ($time_per > 10) {
-            my ($graph, $conflicts) = _make_graph($time_per);
-            last if !$conflicts;
-            $time_per = int($time_per/2);
-        }
-        # One more step so we can fit more labels
-        $time_per = int($time_per/2);
-        $time_per ||= 1;
-    }
+                sig = "predicted_thread%d_mtask" % thread
+                if sig not in vcd['sigs']['predicted_threads']:
+                    vcd['sigs']['predicted_threads'][sig] = code
+                    code += 1
+                mcode = vcd['sigs']['predicted_threads'][sig]
 
-    my ($graph, $conflicts) = _make_graph($time_per);
+                vcd['values'][pred_scaled_start][mcode] = mtask
+                vcd['values'][pred_scaled_end][mcode] = None
 
-    print "\nThread gantt graph:\n";
-    print "  Legend: One character width = $time_per rdtsc ticks\n";
-    print "  Legend: '&' = multiple mtasks in this period (character width)\n";
+                parallelism['predicted'][pred_scaled_start] += 1
+                parallelism['predicted'][pred_scaled_end] -= 1
 
-    my $scale = "   <-".$Global{last_end}." rdtsc total";
-    for (my $col = length($scale);  # -2 for '->' below
-         $col < ($Global{last_end}/$time_per); ++$col) {
-        $scale .= "-";
-    }
-    print "  $scale->\n";
+        # Parallelism graph
+        for measpred in ('measured', 'predicted'):
+            vcd['sigs']['Stats']["%s_parallelism" % measpred] = code
+            pcode = code
+            code += 1
+            value = 0
+            for time in sorted(parallelism[measpred].keys()):
+                value += parallelism[measpred][time]
+                vcd['values'][time][pcode] = value
 
-    foreach my $thread (sort keys %{$graph}) {
-        print "  t: ";
-        _print_graph_line($graph->{$thread}, '');
-    }
-}
+        # Create output file
+        fh.write("$version Generated by verilator_gantt $end\n")
+        fh.write("$timescale 1ns $end\n")
+        fh.write("\n")
 
-sub _make_graph {
-    my $time_per = shift;
+        all_codes = {}
+        fh.write(" $scope module gantt $end\n")
+        for module in sorted(vcd['sigs'].keys()):
+            fh.write("  $scope module %s $end\n" % module)
+            for sig in sorted(vcd['sigs'][module].keys()):
+                code = vcd['sigs'][module][sig]
+                fh.write("   $var wire 32 v%x %s [31:0] $end\n" % (code, sig))
+                all_codes[code] = 1
+            fh.write("  $upscope $end\n")
+        fh.write(" $upscope $end\n")
+        fh.write("$enddefinitions $end\n")
+        fh.write("\n")
 
-    my $graph = {};  # {thread}{column}{char=>'x' or chars=>#}
-    my $conflicts = 0;
-    foreach my $thread (keys %Threads) {
-        # Make potentially multiple characters per column
-        foreach my $start (sort {$a <=> $b} keys %{$Threads{$thread}}) {
-            my $end = $Threads{$thread}{$start}{end};
-            my $mtask = $Threads{$thread}{$start}{mtask};
-            my $cpu = $Threads{$thread}{$start}{cpu};
+        first = True
+        for time in sorted(vcd['values']):
+            if first:
+                first = False
+                # Start with Z for any signals without time zero data
+                for code in sorted(all_codes.keys()):
+                    if code not in vcd['values'][time]:
+                        vcd['values'][time][code] = None
+            fh.write("#%d\n" % time)
+            for code in sorted(vcd['values'][time].keys()):
+                value = vcd['values'][time][code]
+                if value is None:
+                    fh.write("bz v%x\n" % code)
+                else:
+                    fh.write("b%s v%x\n" % (format(value, 'b'), code))
 
-            my $startcol = _time_col($time_per, $start);
-            my $endcol = _time_col($time_per, $end);
 
-            my $label = "[";
-            $label .= "$cpu";  # Maybe make optional in future
-            my $width = $endcol - $startcol + 1;
-            while (length($label) < ($width-1)) {  # -1 for ']'
-                $label .= "-";
-            }
-            $label .= "]";
-            $graph->{$thread}[$startcol]{char} .= $label;
-        }
-        if ($Debug) {
-            print "# Multicol: "; _print_graph_line($graph->{$thread}, '|');
-        }
-        # Expand line to one char per column
-        for (my $col = 0; $col <= $#{$graph->{$thread}}; ++$col) {
-            if (my $chars = $graph->{$thread}[$col]{char}) {
-                my $ok = 1;
-                for (my $coladd = 1; $coladd<length($chars); ++$coladd) {
-                    if ($graph->{$thread}[$col + $coladd]{char}) {
-                        $ok = 0; last;
-                    }
-                }
-                if (!$ok) {
-                    if ($chars =~ /\[.*\[/) {  # Two begins or more
-                        $conflicts++;
-                        $graph->{$thread}[$col]{char} = "&";
-                    } else {
-                        $graph->{$thread}[$col]{char} = "[";
-                    }
-                    for (my $coladd = 1; $coladd<length($chars); ++$coladd) {
-                        if ($graph->{$thread}[$col + $coladd]{char}) {
-                            last;
-                        } else {
-                            $graph->{$thread}[$col + $coladd]{char} = 'x';
-                        }
-                    }
-                } else {
-                    my $coladd = 0;
-                    foreach my $char (split //, $chars) {
-                        $graph->{$thread}[$col+$coladd]{char} = $char;
-                        ++$coladd;
-                    }
-                }
-            }
-        }
-        if ($Debug) {
-            print "# Singlcol: "; _print_graph_line($graph->{$thread}, '|');
-        }
-    }
-    print "# Conflicts $conflicts\n" if $Debug;
-    return ($graph, $conflicts);
-}
+######################################################################
 
-sub _print_graph_line {
-    my $graph_thread = shift;
-    my $sep = shift;
-    for (my $col = 0; $col <= $#{$graph_thread}; ++$col) {
-        my $c = $graph_thread->[$col]{char}; $c=' ' if !defined $c;
-        print $c, $sep;
-    }
-    print "\n";
-}
-
-sub _time_col {
-    my $time_per = shift;
-    my $time = shift;
-    return int($time/$time_per);
-}
-
-#######################################################################
-
-sub write_vcd {
-    my $filename = shift;
-    print "Writing $filename\n";
-    my $fh = IO::File->new(">$filename") or die "%Error: $! $filename,";
-    my $vcd = {values => {},  # {<time>}{<code>} = value
-               sigs => {},  # {<module>}{<sig}} = code
-               code => 0,
-    };
-
-    my %parallelism;
-    foreach my $thread (keys %Threads) {
-        my $mcode = ($vcd->{sigs}{threads}{"thread${thread}_mtask"} ||= $vcd->{code}++);
-        foreach my $start (sort {$a <=> $b} keys %{$Threads{$thread}}) {
-            my $end = $Threads{$thread}{$start}{end};
-            my $mtask = $Threads{$thread}{$start}{mtask};
-            my $cpu = $Threads{$thread}{$start}{cpu};
-            $vcd->{values}{$start}{$mcode} = $mtask;
-            $vcd->{values}{$end}{$mcode} = undef;
-            $parallelism{$start}++;
-            $parallelism{$end}--;
-
-            my $ccode = $vcd->{sigs}{cpus}{"cpu${cpu}_thread"} ||= $vcd->{code}++;
-            $vcd->{values}{$start}{$ccode} = $thread;
-            $vcd->{values}{$end}{$ccode} = undef;
-
-            my $mcode = $vcd->{sigs}{mtasks}{"mtask${mtask}_cpu"} ||= $vcd->{code}++;
-            $vcd->{values}{$start}{$mcode} = $cpu;
-            $vcd->{values}{$end}{$mcode} = undef;
-        }
-    }
-    {
-        my $pcode = ($vcd->{sigs}{Stats}{"parallelism"} ||= $vcd->{code}++);
-        my $value = 0;
-        foreach my $time (sort {$a<=>$b} keys %parallelism) {
-            $value += $parallelism{$time};
-            $vcd->{values}{$time}{$pcode} = $value;
-        }
-    }
-
-    $fh->print('$version Generated by verilator_gantt $end'."\n");
-    $fh->print('$timescale 1ns $end'."\n");
-    $fh->print("\n");
-
-    my %all_codes;
-    $fh->print(' $scope module gantt $end'."\n");
-    foreach my $module (sort keys %{$vcd->{sigs}}) {
-        $fh->printf('  $scope module %s $end'."\n", $module);
-        foreach my $sig (sort keys %{$vcd->{sigs}{$module}}) {
-            my $code = $vcd->{sigs}{$module}{$sig};
-            $fh->printf('   $var wire 32 v%x %s [31:0] $end'."\n",
-                        $code, $sig);
-            $all_codes{$code} = 1;
-        }
-        $fh->print('  $upscope $end'."\n");
-    }
-    $fh->print(' $upscope $end'."\n");
-    $fh->print('$enddefinitions $end'."\n");
-    $fh->print("\n");
-
-    my $first = 1;
-    foreach my $time (sort {$a <=> $b} keys %{$vcd->{values}}) {
-        if ($first) {
-            $first = 0;
-            # Start with Z for any signals without time zero data
-            foreach my $code (keys %all_codes) {
-                if (!defined $vcd->{values}{$time}{$code}) {
-                    $vcd->{values}{$time}{$code} = undef;
-                }
-            }
-        }
-        $fh->printf("#%d\n", $time);
-        foreach my $code (sort keys %{$vcd->{values}{$time}}) {
-            my $value = $vcd->{values}{$time}{$code};
-            if (defined $value) {
-                $fh->printf("b%b v%x\n", $value, $code);
-            } else {
-                $fh->printf("bz v%x\n", $code);
-            }
-        }
-    }
-}
-
-#######################################################################
-# Similar to Statistics::Basic functions, but avoid a package dependency
-
-sub max {
-    my $n = $_[0]; shift;
-    while (defined $_[0]) {
-        $n = $_[0] if !defined $n || $_[0] > $n;
-        shift;
-    }
-    return $n;
-}
-
-sub mean {
-    my $arrayref = shift;
-    my $n = 0;
-    my $sum = 0;
-    foreach my $v (@$arrayref) {
-        $sum += $v;
-        $n++;
-    }
-    return undef if !$n;
-    return $sum/$n;
-}
-
-sub stddev {
-    my $arrayref = shift;
-    my $n = 0;
-    my $sum = 0;
-    my $sumsq = 0;
-    foreach my $v (@$arrayref) {
-        $sum += $v;
-        $sumsq += $v**2;
-        $n++;
-    }
-    return undef if !$n;
-    return sqrt(($sumsq/$n) - ($sum/$n)**2);
-}
-
-#######################################################################
-__END__
-
-=pod
-
-=head1 NAME
-
-verilator_gantt - Create Gantt chart of multi-threaded execution
-
-=head1 SYNOPSIS
-
-Verilator_gantt creates a visual representation to help analyze Verilator
-multithreaded simulation performance, by showing when each macro-task
-starts and ends, and showing when each thread is busy or idle.
+parser = argparse.ArgumentParser(
+    allow_abbrev=False,
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    description="""Create Gantt chart of multi-threaded execution""",
+    epilog=
+    """Verilator_gantt creates a visual representation to help analyze Verilator
+multithreaded simulation performance, by showing when each macro-task
+starts and ends, and showing when each thread is busy or idle.
 
 For documentation see
-L<https://verilator.org/guide/latest/exe_verilator_gantt.html>.
-
-=head1 ARGUMENT SUMMARY
-
-    <filename>    Filename to read data from, default "profile_threads.dat".
-    --help        Displays this message and program version and exits.
-    --scale I<n>  Number of characters per time step.
-    --no-vcd      Do not create a VCD file.
-    --vcd <filename>   Set output filename for vcd dump, default "verilator_gantt.vcd."
-
-=head1 DISTRIBUTION
-
-The latest version is available from L<https://verilator.org>.
+https://verilator.org/guide/latest/exe_verilator_gantt.html
 
 Copyright 2018-2021 by Wilson Snyder. This program is free software; you
 can redistribute it and/or modify it under the terms of either the GNU
 Lesser General Public License Version 3 or the Perl Artistic License
 Version 2.0.
 
-SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0""")
 
-=head1 SEE ALSO
+parser.add_argument('--debug', action='store_true', help='enable debug')
+parser.add_argument('--no-vcd',
+                    help='disable creating vcd',
+                    action='store_true')
+parser.add_argument('--vcd',
+                    help='filename for vcd output',
+                    default='profile_threads.vcd')
+parser.add_argument('filename', nargs='?',  # optional, so default applies
+                    help='input profile_threads.dat filename to process',
+                    default='profile_threads.dat')
 
-C<verilator>
+Args = parser.parse_args()
 
-and L<https://verilator.org/guide/latest/exe_verilator_gantt.html> for
-detailed documentation.
-
-=cut
+process(Args.filename)
+if not Args.no_vcd:
+    write_vcd(Args.vcd)
 
 ######################################################################
-### Local Variables:
-### compile-command: "$V4/bin/verilator_gantt $V4/test_regress/obj_vltmt/t_gantt/vlt_sim.log"
-### End:
+# Local Variables:
+# compile-command: "./verilator_gantt ../test_regress/t/t_gantt_io.dat"
+# End:
diff --git a/bin/verilator_includer b/bin/verilator_includer
index d6c26860b..356849827 100755
--- a/bin/verilator_includer
+++ b/bin/verilator_includer
@@ -10,11 +10,11 @@
 
 require 5.005;
 use warnings;
-print "// DESCR"."IPTION: Generated by verilator_includer via makefile\n";
+print "// DESCR" . "IPTION: Generated by verilator_includer via makefile\n";
 foreach my $param (@ARGV) {
     if ($param =~ /^-D([^=]+)=(.*)/) {
-        print "#define $1 $2\n"
+        print "#define $1 $2\n";
     } else {
-        print "#include \"$param\"\n"
+        print "#include \"$param\"\n";
     }
 }
diff --git a/bin/verilator_profcfunc b/bin/verilator_profcfunc
index 7ee22f54c..1b564b5ec 100755
--- a/bin/verilator_profcfunc
+++ b/bin/verilator_profcfunc
@@ -1,248 +1,204 @@
-#!/usr/bin/env perl
-# See copyright, etc in below POD section.
+#!/usr/bin/env python3
+# pylint: disable=C0103,C0114,C0116,R0914,R0912,R0915,eval-used
 ######################################################################
 
-require 5.006_001;
-use warnings;
-use Getopt::Long;
-use IO::File;
-use Pod::Usage;
-eval { use Data::Dumper;  $Data::Dumper::Indent = 1; }; # Debug, ok if missing
-use strict;
-use vars qw($Debug);
+import argparse
+import collections
+import re
+# from pprint import pprint
 
-#======================================================================
+######################################################################
 
 
-#======================================================================
-# main
+def profcfunc(filename):
+    funcs = {}
 
-$Debug = 0;
-my $Opt_File;
-autoflush STDOUT 1;
-autoflush STDERR 1;
-Getopt::Long::config("no_auto_abbrev");
-if (! GetOptions(
-          "help"        => \&usage,
-          "debug"       => \&debug,
-          "<>"          => \&parameter,
-    )) {
-    die "%Error: Bad usage, try 'verilator_profcfunc --help'\n";
-}
+    with open(filename) as fh:
 
-defined $Opt_File or die "%Error: No filename given\n";
+        for line in fh:
+            #                  %time      cumesec   selfsec     calls     {stuff}   name
+            match = re.match(
+                r'^\s*([0-9.]+)\s+[0-9.]+\s+([0-9.]+)\s+([0-9.]+)\s+[^a-zA-Z_]*([a-zA-Z_].*)$',
+                line)
+            if match:
+                pct = float(match.group(1))
+                sec = float(match.group(2))
+                calls = float(match.group(3))
+                func = match.group(4)
+                if func not in funcs:
+                    funcs[func] = {'pct': 0, 'sec': 0, 'calls': 0}
+                funcs[func]['pct'] += pct
+                funcs[func]['sec'] += sec
+                funcs[func]['calls'] += calls
+                continue
 
-profcfunc($Opt_File);
-
-#----------------------------------------------------------------------
-
-sub usage {
-    pod2usage(-verbose=>2, -exitval=>0, -output=>\*STDOUT);
-    exit(1);  # Unreachable
-}
-
-sub debug {
-    $Debug = 1;
-}
-
-sub parameter {
-    my $param = shift;
-    if (!defined $Opt_File) {
-        $Opt_File = $param;
-    } else {
-        die "%Error: Unknown parameter: $param\n";
-    }
-}
-
-#######################################################################
-
-sub profcfunc {
-    my $filename = shift;
-    # Remove hex numbers before diffing
-    my $fh = IO::File->new ($filename) or die "%Error: $! $filename,";
-
-    my %funcs;
-
-    while (defined (my $line=$fh->getline())) {
-        #                  %time      cumesec   selfsec     calls     {stuff}   name
-        if ($line =~ /^\s*([0-9.]+)\s+[0-9.]+\s+([0-9.]+)\s+([0-9.]+)\s+[^a-zA-Z_]*([a-zA-Z_].*)$/) {
-            my $pct=$1; my $sec=$2; my $calls=$3; my $func=$4;
-            $funcs{$func}{pct} += $pct;
-            $funcs{$func}{sec} += $sec;
-            $funcs{$func}{calls} += $calls;
-        }
-        # Older gprofs have no call column for single-call functions
-        #                  %time      cumesec   selfsec      {stuff}   name
-        elsif ($line =~ /^\s*([0-9.]+)\s+[0-9.]+\s+([0-9.]+)\s+[^a-zA-Z_]*([a-zA-Z_].*)$/) {
-            my $pct=$1; my $sec=$2; my $calls=1; my $func=$3;
-            $funcs{$func}{pct} += $pct;
-            $funcs{$func}{sec} += $sec;
-            $funcs{$func}{calls} += $calls;
-        }
-    }
-    $fh->close;
+            # Older gprofs have no call column for single-call functions
+            #                  %time      cumesec   selfsec      {stuff}   name
+            match = re.match(
+                r'^\s*([0-9.]+)\s+[0-9.]+\s+([0-9.]+)\s+[^a-zA-Z_]*([a-zA-Z_].*)$',
+                line)
+            if match:
+                pct = float(match.group(1))
+                sec = float(match.group(2))
+                calls = 1
+                func = match.group(3)
+                if func not in funcs:
+                    funcs[func] = {'pct': 0, 'sec': 0, 'calls': 0}
+                funcs[func]['pct'] += pct
+                funcs[func]['sec'] += sec
+                funcs[func]['calls'] += calls
+                continue
 
     # Find modules
-    my %pointer_mods;
-    my %verilated_mods;
-    foreach my $func (keys %funcs) {
-        if ($func =~ /(.*)::_eval\(([a-zA-Z_0-9]+__Syms).*\)$/) {
-            $verilated_mods{$1} = qr/^$1/;
-            $pointer_mods{$2} = $1;
-        }
-    }
-    #print Dumper(\%pointer_mods, \%verilated_mods);
+    verilated_mods = {}
+    for func in funcs:
+        match = re.search(r'(.*)::eval(_step)?\(', func)
+        if match:
+            prefix = match.group(1)
+            if Args.debug:
+                print("-got _eval %s   prefix=%s" % (func, prefix))
+            verilated_mods[prefix] = re.compile(r'^' + prefix)
+    # pprint(verilated_mods)
 
-    # Resort by Verilog name
-    my %vfuncs;
-    my %groups;
-    foreach my $func (keys %funcs) {
-        my $pct = $funcs{$func}{pct};
-        my $vfunc = $func;
-        my $design;
+    # Sort by Verilog name
+    vfuncs = {}
+    groups = {}
+    groups['type'] = collections.defaultdict(lambda: 0)
+    groups['design'] = collections.defaultdict(lambda: 0)
+    groups['module'] = collections.defaultdict(lambda: 0)
 
-        if ($func =~ /\(([a-zA-Z_0-9]+__Syms)/) {
-            $design = $pointer_mods{$1};
-        }
+    for func in funcs:
+        pct = funcs[func]['pct']
+        vfunc = func
 
-        foreach my $vde (keys %verilated_mods) {
-            last if $design;
-            if ($func =~ /$verilated_mods{$vde}/) {
-                $design=$vde;
-                last;
-            }
-        }
+        funcarg = re.sub(r'^.*\(', '', func)
 
-        if ($vfunc =~ /__PROF__([a-zA-Z_0-9]+)__l?([0-9]+)\(/) {
-            $vfunc     = sprintf("VBlock    %s:%d", $1, $2);
-            $groups{type}{"Verilog Blocks under $design"} += $pct;
-            $groups{design}{$design} += $pct;
-            $groups{module}{$1} += $pct;
-        } else {
-            if ($design) {
-                $vfunc = sprintf("VCommon   %s", $func);
-                $groups{type}{"Common code under $design"} += $pct;
-                $groups{design}{$design} += $pct;
-                $groups{module}{$design." common code"} += $pct;
-            } elsif ($func =~ /^VL_[A-Z0-9_]+/
-                     || $func =~ /^_?vl_[a-zA-Z0-9_]+/
-                     || $func =~ /^verilated/i) {
-                $vfunc = sprintf("VLib      %s", $func);
-                $groups{type}{'VLib'} += $pct;
-                $groups{design}{'VLib'} += $pct;
-                $groups{module}{'VLib'} += $pct;
-            } elsif ($func =~ /^_mcount_private/) {
-                $vfunc = sprintf("Prof      %s", $func);
-                $groups{type}{'Prof'} += $pct;
-                $groups{design}{'Prof'} += $pct;
-                $groups{module}{'Prof'} += $pct;
-            } else {
-                $vfunc = sprintf("C++       %s", $func);
-                $groups{type}{'C++'} += $pct;
-                $groups{design}{'C++'} += $pct;
-                $groups{module}{'C++'} += $pct;
-            }
-        }
-        $vfuncs{$vfunc} = $funcs{$func};
-    }
+        design = None
+        for vde in verilated_mods:
+            if verilated_mods[vde].match(func) or verilated_mods[vde].match(
+                    funcarg):
+                design = vde
+                break
+
+        vdesign = "-"
+
+        prof_match = re.search(r'__PROF__([a-zA-Z_0-9]+)__l?([0-9]+)\(', vfunc)
+        if design and prof_match:
+            linefunc = prof_match.group(1)
+            lineno = int(prof_match.group(2))
+            vfunc = "VBlock    %s:%d" % (linefunc, lineno)
+            vdesign = design
+            groups['type']["Verilog Blocks under " + design] += pct
+            groups['design'][design] += pct
+            groups['module'][linefunc] += pct
+        elif design:
+            vfunc = "VCommon   " + func
+            vdesign = design
+            groups['type']["Common code under " + design] += pct
+            groups['design'][design] += pct
+            groups['module'][design + " common code"] += pct
+        elif re.match(r'(VL_[A-Z0-9_]+|_?vl_[a-zA-Z0-9_]+|Verilated)', vfunc):
+            vfunc = "VLib      " + func
+            groups['type']['VLib'] += pct
+            groups['design']['VLib'] += pct
+            groups['module']['VLib'] += pct
+        elif re.match(r'^_mcount_private', vfunc):
+            vfunc = "Prof      " + func
+            groups['type']['Prof'] += pct
+            groups['design']['Prof'] += pct
+            groups['module']['Prof'] += pct
+        else:
+            vfunc = "C++       " + func
+            groups['type']['C++'] += pct
+            groups['design']['C++'] += pct
+            groups['module']['C++'] += pct
+
+        if vfunc not in vfuncs:
+            vfuncs[vfunc] = funcs[func]
+            vfuncs[vfunc]['design'] = vdesign
+        else:
+            vfuncs[vfunc]['pct'] += funcs[func]['pct']
+            vfuncs[vfunc]['calls'] += funcs[func]['calls']
+            vfuncs[vfunc]['sec'] += funcs[func]['sec']
+
+    for ftype in ['type', 'design', 'module']:
+        missing = 100
+        for item in groups[ftype].keys():
+            missing -= groups[ftype][item]
+        groups[ftype]["\377Unaccounted for/rounding error"] = missing
+
+        print("Overall summary by %s:" % ftype)
+        print("  %-6s  %s" % ("% time", ftype))
+        for what in sorted(groups[ftype].keys()):
+            # \377 used to establish sort order
+            pwhat = re.sub(r'^\377', '', what)
+            print("  %6.2f  %s" % (groups[ftype][what], pwhat))
+        print()
+
+    design_width = 1
+    for func in vfuncs:
+        if design_width < len(vfuncs[func]['design']):
+            design_width = len(vfuncs[func]['design'])
+
+    print("Verilog code profile:")
+    print("   These are split into three categories:")
+    print("      C++:     Time in non-Verilated C++ code")
+    print("      Prof:    Time in profile overhead")
+    print("      VBlock:  Time attributable to a block in a" +
+          " Verilog file and line")
+    print("      VCommon: Time in a Verilated module," +
+          " due to all parts of the design")
+    print("      VLib:    Time in Verilated common libraries," +
+          " called by the Verilated code")
+    print()
+
+    print("  %   cumulative   self              ")
+    print((" time   seconds   seconds      calls   %-" + str(design_width) +
+           "s  type      filename and line number") % "design")
+
+    cume = 0
+    for func in sorted(vfuncs.keys(),
+                       key=lambda f: vfuncs[f]['sec'],
+                       reverse=True):
+        cume += vfuncs[func]['sec']
+        print(("%6.2f %9.2f %8.2f %10d   %-" + str(design_width) + "s  %s") %
+              (vfuncs[func]['pct'], cume, vfuncs[func]['sec'],
+               vfuncs[func]['calls'], vfuncs[func]['design'], func))
 
 
-    foreach my $type (qw(type design module)) {
-        my $missing = 100;
-        foreach (sort (keys %{$groups{$type}})) {
-            $missing -= $groups{$type}{$_};
-        }
-        if ($missing) {
-            $groups{$type}{"\377Unaccounted for/rounding error"} = $missing;
-        }
+######################################################################
+######################################################################
 
-        print("Overall summary by $type:\n");
-        printf("  %-6s  %s\n","% time",$type);
-        foreach my $what (sort (keys %{$groups{$type}})) {
-            (my $pwhat = $what) =~ s/^\377//;  # Just used to establish sort order
-            printf("  %6.2f  %s\n", $groups{$type}{$what}, $pwhat);
-        }
-        print("\n");
-    }
-
-    print("Verilog code profile:\n");
-    print("   These are split into three categories:\n");
-    print("      C++:     Time in non-Verilated C++ code\n");
-    print("      Prof:    Time in profile overhead\n");
-    print("      VBlock:  Time attributable to a block in a Verilog file and line\n");
-    print("      VCommon: Time in a Verilated module, due to all parts of the design\n");
-    print("      VLib:    Time in Verilated common libraries, called by the Verilated code\n");
-    print("\n");
-
-    print("  %   cumulative   self              \n");
-    print(" time   seconds   seconds    calls   type      filename and line number\n");
-
-    my $cume = 0;
-    foreach my $func (sort {$vfuncs{$b}{sec} <=> $vfuncs{$a}{sec}
-                            || $a cmp $b}
-                      (keys %vfuncs)) {
-        $cume += $vfuncs{$func}{sec};
-        printf +("%6.2f %9.2f %8.2f %8d   %s\n",
-                 $vfuncs{$func}{pct},
-                 $cume, $vfuncs{$func}{sec},
-                 $vfuncs{$func}{calls},
-                 $func);
-    }
-}
-
-#######################################################################
-__END__
-
-=pod
-
-=head1 NAME
-
-verilator_profcfunc - Read gprof report created with --prof-cfuncs
-
-=head1 SYNOPSIS
-
-  verilator --prof-cfuncs ....
-  gcc ....
-  {run executable}
-  gprof
-  verilator_profcfuncs gprof.out
-
-=head1 DESCRIPTION
-
-Verilator_profcfunc reads a profile report created by gprof.  The names of
+parser = argparse.ArgumentParser(
+    allow_abbrev=False,
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    description="""Read gprof report created with --prof-cfuncs""",
+    epilog=
+    """Verilator_profcfunc reads a profile report created by gprof.  The names of
 the functions are then transformed, assuming the user used Verilator's
 --prof-cfuncs, and a report printed showing the percentage of time, etc,
 in each Verilog block.
 
 For documentation see
-L<https://verilator.org/guide/latest/exe_verilator_profcfuncs.html>.
+https://verilator.org/guide/latest/exe_verilator_profcfunc.html
 
-=head1 ARGUMENT SUMMARY
-
-    <filename>  Input file (gprof.out)
-    --help      Display this help
-
-=head1 DISTRIBUTION
-
-The latest version is available from L<https://verilator.org>.
-
-Copyright 2007-2021 by Wilson Snyder. This program is free software; you
+Copyright 2002-2021 by Wilson Snyder. This program is free software; you
 can redistribute it and/or modify it under the terms of either the GNU
 Lesser General Public License Version 3 or the Perl Artistic License
 Version 2.0.
 
-SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0""")
 
-=head1 SEE ALSO
+parser.add_argument('--debug',
+                    action='store_const',
+                    const=9,
+                    help='enable debug')
+parser.add_argument('filename', help='input gprof output to process')
 
-C<verilator>
-
-and L<https://verilator.org/guide/latest/exe_verilator_profcfuncs.html> for
-detailed documentation.
-
-=cut
+Args = parser.parse_args()
+profcfunc(Args.filename)
 
 ######################################################################
-### Local Variables:
-### compile-command: "$V4/bin/verilator_profcfunc  $V4/test_c/obj_dir/V*_03_*.tree $V4N/test_c/obj_dir/V*_03_*.tree"
-### End:
+# Local Variables:
+# compile-command: "./verilator_profcfunc ../test_regress/t/t_profcfunc.gprof"
+# End:
diff --git a/configure.ac b/configure.ac
index 720f03e4c..18d128901 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7,7 +7,7 @@
 
 #AC_INIT([Verilator],[#.### YYYY-MM-DD])
 #AC_INIT([Verilator],[#.### devel])
-AC_INIT([Verilator],[4.212 2021-09-01],
+AC_INIT([Verilator],[4.214 2021-10-17],
         [https://verilator.org],
         [verilator],[https://verilator.org])
 # When releasing, also update header of Changes file
@@ -347,22 +347,18 @@ AC_SUBST(CFG_CXXFLAGS_PROFILE)
 # Flag to select newest language standard supported
 # Macros work such that first option that passes is the one we take
 # Currently enabled c++14 due to packaged SystemC dependency
-# c++17 is the newest that Verilator supports
-# c++03 is the oldest that Verilator supports
+# c++14 is the newest that Verilator is regressed to support
+# c++11 is the oldest that Verilator supports
+# gnu is required for Cygwin to compile verilated.h successfully
+#_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++20)
 #_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++20)
+#_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++17)
 #_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++17)
+_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++14)
 _MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++14)
+_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++11)
 _MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++11)
-_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++03)
 AC_SUBST(CFG_CXXFLAGS_STD_NEWEST)
-# And likewise oldest standard (same list above, backwards)
-# This is used for internal testing
-_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_OLDEST,-std=c++03)
-_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_OLDEST,-std=c++11)
-_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_OLDEST,-std=c++14)
-#_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_OLDEST,-std=c++17)
-#_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_OLDEST,-std=c++20)
-AC_SUBST(CFG_CXXFLAGS_STD_OLDEST)
 
 # Flags for compiling Verilator internals including parser, and Verilated files
 # These turn on extra warnings and are only used with 'configure --enable-ccwarn'
diff --git a/docs/CONTRIBUTORS b/docs/CONTRIBUTORS
index bead98916..1a86f4570 100644
--- a/docs/CONTRIBUTORS
+++ b/docs/CONTRIBUTORS
@@ -11,6 +11,7 @@ Andreas Kuster
 Chris Randall
 Conor McCullough
 Dan Petrisko
+Daniel Bates
 David Horton
 David Metz
 David Stanford
@@ -30,6 +31,7 @@ Harald Heckmann
 Howard Su
 Huang Rui
 HyungKi Jeong
+Iru Cai
 Ivan Vnučec
 Iztok Jeras
 James Hanlon
@@ -46,6 +48,7 @@ Josh Redford
 Julien Margetts
 Kaleb Barrett
 Kanad Kanhere
+Keith Colbert
 Kevin Kiningham
 Krzysztof Bieganski
 Kuba Ober
@@ -101,3 +104,4 @@ Yossi Nivin
 Yuri Victorovich
 Yutetsu TAKATSUKASA
 Yves Mathieu
+Zhanglei Wang
diff --git a/docs/gen/ex_DIDNOTCONVERGE_faulty.rst b/docs/gen/ex_DIDNOTCONVERGE_faulty.rst
new file mode 100644
index 000000000..e37f06208
--- /dev/null
+++ b/docs/gen/ex_DIDNOTCONVERGE_faulty.rst
@@ -0,0 +1,6 @@
+.. comment: generated by t_lint_didnotconverge_bad
+.. code-block:: sv
+   :linenos:
+
+      always_comb b = ~a;
+      always_comb a = b;
diff --git a/docs/gen/ex_DIDNOTCONVERGE_msg.rst b/docs/gen/ex_DIDNOTCONVERGE_msg.rst
new file mode 100644
index 000000000..c15d71bb4
--- /dev/null
+++ b/docs/gen/ex_DIDNOTCONVERGE_msg.rst
@@ -0,0 +1,7 @@
+.. comment: generated by t_lint_didnotconverge_bad
+.. code-block::
+
+   -V{t#,#}+    Vt_lint_didnotconverge_bad___024root___change_request
+   -V{t#,#}+    Vt_lint_didnotconverge_bad___024root___change_request_1
+   -V{t#,#}        CHANGE: t/t_lint_didnotconverge_bad.v:14: a
+   %Error: t/t_lint_didnotconverge_bad.v:7: Verilated model didn't converge
diff --git a/docs/gen/ex_DIDNOTCONVERGE_nodbg_msg.rst b/docs/gen/ex_DIDNOTCONVERGE_nodbg_msg.rst
new file mode 100644
index 000000000..b6de737df
--- /dev/null
+++ b/docs/gen/ex_DIDNOTCONVERGE_nodbg_msg.rst
@@ -0,0 +1,4 @@
+.. comment: generated by t_lint_didnotconverge_nodbg_bad
+.. code-block::
+
+   %Error: t/t_lint_didnotconverge_bad.v:7: Verilated model didn't converge
diff --git a/docs/gen/ex_STMTDLY_faulty.rst b/docs/gen/ex_STMTDLY_faulty.rst
new file mode 100644
index 000000000..97fcc43e9
--- /dev/null
+++ b/docs/gen/ex_STMTDLY_faulty.rst
@@ -0,0 +1,5 @@
+.. comment: generated by t_lint_stmtdly_bad
+.. code-block:: sv
+   :emphasize-lines: 1
+
+         #100 $finish;  //<--- Warning
diff --git a/docs/gen/ex_STMTDLY_msg.rst b/docs/gen/ex_STMTDLY_msg.rst
new file mode 100644
index 000000000..fe19fb943
--- /dev/null
+++ b/docs/gen/ex_STMTDLY_msg.rst
@@ -0,0 +1,4 @@
+.. comment: generated by t_lint_stmtdly_bad
+.. code-block::
+
+   %Warning-STMTDLY: example.v:1:8 Unsupported: Ignoring delay on this delayed statement.
diff --git a/docs/guide/conf.py b/docs/guide/conf.py
index 060d7a106..7bc25adee 100644
--- a/docs/guide/conf.py
+++ b/docs/guide/conf.py
@@ -1,4 +1,4 @@
-# pylint: disable=C0103,C0114,C0116,E0402,W0622
+# pylint: disable=C0103,C0114,C0116,C0301,E0402,W0622
 #
 # Configuration file for Verilator's Sphinx documentation builder.
 # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
diff --git a/docs/guide/exe_sim.rst b/docs/guide/exe_sim.rst
index bd805aab8..ba49b2ca7 100644
--- a/docs/guide/exe_sim.rst
+++ b/docs/guide/exe_sim.rst
@@ -62,6 +62,12 @@ Summary:
    makes sense for a single-clock-domain module where it's typical to want
    to capture one posedge eval() and one negedge eval().
 
+.. option:: +verilator+prof+vlt+file+<filename>
+
+   When a model was Verilated using :vlopt:`--prof-threads`, sets the
+   profile-guided optimization data runtime filename to dump to.  Defaults
+   to :file:`profile.vlt`.
+
 .. option:: +verilator+rand+reset+<value>
 
    When a model was Verilated using :vlopt:`--x-initial unique
diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst
index c12214f0f..8ed959e21 100644
--- a/docs/guide/exe_verilator.rst
+++ b/docs/guide/exe_verilator.rst
@@ -816,7 +816,7 @@ Summary:
    When compiling the C++ code, enable the compiler's profiling flag
    (e.g. :code:`g++ -pg`). See :ref:`Profiling`.
 
-   Using :vlopt:`--prof-cfuncs` also enables :vlopt:`prof-c`.
+   Using :vlopt:`--prof-cfuncs` also enables :vlopt:`--prof-c`.
 
 .. option:: --prof-cfuncs
 
@@ -828,12 +828,12 @@ Summary:
    came from.  This allows gprof or oprofile reports to be correlated with
    the original Verilog source statements. See :ref:`Profiling`.
 
-   Using :vlopt:`--prof-cfuncs` also enables :vlopt:`prof-c`.
+   Using :vlopt:`--prof-cfuncs` also enables :vlopt:`--prof-c`.
 
 .. option:: --prof-threads
 
    Enable gantt chart data collection for threaded builds. See :ref:`Thread
-   Profiling`.
+   Profiling` and :ref:`Thread PGO`.
 
 .. option:: --protect-key <key>
 
@@ -1612,6 +1612,12 @@ The grammar of configuration commands is as follows:
    :option:`/*verilator&32;public_flat*/`, etc, metacomments. See
    e.g. :ref:`VPI Example`.
 
+.. option:: profile_data -mtask "<mtask_hash>" -cost <cost_value>
+
+   Feeds profile-guided optimization data into the Verilator algorithms in
+   order to improve model runtime performance.  This option is not expected
+   to be used by users directly.  See :ref:`Thread PGO`.
+
 .. option:: sc_bv -module "<modulename>" [-task "<taskname>"] -var "<signame>"
 
 .. option:: sc_bv -module "<modulename>" [-function "<funcname>"] -var "<signame>"
diff --git a/docs/guide/exe_verilator_gantt.rst b/docs/guide/exe_verilator_gantt.rst
index ea660c09a..3e313f507 100644
--- a/docs/guide/exe_verilator_gantt.rst
+++ b/docs/guide/exe_verilator_gantt.rst
@@ -10,42 +10,60 @@ starts and ends, and showing when each thread is busy or idle.
 
 For an overview of use of verilator_gantt, see :ref:`Profiling`.
 
-Gantt Chart Contents
---------------------
+Gantt Chart VCD
+---------------
 
-The generated Gantt chart has time on the X-axis. Times shown are to the
-scale printed, i.e. a certain about of time for each character width.  The
-Y-axis shows threads, each thread's execution is shown on one line.  That
-line shows "[" at the position in time when it executes.
+Verilator_gantt creates a value change dump (VCD) format dump file which
+may be viewed in a waveform viewer (e.g. GTKWave):
 
-Following the "[" is the CPU number the task executed on, followed by zero
-or more "-" to make the width of the characters match the scaled execution
-time, followed by a "]".  If the scale is too small, the CPU number and
-mtask number will not be printed.  If the scale is very small, a "&"
-indicates multiple mtasks started at that time position.
+.. figure:: figures/fig_gantt_min.png
+
+   Example verilator_gantt output, as viewed with GTKWave.
+
+The viewed waveform chart has time on the X-axis, with one unit for each
+time tick of the system's high-performance counter.
 
-Also creates a value change dump (VCD) format dump file which may be viewed
-in a waveform viewer (e.g. C<GTKWave>).  See below.
 
 Gantt Chart VCD Signals
 -----------------------
 
-In waveforms there are the following signals. Most signals the "decimal"
-format will remove the leading zeros and make the traces easier to read.
+In waveforms there are the following signals. In GTKWave, using a data
+format of "decimal" will remove the leading zeros and make the traces
+easier to read.
 
-parallelism
+evals
+  Increments each time when eval_step was measured to be active.  This
+  allows visualization of how much time eval_step was active.
+
+eval_loop
+  Increments each time when the evaluation loop within eval_step was
+  measured to be active.  For best performance there is only a single
+  evaluation loop within each eval_step call, that is the eval_loop
+  waveform looks identical to the evals waveform.
+
+measured_parallelism
   The number of mtasks active at this time, for best performance this will
-  match the thread count. You may want to use an "analog step" format to
+  match the thread count. In GTKWave, use a data format of "analog step" to
   view this signal.
 
+predicted_parallelism
+  The number of mtasks Verilator predicted would be active at this time,
+  for best performance this will match the thread count. In GTKWave, use a
+  data format of "analog step" to view this signal.
+
 cpu#_thread
-  For the given CPU number, the thread number executing.
+  For the given CPU number, the thread number measured to be executing.
 
 mtask#_cpu
-  For the given mtask id, the CPU it is executing on.
+  For the given mtask id, the CPU it was measured to execute on.
 
 thread#_mtask
-  For the given thread number, the mtask id executing.
+  For the given thread number, the mtask id it was executing.
+
+predicted_thread#_mtask
+  For the given thread number, the mtask id Verilator predicted would be
+  executing.
+
 
 verilator_gantt Arguments
 -------------------------
@@ -60,13 +78,6 @@ The filename to read data from, defaults to "profile_threads.dat".
 
 Displays a help summary, the program version, and exits.
 
-.. option:: --scale <n>
-
-Sets the number of time units per character on the X-axis of the generated
-Gantt chart.  (On x86, time units are rdtsc ticks.)  Defaults to 0, which
-will automatically compute a reasonable scale where no two mtasks need to
-fit into same character width's worth of scaled time.
-
 .. option:: --no-vcd
 
 Disables creating a .vcd file.
diff --git a/docs/guide/exe_verilator_profcfuncs.rst b/docs/guide/exe_verilator_profcfunc.rst
similarity index 58%
rename from docs/guide/exe_verilator_profcfuncs.rst
rename to docs/guide/exe_verilator_profcfunc.rst
index f3ac26b81..1283e6f2a 100644
--- a/docs/guide/exe_verilator_profcfuncs.rst
+++ b/docs/guide/exe_verilator_profcfunc.rst
@@ -1,20 +1,24 @@
 .. Copyright 2003-2021 by Wilson Snyder.
 .. SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
 
-verilator_profcfuncs
-====================
+verilator_profcfunc
+===================
 
 Verilator_profcfunc reads a profile report created by gprof.  The names of
 the functions are then transformed, assuming the user used Verilator's
 --prof-cfuncs, and a report printed showing the percentage of time, etc, in
 each Verilog block.
 
-For an overview of use of verilator_profcfuncs, see :ref:`Profiling`.
+Due to rounding errors in gprof reports, the input report's percentages may
+not total to 100%.  In the verilator_profcfunc report this will get
+reported as a rounding error.
 
-verilator_profcfuncs Arguments
-------------------------------
+For an overview of use of verilator_profcfunc, see :ref:`Profiling`.
 
-.. program:: verilator_profcfuncs
+verilator_profcfunc Arguments
+-----------------------------
+
+.. program:: verilator_profcfunc
 
 .. option:: <filename>
 
diff --git a/docs/guide/executables.rst b/docs/guide/executables.rst
index e9c3fc700..248553266 100644
--- a/docs/guide/executables.rst
+++ b/docs/guide/executables.rst
@@ -15,5 +15,5 @@ options to each executable.
    exe_verilator.rst
    exe_verilator_coverage.rst
    exe_verilator_gantt.rst
-   exe_verilator_profcfuncs.rst
+   exe_verilator_profcfunc.rst
    exe_sim.rst
diff --git a/docs/guide/figures/fig_gantt_min.png b/docs/guide/figures/fig_gantt_min.png
index d17345eb3..1bcf9a070 100644
Binary files a/docs/guide/figures/fig_gantt_min.png and b/docs/guide/figures/fig_gantt_min.png differ
diff --git a/docs/guide/files.rst b/docs/guide/files.rst
index 692f3e937..3f30fbb39 100644
--- a/docs/guide/files.rst
+++ b/docs/guide/files.rst
@@ -74,7 +74,7 @@ For --cc/--sc, it creates:
      - DPI import and export declarations (from --dpi)
    * - *{prefix}*\ __Dpi.cpp
      - Global DPI export wrappers (from --dpi)
-   * - *{prefix}*\ __Dpi_Export\ *{__n}\ .cpp
+   * - *{prefix}*\ __Dpi_Export\ *{__n}*\ .cpp
      - DPI export wrappers scoped to this particular model (from --dpi)
    * - *{prefix}*\ __Inlines.h
      - Inline support functions
@@ -145,3 +145,23 @@ After running Make, the C++ compiler may produce the following:
      - Intermediate dependencies
    * - *{prefix}{misc}*\ .o
      - Intermediate objects
+
+The Verilated executable may produce the following:
+
+.. list-table::
+
+   * - coverage.dat
+     - Code coverage output, and default input filename for :command:`verilator_coverage`
+   * - gmon.out
+     - GCC/clang code profiler output, often fed into :command:`verilator_profcfunc`
+   * - profile.vlt
+     - --prof-threads data file for :ref:`Thread PGO`
+   * - profile_threads.dat
+     - --prof-threads data file for :command:`verilator_gantt`
+
+Verilator_gantt may produce the following:
+
+.. list-table::
+
+   * - profile_threads.vcd
+     - Gantt report waveform output
diff --git a/docs/guide/install.rst b/docs/guide/install.rst
index 5b95f23e4..7b13ae677 100644
--- a/docs/guide/install.rst
+++ b/docs/guide/install.rst
@@ -51,15 +51,15 @@ In brief, to install from git:
    unsetenv VERILATOR_ROOT  # For csh; ignore error if on bash
    unset VERILATOR_ROOT  # For bash
    cd verilator
-   git pull        # Make sure git repository is up-to-date
-   git tag         # See what versions exist
+   git pull         # Make sure git repository is up-to-date
+   git tag          # See what versions exist
    #git checkout master      # Use development branch (e.g. recent bug fixes)
    #git checkout stable      # Use most recent stable release
    #git checkout v{version}  # Switch to specified release version
 
-   autoconf        # Create ./configure script
-   ./configure     # Configure and create Makefile
-   make -j         # Build Verilator itself
+   autoconf         # Create ./configure script
+   ./configure      # Configure and create Makefile
+   make -j `nproc`  # Build Verilator itself (if error, try just 'make')
    sudo make install
 
 
@@ -287,7 +287,7 @@ Compile Verilator:
 
 ::
 
-   make -j
+   make -j `nproc`  # Or if error on `nproc`, the number of CPUs in system
 
 
 Test
diff --git a/docs/guide/simulating.rst b/docs/guide/simulating.rst
index 19f54f198..98114d886 100644
--- a/docs/guide/simulating.rst
+++ b/docs/guide/simulating.rst
@@ -26,7 +26,8 @@ risk of reset bugs in trade for performance; see the above documentation
 for these options.
 
 If using Verilated multithreaded, use ``numactl`` to ensure you are using
-non-conflicting hardware resources. See :ref:`Multithreading`.
+non-conflicting hardware resources. See :ref:`Multithreading`. Also
+consider using profile-guided optimization, see :ref:`Thread PGO`.
 
 Minor Verilog code changes can also give big wins.  You should not have any
 UNOPTFLAT warnings from Verilator.  Fixing these warnings can result in
@@ -93,9 +94,7 @@ cases, for example regressions, it is usually worth spending extra
 compilation time to reduce total CPU time.
 
 If you will be running many simulations on a single model, you can
-investigate profile guided optimization. With GCC, using GCC's
-"-fprofile-arcs", then GCC's "-fbranch-probabilities" will yield another
-15% or so.
+investigate profile guided optimization. See :ref:`Compiler PGO`.
 
 Modern compilers also support link-time optimization (LTO), which can help
 especially if you link in DPI code. To enable LTO on GCC, pass "-flto" in
@@ -230,7 +229,7 @@ coverage point insertions into the model and collect the coverage data.
 
 To get the coverage data from the model, in the user wrapper code,
 typically at the end once a test passes, call
-:code:`Verilated::coveragep()->write` with an argument of the filename for
+:code:`Verilated::threadContextp()->coveragep()->write` with an argument of the filename for
 the coverage data file to write coverage data to (typically
 "logs/coverage.dat").
 
@@ -298,6 +297,9 @@ With the :vlopt:`--prof-threads` option, Verilator will:
 * Add code to save profiling data in non-human-friendly form to the file
   specified with :vlopt:`+verilator+prof+threads+file+\<filename\>`.
 
+* Add code to save profiling data for thread profile-guided
+  optimization. See :ref:`Thread PGO`.
+
 The :command:`verilator_gantt` program may then be run to transform the
 saved profiling file into a nicer visual format and produce some related
 statistics.
@@ -306,7 +308,7 @@ statistics.
 
    Example verilator_gantt output, as viewed with GTKWave.
 
-   The parallelism shows the number of CPUs being used at a given moment.
+   The measured_parallelism shows the number of CPUs being used at a given moment.
 
    The cpu_thread section shows which thread is executing on each of the physical CPUs.
 
@@ -314,6 +316,7 @@ statistics.
 
 For more information see :command:`verilator_gantt`.
 
+
 .. _Profiling ccache efficiency:
 
 Profiling ccache efficiency
@@ -377,3 +380,120 @@ For example:
          os >> main_time;
          os >> *topp;
      }
+
+
+Profile-Guided Optimization
+===========================
+
+Profile-guided optimization is the technique where profiling data is
+collected by running your simulation executable, then this information is
+used to guide the next Verilation or compilation.
+
+There are two forms of profile-guided optimizations.  Unfortunately for
+best results they must each be performed from the highest level code to the
+lowest, which means performing them separately and in this order:
+
+* :ref:`Thread PGO`
+* :ref:`Compiler PGO`
+
+Other forms of PGO may be supported in the future, such as clock and reset
+toggle rate PGO, branch prediction PGO, statement execution time PGO, or
+others as they prove beneficial.
+
+
+.. _Thread PGO:
+
+Thread Profile-Guided Optimization
+----------------------------------
+
+Verilator supports thread profile-guided optimization (Thread PGO) to
+improve multithreaded performance.
+
+When using multithreading, Verilator computes how long macro tasks take and
+tries to balance those across threads.  (What is a macro-task?  See the
+Verilator internals document (:file:`docs/internals.rst` in the
+distribution).)  If the estimations are incorrect, the threads will not be
+balanced, leading to decreased performance.  Thread PGO allows collecting
+profiling data to replace the estimates and better optimize these
+decisions.
+
+To use Thread PGO, Verilate the model with the :vlopt:`--prof-threads`
+option.
+
+Run the model executable. When the executable exits, it will create a
+profile.vlt file.
+
+Rerun Verilator, optionally omitting the :vlopt:`--prof-threads` option,
+and adding the profile.vlt generated earlier to the command line.
+
+Note there is no Verilator equivalent to GCC's -fprofile-use. Verilator's
+profile data file (profile.vlt) can be placed on the verilator command line
+directly without any prefix.
+
+If results from multiple simulations are to be used in generating the
+optimization, multiple simulations' profile.vlt files may be concatenated
+externally, or each of the files may be fed as separate command line
+options into Verilator.  Verilator will simply sum the profile results, so
+a longer running test will have proportionally more weight for optimization
+than a shorter running test.
+
+If you provide any profile feedback data to Verilator, and it cannot use
+it, it will issue the :option:`PROFOUTOFDATE` warning that threads were
+scheduled using estimated costs.  This usually indicates that the profile
+data was generated from different Verilog source code than Verilator is
+currently running against. Therefore, repeat the data collection phase to
+create new profiling data, then rerun Verilator with the same input source
+files and that new profiling data.
+
+
+.. _Compiler PGO:
+
+Compiler Profile-Guided Optimization
+------------------------------------
+
+GCC and Clang support compiler profile-guided optimization (PGO). This
+optimizes any C/C++ program including Verilated code.  Using compiler PGO
+typically yields improvements of 5-15% on both single-threaded and
+multi-threaded models.
+
+To use compiler PGO with GCC or Clang, please see the appropriate compiler
+documentation.  The process in GCC 10 was as follows:
+
+1. Compile the Verilated model with the compiler's "-fprofile-generate"
+   flag:
+
+   .. code-block:: bash
+
+      verilator [whatever_flags] --make \
+          -CFLAGS -fprofile-generate -LDFLAGS -fprofile-generate
+
+   or, if calling make yourself, add -fprofile-generate appropriately to your
+   Makefile.
+
+2. Run your simulation. This will create \*.gcda file(s) in the same
+   directory as the source files.
+
+3. Recompile the model with -fprofile-use. The compiler will read the
+   \*.gcda file(s).
+
+   For GCC:
+
+   .. code-block:: bash
+
+      verilator [whatever_flags] --build \
+          -CFLAGS "-fprofile-use -fprofile-correction"
+
+   For Clang:
+
+   .. code-block:: bash
+
+      llvm-profdata merge -output default.profdata *.profraw
+      verilator [whatever_flags] --build \
+          -CFLAGS "-fprofile-use -fprofile-correction"
+
+   or, if calling make yourself, add these CFLAGS switches appropriately to
+   your Makefile.
+
+Clang and GCC also support -fauto-profile which uses sample-based
+feedback-directed optimization.  See the appropriate compiler
+documentation.
diff --git a/docs/guide/warnings.rst b/docs/guide/warnings.rst
index ee76aeece..abc4a1ca4 100644
--- a/docs/guide/warnings.rst
+++ b/docs/guide/warnings.rst
@@ -130,9 +130,10 @@ List Of Warnings
 
 .. option:: BADSTDPRAGMA
 
-   Error that a pragma is badly formed, when that pragma is defined by IEEE1800-2017.
-   For example, an empty `pragma line, or an incorrect specified '`pragma protect'.
-   Note that 3rd party pragmas not defined by IEEE1800-2017 are ignored.
+   Error that a pragma is badly formed, when that pragma is defined by IEEE
+   1800-2017.  For example, an empty pragma line, or an incorrect specified
+   'pragma protect'.  Note that 3rd party pragmas not defined by IEEE
+   1800-2017 are ignored.
 
 
 .. option:: BLKANDNBLK
@@ -481,39 +482,44 @@ List Of Warnings
 
    Faulty example:
 
-   .. code-block:: sv
+   .. include:: ../../docs/gen/ex_DIDNOTCONVERGE_faulty.rst
 
-         always_comb b = ~a;
-         always_comb a = b
+   Results in the following at runtime (not when Verilated):
 
-   This code will toggle forever, and thus to prevent an infinite loop, the
-   executable will give the didn't converge error.
+   .. include:: ../../docs/gen/ex_DIDNOTCONVERGE_nodbg_msg.rst
 
-   To debug this, first review any UNOPTFLAT warnings that were ignored.
-   Though typically it is safe to ignore UNOPTFLAT (at a performance cost),
-   at the time of issuing a UNOPTFLAT Verilator did not know if the logic
-   would eventually converge and assumed it would.
+   This is because the signals keep toggling even without time
+   passing. Thus to prevent an infinite loop, the Verilated executable
+   gives the DIDNOTCONVERGE error.
+
+   To debug this, first review any UNOPT or UNOPTFLAT warnings that were
+   ignored.  Though typically it is safe to ignore UNOPTFLAT (at a
+   performance cost), at the time of issuing a UNOPTFLAT Verilator did not
+   know if the logic would eventually converge and assumed it would.
 
    Next, run Verilator with :vlopt:`--prof-cfuncs -CFLAGS -DVL_DEBUG
    <--prof-cfuncs>`.  Rerun the test.  Now just before the convergence
    error you should see additional output similar to this:
 
-   .. code-block::
+   .. include:: ../../docs/gen/ex_DIDNOTCONVERGE_msg.rst
 
-         CHANGE: filename.v:1: b
-         CHANGE: filename.v:2: a
-
-   This means that signal b and signal a keep changing, inspect the code
-   that modifies these signals.  Note if many signals are getting printed
-   then most likely all of them are oscillating.  It may also be that
-   e.g. "a" may be oscillating, then "a" feeds signal "c" which then is
-   also reported as oscillating.
+   The CHANGE line means that on the given filename and line number that
+   drove a signal, the signal 'a' kept changing. Inspect the code that
+   modifies these signals.  Note if many signals are getting printed then
+   most likely all of them are oscillating.  It may also be that e.g. "a"
+   may be oscillating, then "a" feeds signal "c" which then is also
+   reported as oscillating.
 
    One way DIDNOTCONVERGE may occur is flops are built out of gate
-   primitives.  error. Verilator does not support building flops or latches
-   out of gate primitives, and any such code must change to use behavioral
+   primitives. Verilator does not support building flops or latches out of
+   gate primitives, and any such code must change to use behavioral
    constructs (e.g. always_ff and always_latch).
 
+   Another way DIDNOTCONVERGE may occur is if # delays are used to generate
+   clocks.  Verilator ignores the delays and gives an :option:`ASSIGNDLY`
+   or :option:`STMTDLY` warning.  If these were suppressed, due to the
+   absence of the delay, the code may now oscillate.
+
    Finally, rare, more difficult cases can be debugged like a C++ program;
    either enter :command:`gdb` and use its tracing facilities, or edit the
    generated C++ code to add appropriate prints to see what is going on.
@@ -572,7 +578,7 @@ List Of Warnings
    must end in newline, as otherwise for example :command:`cat` with the
    file as an argument may produce undesirable results.
 
-   Repair by adding a newline to the end of the file.
+   Repair by appending a newline to the end of the file.
 
    Disabled by default as this is a code style warning; it will simulate
    correctly.
@@ -792,8 +798,8 @@ List Of Warnings
    .. TODO better example
 
    Warns that a module has multiple definitions.  Generally this indicates
-   a coding error, or a mistake in a library file and it's good practice to
-   have one module per file (and only put each file once on the command
+   a coding error, or a mistake in a library file, and it's good practice
+   to have one module per file (and only put each file once on the command
    line) to avoid these issues.  For some gate level netlists duplicates
    are sometimes unavoidable, and MODDUP should be disabled.
 
@@ -1005,9 +1011,25 @@ List Of Warnings
    a var/reg must be used as the target of procedural assignments.
 
 
+.. option:: PROFOUTOFDATE
+
+   Warns that threads were scheduled using estimated costs, despite the
+   fact that data was provided from profile-guided optimization (see
+   :ref:`Thread PGO`) as fed into Verilator using the
+   :option:`profile_data` configuration file option.  This usually
+   indicates that the profile data was generated from different Verilog
+   source code than Verilator is currently running against.
+
+   It is recommended to create new profiling data, then rerun Verilator
+   with the same input source files and that new profiling data.
+
+   Ignoring this warning may only slow simulations, it will simulate
+   correctly.
+
+
 .. option:: PROTECTED
 
-   Warning that a '`pragma protected' section was encountered. The code
+   Warning that a 'pragma protected' section was encountered. The code
    inside the protected region will be partly checked for correctness, but is
    otherwise ignored.
 
@@ -1183,9 +1205,25 @@ List Of Warnings
 
    Faulty example:
 
-   .. code-block:: sv
+   .. include:: ../../docs/gen/ex_STMTDLY_faulty.rst
 
-         #100 $finish;  //<--- Warning
+   Results in:
+
+   .. include:: ../../docs/gen/ex_STMTDLY_msg.rst
+
+   This is a warning because Verilator does not support delayed statements.
+   It will simply ignore all such delays.  In many cases ignoring a delay
+   might be harmless, but if the delayed statement is, as in this example,
+   used to cause some important action at a later time, it might be an
+   important difference.
+
+   Some possible workarounds:
+
+   * Move the delayed statement into the C++ wrapper file, where the
+     stimulus and clock generation can be done in C++.
+
+   * Convert the statement into a FSM, or other statement that tests
+     against $time.
 
 
 .. option:: SYMRSVDWORD
@@ -1353,7 +1391,9 @@ List Of Warnings
 
    Often UNOPTFLAT is caused by logic that isn't truly circular as viewed by
    synthesis which analyzes interconnection per-bit, but is circular to
-   simulation which analyzes per-bus:
+   simulation which analyzes per-bus.
+
+   Faulty example:
 
    .. code-block:: sv
 
@@ -1466,9 +1506,16 @@ List Of Warnings
    Error that a construct might be legal according to IEEE but is not
    currently supported by Verilator.
 
+   A typical workaround is to recode the construct into a simpler and more
+   common alternative language construct.
+
+   Alternatively, check if the construct is supported by other tools, and
+   if so please consider submitting a github pull request against the
+   Verilator sources to implement the missing unsupported feature.
+
    This error may be ignored with :vlopt:`--bbox-unsup`, however this will
-   make the design simulate incorrectly; see the details under
-   :vlopt:`--bbox-unsup`.
+   make the design simulate incorrectly and is only intended for lint
+   usage; see the details under :vlopt:`--bbox-unsup`.
 
 
 .. option:: UNUSED
@@ -1598,7 +1645,7 @@ List Of Warnings
    * A part select has a different size then needed to index into the
      packed or unpacked array (etc).
 
-   Verilator for attempts to track the minimum width of unsized constants
+   Verilator attempts to track the minimum width of unsized constants
    and will suppress the warning when the minimum width is appropriate to
    fit the required size.
 
@@ -1607,11 +1654,13 @@ List Of Warnings
 
    The recommendation is to fix these issues by:
 
-   * Resizing the variable or constant.  E.g. :code:`2'd2` instead of :code:`3'd2`.
+   * Resizing the variable or constant to match the needed size for the
+     expression.  E.g. :code:`2'd2` instead of :code:`3'd2`.
 
-   * Using :code:`'0` or :code:`'1`.
+   * Using :code:`'0` or :code:`'1` which automatically resize in an
+     expression.
 
-   * Using part select to narrow a variable. E.g. :code:`too_wide[1:0]`.
+   * Using part selects to narrow a variable. E.g. :code:`too_wide[1:0]`.
 
    * Using concatenate to widen a variable. E.g. :code:`{1'b1, too_narrow}`.
 
diff --git a/docs/internals.rst b/docs/internals.rst
index 39e3d3862..1b9b3e390 100644
--- a/docs/internals.rst
+++ b/docs/internals.rst
@@ -405,6 +405,9 @@ routines in the sources to rely more heavily on randomness, and
 generally try harder not to keep input nodes together when we have the
 option to scramble things.
 
+Profile-guided optimization makes this a bit better, by adjusting mtask
+scheduling, but this does not yet guide the packing into mtasks.
+
 
 Performance Regression
 """"""""""""""""""""""
diff --git a/docs/spelling.txt b/docs/spelling.txt
index 278dd6b76..82c6984da 100644
--- a/docs/spelling.txt
+++ b/docs/spelling.txt
@@ -676,7 +676,6 @@ prev
 printf
 printtimescale
 profcfunc
-profcfuncs
 prototyptes
 ps
 pthread
diff --git a/include/verilated.cpp b/include/verilated.cpp
index f86f04852..fb349c41f 100644
--- a/include/verilated.cpp
+++ b/include/verilated.cpp
@@ -2258,6 +2258,7 @@ VerilatedContext::VerilatedContext()
     Verilated::lastContextp(this);
     Verilated::threadContextp(this);
     m_ns.m_profThreadsFilename = "profile_threads.dat";
+    m_ns.m_profVltFilename = "profile.vlt";
     m_fdps.resize(31);
     std::fill(m_fdps.begin(), m_fdps.end(), static_cast<FILE*>(nullptr));
     m_fdFreeMct.resize(30);
@@ -2340,6 +2341,14 @@ std::string VerilatedContext::profThreadsFilename() const VL_MT_SAFE {
     const VerilatedLockGuard lock{m_mutex};
     return m_ns.m_profThreadsFilename;
 }
+void VerilatedContext::profVltFilename(const std::string& flag) VL_MT_SAFE {
+    const VerilatedLockGuard lock{m_mutex};
+    m_ns.m_profVltFilename = flag;
+}
+std::string VerilatedContext::profVltFilename() const VL_MT_SAFE {
+    const VerilatedLockGuard lock{m_mutex};
+    return m_ns.m_profVltFilename;
+}
 void VerilatedContext::randReset(int val) VL_MT_SAFE {
     const VerilatedLockGuard lock{m_mutex};
     m_s.m_randReset = val;
@@ -2495,6 +2504,8 @@ void VerilatedContextImp::commandArgVl(const std::string& arg) {
             profThreadsWindow(std::atol(value.c_str()));
         } else if (commandArgVlValue(arg, "+verilator+prof+threads+file+", value /*ref*/)) {
             profThreadsFilename(value);
+        } else if (commandArgVlValue(arg, "+verilator+prof+vlt+file+", value /*ref*/)) {
+            profVltFilename(value);
         } else if (commandArgVlValue(arg, "+verilator+rand+reset+", value /*ref*/)) {
             randReset(std::atoi(value.c_str()));
         } else if (commandArgVlValue(arg, "+verilator+seed+", value /*ref*/)) {
diff --git a/include/verilated.h b/include/verilated.h
index 1866e34fb..c6d6815c6 100644
--- a/include/verilated.h
+++ b/include/verilated.h
@@ -346,6 +346,7 @@ protected:
         vluint32_t m_profThreadsWindow = 2;  // +prof+threads window size
         // Slow path
         std::string m_profThreadsFilename;  // +prof+threads filename
+        std::string m_profVltFilename;  // +prof+vlt filename
     } m_ns;
 
     mutable VerilatedMutex m_argMutex;  // Protect m_argVec, m_argVecLoaded
@@ -522,6 +523,8 @@ public:  // But for internal use only
     vluint32_t profThreadsWindow() const VL_MT_SAFE { return m_ns.m_profThreadsWindow; }
     void profThreadsFilename(const std::string& flag) VL_MT_SAFE;
     std::string profThreadsFilename() const VL_MT_SAFE;
+    void profVltFilename(const std::string& flag) VL_MT_SAFE;
+    std::string profVltFilename() const VL_MT_SAFE;
 
     // Internal: Find scope
     const VerilatedScope* scopeFind(const char* namep) const VL_MT_SAFE;
diff --git a/include/verilated.mk.in b/include/verilated.mk.in
index 39e263fa3..150196d0f 100644
--- a/include/verilated.mk.in
+++ b/include/verilated.mk.in
@@ -23,8 +23,6 @@ CFG_WITH_LONGTESTS = @CFG_WITH_LONGTESTS@
 CFG_CXXFLAGS_PROFILE = @CFG_CXXFLAGS_PROFILE@
 # Select newest language
 CFG_CXXFLAGS_STD_NEWEST = @CFG_CXXFLAGS_STD_NEWEST@
-# Select oldest language (for Verilator internal testing only)
-CFG_CXXFLAGS_STD_OLDEST = @CFG_CXXFLAGS_STD_OLDEST@
 # Compiler flags to use to turn off unused and generated code warnings, such as -Wno-div-by-zero
 CFG_CXXFLAGS_NO_UNUSED = @CFG_CXXFLAGS_NO_UNUSED@
 # Compiler flags that turn on extra warnings
diff --git a/include/verilated_cov.cpp b/include/verilated_cov.cpp
index 9f04c5429..af6080275 100644
--- a/include/verilated_cov.cpp
+++ b/include/verilated_cov.cpp
@@ -364,7 +364,7 @@ public:
 
         std::ofstream os{filename};
         if (os.fail()) {
-            std::string msg = std::string{"%Error: Can't write '"} + filename + "'";
+            const std::string msg = std::string{"%Error: Can't write '"} + filename + "'";
             VL_FATAL_MT("", 0, "", msg.c_str());
             return;
         }
diff --git a/include/verilated_funcs.h b/include/verilated_funcs.h
index 0c11c6551..cddc44842 100644
--- a/include/verilated_funcs.h
+++ b/include/verilated_funcs.h
@@ -82,9 +82,12 @@ extern IData VL_RANDOM_SEEDED_II(int obits, IData seed) VL_MT_SAFE;
 inline IData VL_URANDOM_RANGE_I(IData hi, IData lo) {
     vluint64_t rnd = vl_rand64();
     if (VL_LIKELY(hi > lo)) {
+        // (hi - lo + 1) can be zero when hi is UINT_MAX and lo is zero
+        if (VL_UNLIKELY(hi - lo + 1 == 0)) return rnd;
         // Modulus isn't very fast but it's common that hi-low is power-of-two
         return (rnd % (hi - lo + 1)) + lo;
     } else {
+        if (VL_UNLIKELY(lo - hi + 1 == 0)) return rnd;
         return (rnd % (lo - hi + 1)) + hi;
     }
 }
diff --git a/include/verilated_imp.h b/include/verilated_imp.h
index 4c30cbafc..f63b158d0 100644
--- a/include/verilated_imp.h
+++ b/include/verilated_imp.h
@@ -549,8 +549,8 @@ public:  // But only for verilated*.cpp
         const VerilatedLockGuard lock{s().m_exportMutex};
         const auto& it = s().m_exportMap.find(namep);
         if (VL_LIKELY(it != s().m_exportMap.end())) return it->second;
-        std::string msg = (std::string{"%Error: Testbench C called "} + namep
-                           + " but no such DPI export function name exists in ANY model");
+        const std::string msg = (std::string{"%Error: Testbench C called "} + namep
+                                 + " but no such DPI export function name exists in ANY model");
         VL_FATAL_MT("unknown", 0, "", msg.c_str());
         return -1;
     }
diff --git a/include/verilated_profiler.h b/include/verilated_profiler.h
new file mode 100644
index 000000000..129d0870c
--- /dev/null
+++ b/include/verilated_profiler.h
@@ -0,0 +1,113 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//=============================================================================
+//
+// Code available from: https://verilator.org
+//
+// Copyright 2012-2021 by Wilson Snyder. This program is free software; you
+// can redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+//
+//=============================================================================
+///
+/// \file
+/// \brief Verilated general profiling header
+///
+/// This file is not part of the Verilated public-facing API.
+/// It is only for internal use by Verilated library routines.
+///
+//=============================================================================
+
+#ifndef VERILATOR_VERILATED_PROFILER_H_
+#define VERILATOR_VERILATED_PROFILER_H_
+
+#include "verilatedos.h"
+#include "verilated.h"  // for VerilatedMutex and clang annotations
+
+// Profile record, private class used only by this header
+class VerilatedProfilerRec final {
+    std::string m_name;  // Hashed name of mtask/etc
+    size_t m_counterNumber = 0;  // Which counter has data
+public:
+    // METHODS
+    VerilatedProfilerRec(size_t counterNumber, const std::string& name)
+        : m_name{name}
+        , m_counterNumber{counterNumber} {}
+    VerilatedProfilerRec() = default;
+    size_t counterNumber() const { return m_counterNumber; }
+    std::string name() const { return m_name; }
+};
+
+// Create some number of bucketed profilers
+template <std::size_t T_Entries> class VerilatedProfiler final {
+    // Counters are stored packed, all together, versus in VerilatedProfilerRec to
+    // reduce cache effects
+    std::array<vluint64_t, T_Entries> m_counters{};  // Time spent on this record
+    std::deque<VerilatedProfilerRec> m_records;  // Record information
+
+public:
+    // METHODS
+    VerilatedProfiler() = default;
+    ~VerilatedProfiler() = default;
+    void write(const char* modelp, const std::string& filename) VL_MT_SAFE;
+    void addCounter(size_t counter, const std::string& name) {
+        VL_DEBUG_IF(assert(counter < T_Entries););
+        m_records.emplace_back(VerilatedProfilerRec{counter, name});
+    }
+    void startCounter(size_t counter) {
+        vluint64_t val;
+        VL_RDTSC(val);
+        // -= so when we add end time in stopCounter, we already subtracted
+        // out, without needing to hold another temporary
+        m_counters[counter] -= val;
+    }
+    void stopCounter(size_t counter) {
+        vluint64_t val;
+        VL_RDTSC(val);
+        m_counters[counter] += val;
+    }
+};
+
+template <std::size_t T_Entries>
+void VerilatedProfiler<T_Entries>::write(const char* modelp,
+                                         const std::string& filename) VL_MT_SAFE {
+    static VerilatedMutex s_mutex;
+    const VerilatedLockGuard lock{s_mutex};
+
+    // On the first call we create the file.  On later calls we append.
+    // So when we have multiple models in an executable, possibly even
+    // running on different threads, each will have a different symtab so
+    // each will collect its own data correctly.  However when each is
+    // destroyed we need to get all the data, not keep overwriting and only
+    // get the last model's data.
+    static bool s_firstCall = true;
+
+    VL_DEBUG_IF(VL_DBG_MSGF("+prof+vlt+file writing to '%s'\n", filename.c_str()););
+
+    FILE* fp = nullptr;
+    if (!s_firstCall) fp = std::fopen(filename.c_str(), "a");
+    if (VL_UNLIKELY(!fp))
+        fp = std::fopen(filename.c_str(), "w");  // firstCall, or doesn't exist yet
+    if (VL_UNLIKELY(!fp)) {
+        VL_FATAL_MT(filename.c_str(), 0, "", "+prof+vlt+file file not writable");
+        // cppcheck-suppress resourceLeak   // bug, doesn't realize fp is nullptr
+        return;  // LCOV_EXCL_LINE
+    }
+    s_firstCall = false;
+
+    // TODO Perhaps merge with verilated_coverage output format, so can
+    // have a common merging and reporting tool, etc.
+    fprintf(fp, "// Verilated model profile-guided optimization data dump file\n");
+    fprintf(fp, "`verilator_config\n");
+
+    for (const auto& it : m_records) {
+        const std::string& name = it.name();
+        fprintf(fp, "profile_data -model \"%s\" -mtask \"%s\" -cost 64'd%" VL_PRI64 "u\n", modelp,
+                name.c_str(), m_counters[it.counterNumber()]);
+    }
+
+    std::fclose(fp);
+}
+
+#endif
diff --git a/include/verilated_save.cpp b/include/verilated_save.cpp
index 2fec9808d..eada7a572 100644
--- a/include/verilated_save.cpp
+++ b/include/verilated_save.cpp
@@ -204,7 +204,7 @@ void VerilatedSave::flush() VL_MT_UNSAFE_ONE {
             if (VL_UNCOVERABLE(errno != EAGAIN && errno != EINTR)) {
                 // LCOV_EXCL_START
                 // write failed, presume error (perhaps out of disk space)
-                std::string msg = std::string{__FUNCTION__} + ": " + std::strerror(errno);
+                const std::string msg = std::string{__FUNCTION__} + ": " + std::strerror(errno);
                 VL_FATAL_MT("", 0, "", msg.c_str());
                 close();
                 break;
diff --git a/include/verilated_threads.cpp b/include/verilated_threads.cpp
index 606985494..edfa8891b 100644
--- a/include/verilated_threads.cpp
+++ b/include/verilated_threads.cpp
@@ -25,6 +25,7 @@
 #include "verilated_threads.h"
 
 #include <cstdio>
+#include <fstream>
 
 //=============================================================================
 // Globals
@@ -145,7 +146,7 @@ void VlThreadPool::profileAppendAll(const VlProfileRec& rec) VL_MT_SAFE_EXCLUDES
     }
 }
 
-void VlThreadPool::profileDump(const char* filenamep, vluint64_t ticksElapsed)
+void VlThreadPool::profileDump(const char* filenamep, vluint64_t tickStart, vluint64_t tickEnd)
     VL_MT_SAFE_EXCLUDES(m_mutex) {
     const VerilatedLockGuard lock{m_mutex};
     VL_DEBUG_IF(VL_DBG_MSGF("+prof+threads writing to '%s'\n", filenamep););
@@ -159,7 +160,7 @@ void VlThreadPool::profileDump(const char* filenamep, vluint64_t ticksElapsed)
 
     // TODO Perhaps merge with verilated_coverage output format, so can
     // have a common merging and reporting tool, etc.
-    fprintf(fp, "VLPROFTHREAD 1.0 # Verilator thread profile dump version 1.0\n");
+    fprintf(fp, "VLPROFTHREAD 1.1 # Verilator thread profile dump version 1.1\n");
     fprintf(fp, "VLPROF arg --threads %" VL_PRI64 "u\n", vluint64_t(m_workers.size() + 1));
     fprintf(fp, "VLPROF arg +verilator+prof+threads+start+%" VL_PRI64 "u\n",
             Verilated::threadContextp()->profThreadsStart());
@@ -167,6 +168,16 @@ void VlThreadPool::profileDump(const char* filenamep, vluint64_t ticksElapsed)
             Verilated::threadContextp()->profThreadsWindow());
     fprintf(fp, "VLPROF stat yields %" VL_PRI64 "u\n", VlMTaskVertex::yields());
 
+    // Copy /proc/cpuinfo into this output so verilator_gantt can be run on
+    // a different machine
+    {
+        const std::unique_ptr<std::ifstream> ifp{new std::ifstream("/proc/cpuinfo")};
+        if (!ifp->fail()) {
+            std::string line;
+            while (std::getline(*ifp, line)) { fprintf(fp, "VLPROFPROC %s\n", line.c_str()); }
+        }
+    }
+
     vluint32_t thread_id = 0;
     for (const auto& pi : m_allProfiles) {
         ++thread_id;
@@ -177,20 +188,36 @@ void VlThreadPool::profileDump(const char* filenamep, vluint64_t ticksElapsed)
             case VlProfileRec::TYPE_BARRIER:  //
                 printing = true;
                 break;
+            case VlProfileRec::TYPE_EVAL:
+                if (!printing) break;
+                fprintf(fp,
+                        "VLPROF eval start %" VL_PRI64 "u elapsed %" VL_PRI64 "u"
+                        " cpu %u on thread %u\n",
+                        ei.m_startTime - tickStart, (ei.m_endTime - ei.m_startTime), ei.m_cpu,
+                        thread_id);
+                break;
+            case VlProfileRec::TYPE_EVAL_LOOP:
+                if (!printing) break;
+                fprintf(fp,
+                        "VLPROF eval_loop start %" VL_PRI64 "u elapsed %" VL_PRI64 "u"
+                        " cpu %u on thread %u\n",
+                        ei.m_startTime - tickStart, (ei.m_endTime - ei.m_startTime), ei.m_cpu,
+                        thread_id);
+                break;
             case VlProfileRec::TYPE_MTASK_RUN:
                 if (!printing) break;
                 fprintf(fp,
                         "VLPROF mtask %d"
-                        " start %" VL_PRI64 "u end %" VL_PRI64 "u elapsed %" VL_PRI64 "u"
-                        " predict_time %u cpu %u on thread %u\n",
-                        ei.m_mtaskId, ei.m_startTime, ei.m_endTime,
-                        (ei.m_endTime - ei.m_startTime), ei.m_predictTime, ei.m_cpu, thread_id);
+                        " start %" VL_PRI64 "u elapsed %" VL_PRI64 "u"
+                        " predict_start %u predict_cost %u cpu %u on thread %u\n",
+                        ei.m_mtaskId, ei.m_startTime - tickStart, (ei.m_endTime - ei.m_startTime),
+                        ei.m_predictStart, ei.m_predictCost, ei.m_cpu, thread_id);
                 break;
             default: assert(false); break;  // LCOV_EXCL_LINE
             }
         }
     }
-    fprintf(fp, "VLPROF stat ticks %" VL_PRI64 "u\n", ticksElapsed);
+    fprintf(fp, "VLPROF stat ticks %" VL_PRI64 "u\n", tickEnd - tickStart);
 
     std::fclose(fp);
 }
diff --git a/include/verilated_threads.h b/include/verilated_threads.h
index 150168f80..e07e4418f 100644
--- a/include/verilated_threads.h
+++ b/include/verilated_threads.h
@@ -131,21 +131,36 @@ public:
 class VlProfileRec final {
 protected:
     friend class VlThreadPool;
-    enum VlProfileE { TYPE_MTASK_RUN, TYPE_BARRIER };
-    VlProfileE m_type = TYPE_BARRIER;  // Record type
-    vluint32_t m_mtaskId = 0;  // Mtask we're logging
-    vluint32_t m_predictTime = 0;  // How long scheduler predicted would take
-    vluint64_t m_startTime = 0;  // Tick at start of execution
+    enum VlProfileE { TYPE_MTASK_RUN, TYPE_EVAL, TYPE_EVAL_LOOP, TYPE_BARRIER };
+    // Layout below allows efficient packing.
+    // Leave endTime first, so no math needed to calculate address in endRecord
     vluint64_t m_endTime = 0;  // Tick at end of execution
+    vluint64_t m_startTime = 0;  // Tick at start of execution
+    vluint32_t m_mtaskId = 0;  // Mtask we're logging
+    vluint32_t m_predictStart = 0;  // Time scheduler predicted would start
+    vluint32_t m_predictCost = 0;  // How long scheduler predicted would take
+    VlProfileE m_type = TYPE_BARRIER;  // Record type
     unsigned m_cpu;  // Execution CPU number (at start anyways)
 public:
     class Barrier {};
     VlProfileRec() = default;
     explicit VlProfileRec(Barrier) { m_cpu = getcpu(); }
-    void startRecord(vluint64_t time, uint32_t mtask, uint32_t predict) {
+    void startEval(vluint64_t time) {  // Begin a TYPE_EVAL record starting at tick 'time'
+        m_type = VlProfileRec::TYPE_EVAL;
+        m_startTime = time;
+        m_cpu = getcpu();  // CPU sampled when the record starts
+    }
+    void startEvalLoop(vluint64_t time) {  // Begin a TYPE_EVAL_LOOP record at tick 'time'
+        m_type = VlProfileRec::TYPE_EVAL_LOOP;
+        m_startTime = time;
+        m_cpu = getcpu();  // CPU sampled when the record starts
+    }
+    void startRecord(vluint64_t time, vluint32_t mtask, vluint32_t predictStart,
+                     vluint32_t predictCost) {
         m_type = VlProfileRec::TYPE_MTASK_RUN;
         m_mtaskId = mtask;
-        m_predictTime = predict;
+        m_predictStart = predictStart;
+        m_predictCost = predictCost;
         m_startTime = time;
         m_cpu = getcpu();
     }
@@ -292,7 +307,8 @@ public:
         return &(t_profilep->back());
     }
     void profileAppendAll(const VlProfileRec& rec) VL_MT_SAFE_EXCLUDES(m_mutex);
-    void profileDump(const char* filenamep, vluint64_t ticksElapsed) VL_MT_SAFE_EXCLUDES(m_mutex);
+    void profileDump(const char* filenamep, vluint64_t tickStart, vluint64_t tickEnd)
+        VL_MT_SAFE_EXCLUDES(m_mutex);
     // In profiling mode, each executing thread must call
     // this once to setup profiling state:
     void setupProfilingClientThread() VL_MT_SAFE_EXCLUDES(m_mutex);
diff --git a/include/verilated_trace_imp.cpp b/include/verilated_trace_imp.cpp
index 07e60f7a3..ba8e43110 100644
--- a/include/verilated_trace_imp.cpp
+++ b/include/verilated_trace_imp.cpp
@@ -481,8 +481,8 @@ void VerilatedTrace<VL_DERIVED_T>::addCallbackRecord(std::vector<CallbackRecord>
     VL_MT_SAFE_EXCLUDES(m_mutex) {
     const VerilatedLockGuard lock{m_mutex};
     if (VL_UNCOVERABLE(timeLastDump() != 0)) {  // LCOV_EXCL_START
-        std::string msg = (std::string{"Internal: "} + __FILE__ + "::" + __FUNCTION__
-                           + " called with already open file");
+        const std::string msg = (std::string{"Internal: "} + __FILE__ + "::" + __FUNCTION__
+                                 + " called with already open file");
         VL_FATAL_MT(__FILE__, __LINE__, "", msg.c_str());
     }  // LCOV_EXCL_STOP
     cbVec.push_back(cbRec);
diff --git a/include/verilatedos.h b/include/verilatedos.h
index 1f64bf63c..9a1f50a75 100644
--- a/include/verilatedos.h
+++ b/include/verilatedos.h
@@ -472,7 +472,12 @@ typedef unsigned long long vluint64_t;  ///< 64-bit unsigned type
         (val) = ((vluint64_t)lo) | (((vluint64_t)hi) << 32); \
     }
 #elif defined(__aarch64__)
-# define VL_RDTSC(val) asm volatile("mrs %[rt],PMCCNTR_EL0" : [rt] "=r"(val));
+// 1 GHz virtual system timer on SBSA level 5 compliant systems, else often 100 MHz
+# define VL_RDTSC(val) \
+    { \
+        asm volatile("isb" : : : "memory"); \
+        asm volatile("mrs %[rt],CNTVCT_EL0" : [rt] "=r"(val)); \
+    }
 #else
 // We just silently ignore unknown OSes, as only leads to missing statistics
 # define VL_RDTSC(val) (val) = 0;
diff --git a/src/V3Ast.cpp b/src/V3Ast.cpp
index 356c8b201..e0bbe975b 100644
--- a/src/V3Ast.cpp
+++ b/src/V3Ast.cpp
@@ -230,7 +230,8 @@ string AstNode::prettyTypeName() const {
 //######################################################################
 // Insertion
 
-inline void AstNode::debugTreeChange(const char* prefix, int lineno, bool next){
+inline void AstNode::debugTreeChange(const AstNode* nodep, const char* prefix, int lineno,
+                                     bool next){
 #ifdef VL_DEBUG
 // Called on all major tree changers.
 // Only for use for those really nasty bugs relating to internals
@@ -254,8 +255,8 @@ inline void AstNode::debugTreeChange(const char* prefix, int lineno, bool next){
 AstNode* AstNode::addNext(AstNode* nodep, AstNode* newp) {
     // Add to m_nextp, returns this
     UDEBUGONLY(UASSERT_OBJ(newp, nodep, "Null item passed to addNext"););
-    nodep->debugTreeChange("-addNextThs: ", __LINE__, false);
-    newp->debugTreeChange("-addNextNew: ", __LINE__, true);
+    debugTreeChange(nodep, "-addNextThs: ", __LINE__, false);
+    debugTreeChange(newp, "-addNextNew: ", __LINE__, true);
     if (!nodep) {  // verilog.y and lots of other places assume this
         return newp;
     } else {
@@ -285,7 +286,7 @@ AstNode* AstNode::addNext(AstNode* nodep, AstNode* newp) {
         newp->editCountInc();
         if (oldtailp->m_iterpp) *(oldtailp->m_iterpp) = newp;  // Iterate on new item
     }
-    nodep->debugTreeChange("-addNextOut:", __LINE__, true);
+    debugTreeChange(nodep, "-addNextOut:", __LINE__, true);
     return nodep;
 }
 
@@ -300,8 +301,8 @@ void AstNode::addNextHere(AstNode* newp) {
     //  New  could be head of single node, or list
     UASSERT(newp, "Null item passed to addNext");
     UASSERT(!newp->backp(), "New node (back) already assigned?");
-    this->debugTreeChange("-addHereThs: ", __LINE__, false);
-    newp->debugTreeChange("-addHereNew: ", __LINE__, true);
+    debugTreeChange(this, "-addHereThs: ", __LINE__, false);
+    debugTreeChange(newp, "-addHereNew: ", __LINE__, true);
     newp->editCountInc();
 
     AstNode* addlastp = newp->m_headtailp;  // Last node in list to be added
@@ -339,7 +340,7 @@ void AstNode::addNextHere(AstNode* newp) {
     }
 
     if (this->m_iterpp) *(this->m_iterpp) = newp;  // Iterate on new item
-    this->debugTreeChange("-addHereOut: ", __LINE__, true);
+    debugTreeChange(this, "-addHereOut: ", __LINE__, true);
 }
 
 void AstNode::setOp1p(AstNode* newp) {
@@ -347,12 +348,12 @@ void AstNode::setOp1p(AstNode* newp) {
     UDEBUGONLY(UASSERT_OBJ(!m_op1p, this, "Adding to non-empty, non-list op1"););
     UDEBUGONLY(UASSERT_OBJ(!newp->m_backp, newp, "Adding already linked node"););
     UDEBUGONLY(UASSERT_OBJ(!newp->m_nextp, newp, "Adding list to non-list op1"););
-    this->debugTreeChange("-setOp1pThs: ", __LINE__, false);
-    newp->debugTreeChange("-setOp1pNew: ", __LINE__, true);
+    debugTreeChange(this, "-setOp1pThs: ", __LINE__, false);
+    debugTreeChange(newp, "-setOp1pNew: ", __LINE__, true);
     m_op1p = newp;
     newp->editCountInc();
     newp->m_backp = this;
-    this->debugTreeChange("-setOp1pOut: ", __LINE__, false);
+    debugTreeChange(this, "-setOp1pOut: ", __LINE__, false);
 }
 
 void AstNode::setOp2p(AstNode* newp) {
@@ -360,12 +361,12 @@ void AstNode::setOp2p(AstNode* newp) {
     UDEBUGONLY(UASSERT_OBJ(!m_op2p, this, "Adding to non-empty, non-list op2"););
     UDEBUGONLY(UASSERT_OBJ(!newp->m_backp, newp, "Adding already linked node"););
     UDEBUGONLY(UASSERT_OBJ(!newp->m_nextp, newp, "Adding list to non-list op2"););
-    this->debugTreeChange("-setOp2pThs: ", __LINE__, false);
-    newp->debugTreeChange("-setOp2pNew: ", __LINE__, true);
+    debugTreeChange(this, "-setOp2pThs: ", __LINE__, false);
+    debugTreeChange(newp, "-setOp2pNew: ", __LINE__, true);
     m_op2p = newp;
     newp->editCountInc();
     newp->m_backp = this;
-    this->debugTreeChange("-setOp2pOut: ", __LINE__, false);
+    debugTreeChange(this, "-setOp2pOut: ", __LINE__, false);
 }
 
 void AstNode::setOp3p(AstNode* newp) {
@@ -373,12 +374,12 @@ void AstNode::setOp3p(AstNode* newp) {
     UDEBUGONLY(UASSERT_OBJ(!m_op3p, this, "Adding to non-empty, non-list op3"););
     UDEBUGONLY(UASSERT_OBJ(!newp->m_backp, newp, "Adding already linked node"););
     UDEBUGONLY(UASSERT_OBJ(!newp->m_nextp, newp, "Adding list to non-list op3"););
-    this->debugTreeChange("-setOp3pThs: ", __LINE__, false);
-    newp->debugTreeChange("-setOp3pNew: ", __LINE__, true);
+    debugTreeChange(this, "-setOp3pThs: ", __LINE__, false);
+    debugTreeChange(newp, "-setOp3pNew: ", __LINE__, true);
     m_op3p = newp;
     newp->editCountInc();
     newp->m_backp = this;
-    this->debugTreeChange("-setOp3pOut: ", __LINE__, false);
+    debugTreeChange(this, "-setOp3pOut: ", __LINE__, false);
 }
 
 void AstNode::setOp4p(AstNode* newp) {
@@ -386,12 +387,12 @@ void AstNode::setOp4p(AstNode* newp) {
     UDEBUGONLY(UASSERT_OBJ(!m_op4p, this, "Adding to non-empty, non-list op4"););
     UDEBUGONLY(UASSERT_OBJ(!newp->m_backp, newp, "Adding already linked node"););
     UDEBUGONLY(UASSERT_OBJ(!newp->m_nextp, newp, "Adding list to non-list op4"););
-    this->debugTreeChange("-setOp4pThs: ", __LINE__, false);
-    newp->debugTreeChange("-setOp4pNew: ", __LINE__, true);
+    debugTreeChange(this, "-setOp4pThs: ", __LINE__, false);
+    debugTreeChange(newp, "-setOp4pNew: ", __LINE__, true);
     m_op4p = newp;
     newp->editCountInc();
     newp->m_backp = this;
-    this->debugTreeChange("-setOp4pOut: ", __LINE__, false);
+    debugTreeChange(this, "-setOp4pOut: ", __LINE__, false);
 }
 
 void AstNode::addOp1p(AstNode* newp) {
@@ -453,7 +454,7 @@ void AstNRelinker::dump(std::ostream& str) const {
 }
 
 AstNode* AstNode::unlinkFrBackWithNext(AstNRelinker* linkerp) {
-    this->debugTreeChange("-unlinkWNextThs: ", __LINE__, true);
+    debugTreeChange(this, "-unlinkWNextThs: ", __LINE__, true);
     AstNode* oldp = this;
     UASSERT(oldp->m_backp, "Node has no back, already unlinked?");
     oldp->editCountInc();
@@ -508,12 +509,12 @@ AstNode* AstNode::unlinkFrBackWithNext(AstNRelinker* linkerp) {
     // Iterator fixup
     if (oldp->m_iterpp) *(oldp->m_iterpp) = nullptr;
     oldp->m_iterpp = nullptr;
-    oldp->debugTreeChange("-unlinkWNextOut: ", __LINE__, true);
+    debugTreeChange(oldp, "-unlinkWNextOut: ", __LINE__, true);
     return oldp;
 }
 
 AstNode* AstNode::unlinkFrBack(AstNRelinker* linkerp) {
-    this->debugTreeChange("-unlinkFrBkThs: ", __LINE__, true);
+    debugTreeChange(this, "-unlinkFrBkThs: ", __LINE__, true);
     AstNode* oldp = this;
     UASSERT(oldp->m_backp, "Node has no back, already unlinked?");
     oldp->editCountInc();
@@ -572,7 +573,7 @@ AstNode* AstNode::unlinkFrBack(AstNRelinker* linkerp) {
     oldp->m_backp = nullptr;
     oldp->m_headtailp = this;
     oldp->m_iterpp = nullptr;
-    oldp->debugTreeChange("-unlinkFrBkOut: ", __LINE__, true);
+    debugTreeChange(oldp, "-unlinkFrBkOut: ", __LINE__, true);
     return oldp;
 }
 
@@ -592,8 +593,8 @@ void AstNode::relink(AstNRelinker* linkerp) {
     }
 
     AstNode* backp = linkerp->m_backp;
-    this->debugTreeChange("-relinkNew: ", __LINE__, true);
-    backp->debugTreeChange("-relinkTre: ", __LINE__, true);
+    debugTreeChange(this, "-relinkNew: ", __LINE__, true);
+    debugTreeChange(backp, "-relinkTre: ", __LINE__, true);
 
     switch (linkerp->m_chg) {
     case AstNRelinker::RELINK_NEXT: backp->addNextHere(newp); break;
@@ -618,7 +619,7 @@ void AstNode::relink(AstNRelinker* linkerp) {
     }
     // Empty the linker so not used twice accidentally
     linkerp->m_backp = nullptr;
-    this->debugTreeChange("-relinkOut: ", __LINE__, true);
+    debugTreeChange(this, "-relinkOut: ", __LINE__, true);
 }
 
 void AstNode::relinkOneLink(AstNode*& pointpr,  // Ref to pointer that gets set to newp
@@ -700,7 +701,7 @@ AstNode* AstNode::cloneTreeIterList() {
 }
 
 AstNode* AstNode::cloneTree(bool cloneNextLink) {
-    this->debugTreeChange("-cloneThs: ", __LINE__, cloneNextLink);
+    debugTreeChange(this, "-cloneThs: ", __LINE__, cloneNextLink);
     cloneClearTree();
     AstNode* newp;
     if (cloneNextLink && this->m_nextp) {
@@ -712,7 +713,7 @@ AstNode* AstNode::cloneTree(bool cloneNextLink) {
     }
     newp->m_backp = nullptr;
     newp->cloneRelinkTree();
-    newp->debugTreeChange("-cloneOut: ", __LINE__, true);
+    debugTreeChange(newp, "-cloneOut: ", __LINE__, true);
     return newp;
 }
 
@@ -764,7 +765,7 @@ void AstNode::deleteTree() {
     // deleteTree always deletes the next link, because you must have called
     // unlinkFromBack or unlinkFromBackWithNext as appropriate before calling this.
     UASSERT(!m_backp, "Delete called on node with backlink still set");
-    this->debugTreeChange("-delTree:  ", __LINE__, true);
+    debugTreeChange(this, "-delTree:  ", __LINE__, true);
     this->editCountInc();
     // MUST be depth first!
     deleteTreeIter();
diff --git a/src/V3Ast.h b/src/V3Ast.h
index 3351a1b54..4fb190666 100644
--- a/src/V3Ast.h
+++ b/src/V3Ast.h
@@ -60,15 +60,21 @@ using MTaskIdSet = std::set<int>;  // Set of mtaskIds for Var sorting
         if (VL_UNCOVERABLE(reasonp)) return reasonp; \
     } while (false)
 
-// (V)erilator (N)ode is: True if AstNode is of a a given AstType
+// (V)erilator (N)ode is: Returns true iff AstNode is of the given AstNode subtype, and not
+// nullptr.
 #define VN_IS(nodep, nodetypename) (AstNode::privateIs<Ast##nodetypename>(nodep))
 
-// (V)erilator (N)ode cast: Cast to given type if can; effectively
-// dynamic_cast<nodetypename>(nodep)
+// (V)erilator (N)ode cast: More efficient but otherwise same as dynamic_cast, use this instead.
+// Cast to given type if node is of such type, otherwise returns nullptr.
 #define VN_CAST(nodep, nodetypename) (AstNode::privateCast<Ast##nodetypename>(nodep))
-#define VN_CAST_CONST(nodep, nodetypename) (AstNode::privateConstCast<Ast##nodetypename>(nodep))
+#define VN_CAST_CONST(nodep, nodetypename) (AstNode::privateCastConst<Ast##nodetypename>(nodep))
 
-// (V)erilator (N)ode deleted: Reference to deleted child (for assertions only)
+// (V)erilator (N)ode as: Assert node is of given type then cast to that type. Node can be nullptr.
+// Use this to downcast instead of VN_CAST when you know the true type of the node.
+#define VN_AS(nodep, nodetypename) (AstNode::privateAs<Ast##nodetypename>(nodep))
+#define VN_AS_CONST(nodep, nodetypename) (AstNode::privateAsConst<Ast##nodetypename>(nodep))
+
+// (V)erilator (N)ode deleted: Pointer to deleted AstNode (for assertions only)
 #define VN_DELETED(nodep) VL_UNLIKELY((vluint64_t)(nodep) == 0x1)
 
 //######################################################################
@@ -1437,7 +1443,7 @@ private:
 public:
     static void relinkOneLink(AstNode*& pointpr, AstNode* newp);
     // cppcheck-suppress functionConst
-    void debugTreeChange(const char* prefix, int lineno, bool next);
+    static void debugTreeChange(const AstNode* nodep, const char* prefix, int lineno, bool next);
 
 protected:
     // CONSTRUCTORS
@@ -1838,16 +1844,36 @@ protected:
 private:
     void iterateListBackwards(AstNVisitor& v);
 
-    // CONVERSION
+    // For internal use only.
+    template <typename T> inline static bool privateTypeTest(const AstNode* nodep);
+
 public:
-    // These for use by VN_IS macro only
-    template <class T> static bool privateIs(const AstNode* nodep);
-
-    // These for use by VN_CAST macro only
-    template <class T> static T* privateCast(AstNode* nodep);
-
-    // These for use by VN_CAST_CONST macro only
-    template <class T> static const T* privateConstCast(const AstNode* nodep);
+    // For use via the VN_IS macro only; false for nullptr, else result of privateTypeTest<T>
+    template <typename T> inline static bool privateIs(const AstNode* nodep) {
+        return nodep && privateTypeTest<T>(nodep);
+    }
+    // For use via the VN_CAST macro only; yields nullptr when privateIs<T>(nodep) is false
+    template <typename T> inline static T* privateCast(AstNode* nodep) {
+        return privateIs<T>(nodep) ? reinterpret_cast<T*>(nodep) : nullptr;
+    }
+    // For use via the VN_CAST_CONST macro only; const flavor, nullptr when privateIs<T> is false
+    template <typename T> inline static const T* privateCastConst(const AstNode* nodep) {
+        return privateIs<T>(nodep) ? reinterpret_cast<const T*>(nodep) : nullptr;
+    }
+    // For use via the VN_AS macro only; asserts the type unless nodep is nullptr, then casts
+    template <typename T> inline static T* privateAs(AstNode* nodep) {
+        UASSERT_OBJ(!nodep || privateTypeTest<T>(nodep), nodep,
+                    "AstNode is not of expected type, but instead has type '" << nodep->typeName()
+                                                                              << "'");
+        return reinterpret_cast<T*>(nodep);
+    }
+    // For use via the VN_AS_CONST macro only; const flavor of the asserting cast above nullptr
+    template <typename T> inline static const T* privateAsConst(const AstNode* nodep) {
+        UASSERT_OBJ(!nodep || privateTypeTest<T>(nodep), nodep,
+                    "AstNode is not of expected type, but instead has type '" << nodep->typeName()
+                                                                              << "'");
+        return reinterpret_cast<const T*>(nodep);
+    }
 };
 
 // Specialisations of privateIs/privateCast
diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp
index ea3ecdf8e..abc85275f 100644
--- a/src/V3AstNodes.cpp
+++ b/src/V3AstNodes.cpp
@@ -1338,7 +1338,7 @@ void AstNodeCoverOrAssert::dump(std::ostream& str) const {
 }
 void AstDisplay::dump(std::ostream& str) const {
     this->AstNodeStmt::dump(str);
-    // str<<" "<<displayType().ascii();
+    // str << " " << displayType().ascii();
 }
 void AstEnumItemRef::dump(std::ostream& str) const {
     this->AstNodeMath::dump(str);
diff --git a/src/V3AstNodes.h b/src/V3AstNodes.h
index da6da2a01..d9351f101 100644
--- a/src/V3AstNodes.h
+++ b/src/V3AstNodes.h
@@ -4436,6 +4436,7 @@ public:
     virtual string emitC() override { V3ERROR_NA_RETURN(""); }
     virtual bool isGateOptimizable() const override { return false; }
     virtual bool isPredictOptimizable() const override { return false; }
+    virtual bool isPure() const override { return !outp(); }
     virtual bool cleanOut() const override { return true; }
     virtual bool same(const AstNode* samep) const override { return true; }
     AstNode* searchp() const { return op1p(); }  // op1 = Search expression
diff --git a/src/V3Clock.cpp b/src/V3Clock.cpp
index 7f941851d..07d8e2c81 100644
--- a/src/V3Clock.cpp
+++ b/src/V3Clock.cpp
@@ -136,6 +136,7 @@ private:
                           "Unsupported: Complicated event expression in sensitive activity list");
             return nullptr;
         }
+        UASSERT_OBJ(nodep->varrefp(), nodep, "No clock found on sense item");
         AstVarScope* clkvscp = nodep->varrefp()->varScopep();
         if (nodep->edgeType() == VEdgeType::ET_POSEDGE) {
             AstVarScope* lastVscp = getCreateLastClk(clkvscp);
diff --git a/src/V3Config.cpp b/src/V3Config.cpp
index 6b511b6c7..93dd8a81f 100644
--- a/src/V3Config.cpp
+++ b/src/V3Config.cpp
@@ -24,6 +24,7 @@
 #include <map>
 #include <set>
 #include <string>
+#include <unordered_map>
 
 //######################################################################
 // Resolve wildcards in files, modules, ftasks or variables
@@ -346,6 +347,9 @@ using V3ConfigFileResolver = V3ConfigWildcardResolver<V3ConfigFile>;
 class V3ConfigResolver final {
     V3ConfigModuleResolver m_modules;  // Access to module names (with wildcards)
     V3ConfigFileResolver m_files;  // Access to file names (with wildcards)
+    std::unordered_map<string, std::unordered_map<string, vluint64_t>>
+        m_profileData;  // Access to profile_data records
+    FileLine* m_profileFileLine = nullptr;
 
     static V3ConfigResolver s_singleton;  // Singleton (not via local static, as that's slow)
     V3ConfigResolver() = default;
@@ -356,6 +360,20 @@ public:
 
     V3ConfigModuleResolver& modules() { return m_modules; }
     V3ConfigFileResolver& files() { return m_files; }
+
+    void addProfileData(FileLine* fl, const string& model, const string& key, vluint64_t cost) {
+        if (!m_profileFileLine) m_profileFileLine = fl;  // Keep only the first FileLine seen
+        if (cost == 0) cost = 1;  // Cost 0 means delete (or no data), so record at least 1
+        m_profileData[model][key] += cost;  // Repeated (model, key) records accumulate
+    }
+    vluint64_t getProfileData(const string& model, const string& key) const {
+        // Accumulated cost for (model, key); 0 when no such record was added
+        const auto modelIt = m_profileData.find(model);
+        if (modelIt == m_profileData.cend()) return 0;
+        const auto costIt = modelIt->second.find(key);
+        return (costIt == modelIt->second.cend()) ? 0 : costIt->second;
+    }
+    FileLine* getProfileDataFileLine() const { return m_profileFileLine; }  // Maybe null
 };
 
 V3ConfigResolver V3ConfigResolver::s_singleton;
@@ -392,10 +410,6 @@ void V3Config::addIgnore(V3ErrorCode code, bool on, const string& filename, int
     }
 }
 
-void V3Config::addModulePragma(const string& module, AstPragmaType pragma) {
-    V3ConfigResolver::s().modules().at(module).addModulePragma(pragma);
-}
-
 void V3Config::addInline(FileLine* fl, const string& module, const string& ftask, bool on) {
     if (ftask.empty()) {
         V3ConfigResolver::s().modules().at(module).setInline(on);
@@ -408,6 +422,15 @@ void V3Config::addInline(FileLine* fl, const string& module, const string& ftask
     }
 }
 
+void V3Config::addModulePragma(const string& module, AstPragmaType pragma) {
+    V3ConfigResolver::s().modules().at(module).addModulePragma(pragma);
+}
+
+void V3Config::addProfileData(FileLine* fl, const string& model, const string& key,
+                              vluint64_t cost) {
+    V3ConfigResolver::s().addProfileData(fl, model, key, cost);
+}
+
 void V3Config::addVarAttr(FileLine* fl, const string& module, const string& ftask,
                           const string& var, AstAttrType attr, AstSenTree* sensep) {
     // Semantics: sensep only if public_flat_rw
@@ -497,6 +520,13 @@ void V3Config::applyVarAttr(AstNodeModule* modulep, AstNodeFTask* ftaskp, AstVar
     if (vp) vp->apply(varp);
 }
 
+vluint64_t V3Config::getProfileData(const string& model, const string& key) {
+    return V3ConfigResolver::s().getProfileData(model, key);
+}
+FileLine* V3Config::getProfileDataFileLine() {
+    return V3ConfigResolver::s().getProfileDataFileLine();
+}
+
 bool V3Config::waive(FileLine* filelinep, V3ErrorCode code, const string& message) {
     V3ConfigFile* filep = V3ConfigResolver::s().files().resolve(filelinep->filename());
     if (!filep) return false;
diff --git a/src/V3Config.h b/src/V3Config.h
index 470827fdf..2931b53e3 100644
--- a/src/V3Config.h
+++ b/src/V3Config.h
@@ -33,17 +33,23 @@ public:
     static void addCoverageBlockOff(const string& file, int lineno);
     static void addCoverageBlockOff(const string& module, const string& blockname);
     static void addIgnore(V3ErrorCode code, bool on, const string& filename, int min, int max);
-    static void addWaiver(V3ErrorCode code, const string& filename, const string& message);
-    static void addModulePragma(const string& module, AstPragmaType pragma);
     static void addInline(FileLine* fl, const string& module, const string& ftask, bool on);
+    static void addModulePragma(const string& module, AstPragmaType pragma);
+    static void addProfileData(FileLine* fl, const string& model, const string& key,
+                               vluint64_t cost);
+    static void addWaiver(V3ErrorCode code, const string& filename, const string& message);
     static void addVarAttr(FileLine* fl, const string& module, const string& ftask,
                            const string& signal, AstAttrType type, AstSenTree* nodep);
+
     static void applyCase(AstCase* nodep);
     static void applyCoverageBlock(AstNodeModule* modulep, AstBegin* nodep);
     static void applyIgnores(FileLine* filelinep);
     static void applyModule(AstNodeModule* modulep);
     static void applyFTask(AstNodeModule* modulep, AstNodeFTask* ftaskp);
     static void applyVarAttr(AstNodeModule* modulep, AstNodeFTask* ftaskp, AstVar* varp);
+
+    static vluint64_t getProfileData(const string& model, const string& key);  // 0 if no data
+    static FileLine* getProfileDataFileLine();
     static bool waive(FileLine* filelinep, V3ErrorCode code, const string& message);
 };
 
diff --git a/src/V3Const.cpp b/src/V3Const.cpp
index 6c279f8ad..047b0810d 100644
--- a/src/V3Const.cpp
+++ b/src/V3Const.cpp
@@ -923,8 +923,9 @@ private:
         if (ccastp) {
             andp->replaceWith(ccastp);
             VL_DO_DANGLING(andp->deleteTree(), andp);
+            return true;
         }
-        return ccastp;
+        return false;
     }
 
     static bool operandAndOrSame(const AstNode* nodep) {
@@ -1312,6 +1313,15 @@ private:
         // but for now can disable en-mass until V3Purify takes effect.
         return m_doShort || VN_IS(nodep, VarRef) || VN_IS(nodep, Const);
     }
+    bool isTreePureRecurse(AstNode* nodep) {
+        // True when nodep and every op1p..op4p subtree is pure; memoize if called commonly
+        if (!nodep->isPure()) return false;
+        AstNode* const opsp[] = {nodep->op1p(), nodep->op2p(), nodep->op3p(), nodep->op4p()};
+        for (AstNode* const opp : opsp) {
+            if (opp && !isTreePureRecurse(opp)) return false;
+        }
+        return true;
+    }
 
     // Extraction checks
     bool warnSelect(AstSel* nodep) {
@@ -1337,10 +1347,19 @@ private:
                        && (nodep->msbConst() > maxDeclBit || nodep->lsbConst() > maxDeclBit)) {
                 // See also warning in V3Width
                 // Must adjust by element width as declRange() is in number of elements
+                string msbLsbProtected;
+                if (nodep->declElWidth() == 0) {
+                    msbLsbProtected = "(nodep->declElWidth() == 0) "
+                                      + std::to_string(nodep->msbConst()) + ":"
+                                      + std::to_string(nodep->lsbConst());
+                } else {
+                    msbLsbProtected = std::to_string(nodep->msbConst() / nodep->declElWidth())
+                                      + ":"
+                                      + std::to_string(nodep->lsbConst() / nodep->declElWidth());
+                }
                 nodep->v3warn(SELRANGE,
                               "Selection index out of range: "
-                                  << (nodep->msbConst() / nodep->declElWidth()) << ":"
-                                  << (nodep->lsbConst() / nodep->declElWidth()) << " outside "
+                                  << msbLsbProtected << " outside "
                                   << nodep->declRange().hiMaxSelect() << ":0"
                                   << (nodep->declRange().lo() >= 0
                                           ? ""
@@ -2199,8 +2218,8 @@ private:
     void swapSides(AstNodeBiCom* nodep) {
         // COMMUTATIVE({a},CONST) -> COMMUTATIVE(CONST,{a})
         // This simplifies later optimizations
-        AstNode* lhsp = nodep->lhsp()->unlinkFrBackWithNext();
-        AstNode* rhsp = nodep->rhsp()->unlinkFrBackWithNext();
+        AstNode* const lhsp = nodep->lhsp()->unlinkFrBackWithNext();
+        AstNode* const rhsp = nodep->rhsp()->unlinkFrBackWithNext();
         nodep->lhsp(rhsp);
         nodep->rhsp(lhsp);
         iterate(nodep);  // Again?
@@ -2209,8 +2228,8 @@ private:
     int operandConcatMove(AstConcat* nodep) {
         //    CONCAT under concat  (See moveConcat)
         // Return value: true indicates to do it; 2 means move to LHS
-        AstConcat* abConcp = VN_CAST(nodep->lhsp(), Concat);
-        AstConcat* bcConcp = VN_CAST(nodep->rhsp(), Concat);
+        AstConcat* const abConcp = VN_CAST(nodep->lhsp(), Concat);
+        AstConcat* const bcConcp = VN_CAST(nodep->rhsp(), Concat);
         if (!abConcp && !bcConcp) return 0;
         if (bcConcp) {
             AstNode* ap = nodep->lhsp();
@@ -2783,10 +2802,13 @@ private:
                 }
                 VL_DO_DANGLING(nodep->deleteTree(), nodep);
             } else if (!afterComment(nodep->ifsp()) && !afterComment(nodep->elsesp())) {
-                // Empty block, remove it
-                // Note if we support more C++ then there might be side
-                // effects in the condition itself
-                VL_DO_DANGLING(nodep->unlinkFrBack()->deleteTree(), nodep);
+                if (!isTreePureRecurse(nodep->condp())) {
+                    // Condition has side effect - leave - perhaps in
+                    // future simplify to remove all but side effect terms
+                } else {
+                    // Empty block, remove it
+                    VL_DO_DANGLING(nodep->unlinkFrBack()->deleteTree(), nodep);
+                }
             } else if (!afterComment(nodep->ifsp())) {
                 UINFO(4, "IF({x}) nullptr {...} => IF(NOT{x}}: " << nodep << endl);
                 AstNode* condp = nodep->condp();
diff --git a/src/V3EmitCFunc.h b/src/V3EmitCFunc.h
index f4e72f380..fa9a3d157 100644
--- a/src/V3EmitCFunc.h
+++ b/src/V3EmitCFunc.h
@@ -392,10 +392,15 @@ public:
         emitCCallArgs(nodep, "");
     }
     virtual void visit(AstCNew* nodep) override {
+        bool comma = false;  // True once something precedes the next argument
         puts("std::make_shared<" + prefixNameProtect(nodep->dtypep()) + ">(");
         puts("vlSymsp");  // TODO make this part of argsp, and eliminate when unnecessary
-        if (nodep->argsp()) puts(", ");
-        iterateAndNextNull(nodep->argsp());
+        if (nodep->argsp()) comma = true;  // vlSymsp was already emitted above
+        for (AstNode* subnodep = nodep->argsp(); subnodep; subnodep = subnodep->nextp()) {
+            if (comma) puts(", ");  // Separator before every argument in the list
+            iterate(subnodep);
+            comma = true;
+        }
         puts(")");
     }
     virtual void visit(AstCMethodHard* nodep) override {
diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp
index 140f03140..2a7c087bd 100644
--- a/src/V3EmitCModel.cpp
+++ b/src/V3EmitCModel.cpp
@@ -323,7 +323,24 @@ class EmitCModel final : public EmitCFunc {
         puts(" loop\\n\"););\n");
         if (initial)
             puts(topModNameProtected + "__" + protect("_eval_settle") + "(&(vlSymsp->TOP));\n");
+
+        const string recName = "__Vprfloop";
+        if (v3Global.opt.profThreads() && !initial) {
+            puts("VlProfileRec* " + recName + " = nullptr;\n");
+            // Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
+            puts("if (VL_UNLIKELY(vlSymsp->__Vm_profile_cycle_start)) {\n");
+            // Eval start
+            puts(/**/ recName + " = vlSymsp->__Vm_threadPoolp->profileAppend();\n");
+            puts(/**/ recName + "->startEvalLoop(VL_RDTSC_Q());\n");
+            puts("}\n");
+        }
+
         puts(topModNameProtected + "__" + protect("_eval") + "(&(vlSymsp->TOP));\n");
+
+        if (v3Global.opt.profThreads() && !initial) {
+            puts("if (VL_UNLIKELY(" + recName + ")) " + recName + "->endRecord(VL_RDTSC_Q());\n");
+        }
+
         if (v3Global.rootp()->changeRequest()) {
             puts("if (VL_UNLIKELY(++__VclockLoop > " + cvtToStr(v3Global.opt.convergeLimit())
                  + ")) {\n");
@@ -354,7 +371,7 @@ class EmitCModel final : public EmitCFunc {
              + ");\n");
     }
 
-    void emitStandardMethods(AstNodeModule* modp) {
+    void emitStandardMethods1(AstNodeModule* modp) {
         UASSERT_OBJ(modp->isTop(), modp, "Attempting to emitWrapEval for non-top class");
 
         const string topModNameProtected = prefixNameProtect(modp);
@@ -385,16 +402,21 @@ class EmitCModel final : public EmitCFunc {
         emitSettleLoop(modp, /* initial: */ true);
         ensureNewLine();
         puts("}\n");
+    }
 
+    void emitStandardMethods2(AstNodeModule* modp) {
+        const string topModNameProtected = prefixNameProtect(modp);
         // ::eval_step
         puts("\nvoid " + topClassName() + "::eval_step() {\n");
         puts("VL_DEBUG_IF(VL_DBG_MSGF(\"+++++TOP Evaluate " + topClassName()
              + "::eval_step\\n\"); );\n");
+
         puts("#ifdef VL_DEBUG\n");
         putsDecoration("// Debug assertions\n");
         puts(topModNameProtected + "__" + protect("_eval_debug_assertions")
              + "(&(vlSymsp->TOP));\n");
         puts("#endif  // VL_DEBUG\n");
+
         putsDecoration("// Initialize\n");
         puts("if (VL_UNLIKELY(!vlSymsp->__Vm_didInit)) " + protect("_eval_initial_loop")
              + "(vlSymsp);\n");
@@ -406,56 +428,80 @@ class EmitCModel final : public EmitCFunc {
             puts("Verilated::mtaskId(" + cvtToStr(mtaskId) + ");\n");
         }
 
-        if (v3Global.opt.mtasks() && v3Global.opt.profThreads()) {
+        if (v3Global.opt.profThreads()) {
             puts("if (VL_UNLIKELY((vlSymsp->_vm_contextp__->profThreadsStart() != "
                  "vlSymsp->__Vm_profile_time_finished)\n");
             puts(" && (VL_TIME_Q() > vlSymsp->_vm_contextp__->profThreadsStart())\n");
             puts(" && (vlSymsp->_vm_contextp__->profThreadsWindow() >= 1))) {\n");
             // Within a profile (either starting, middle, or end)
-            puts("if (vlSymsp->__Vm_profile_window_ct == 0) {\n");  // Opening file?
+            puts(/**/ "if (vlSymsp->__Vm_profile_window_ct == 0) {\n");  // Opening file?
+            puts(/**/ "VL_DEBUG_IF(VL_DBG_MSGF(\"+ profile start warmup\\n\"););\n");
             // Start profile on this cycle. We'll capture a window worth, then
             // only analyze the next window worth. The idea is that the first window
             // capture will hit some cache-cold stuff (eg printf) but it'll be warm
             // by the time we hit the second window, we hope.
-            puts("vlSymsp->__Vm_profile_cycle_start = VL_RDTSC_Q();\n");
+            puts(/****/ "vlSymsp->__Vm_profile_cycle_start = VL_RDTSC_Q();\n");
             // "* 2" as first half is warmup, second half is collection
-            puts("vlSymsp->__Vm_profile_window_ct = vlSymsp->_vm_contextp__->profThreadsWindow() "
-                 "* 2 "
-                 "+ "
-                 "1;\n");
-            puts("}\n");
-            puts("--(vlSymsp->__Vm_profile_window_ct);\n");
-            puts("if (vlSymsp->__Vm_profile_window_ct == "
-                 "vlSymsp->_vm_contextp__->profThreadsWindow()) {\n");
+            puts(/****/ "vlSymsp->__Vm_profile_window_ct"
+                        " = vlSymsp->_vm_contextp__->profThreadsWindow()"
+                        " * 2 + 1;\n");
+            puts(/**/ "}\n");
+            puts(/**/ "--(vlSymsp->__Vm_profile_window_ct);\n");
+            puts(/**/ "if (vlSymsp->__Vm_profile_window_ct"
+                      " == vlSymsp->_vm_contextp__->profThreadsWindow()) {\n");
             // This barrier record in every threads' profile demarcates the
             // cache-warm-up cycles before the barrier from the actual profile
             // cycles afterward.
-            puts("vlSymsp->__Vm_threadPoolp->profileAppendAll(");
-            puts("VlProfileRec(VlProfileRec::Barrier()));\n");
-            puts("vlSymsp->__Vm_profile_cycle_start = VL_RDTSC_Q();\n");
-            puts("}\n");
-            puts("else if (vlSymsp->__Vm_profile_window_ct == 0) {\n");
-            // Ending file.
-            puts("vluint64_t elapsed = VL_RDTSC_Q() - vlSymsp->__Vm_profile_cycle_start;\n");
-            puts("vlSymsp->__Vm_threadPoolp->profileDump(vlSymsp->_vm_contextp__->"
-                 "profThreadsFilename().c_str(), elapsed);\n");
+            puts(/****/ "vlSymsp->__Vm_threadPoolp->profileAppendAll(");
+            puts(/****/ "VlProfileRec{VlProfileRec::Barrier{}});\n");
+            puts(/****/ "vlSymsp->__Vm_profile_cycle_start = VL_RDTSC_Q();\n");
+            puts(/**/ "}\n");
+            // Ending trace file?
+            puts(/**/ "else if (vlSymsp->__Vm_profile_window_ct == 0) {\n");
+            puts(/****/ "vluint64_t tick_end = VL_RDTSC_Q();\n");
+            puts(/****/ "VL_DEBUG_IF(VL_DBG_MSGF(\"+ profile end\\n\"););\n");
+            puts(/****/ "vlSymsp->__Vm_threadPoolp->profileDump("
+                        "vlSymsp->_vm_contextp__->profThreadsFilename().c_str(), "
+                        "vlSymsp->__Vm_profile_cycle_start, "
+                        "tick_end);\n");
             // This turns off the test to enter the profiling code, but still
             // allows the user to collect another profile by changing
             // profThreadsStart
-            puts("vlSymsp->__Vm_profile_time_finished = "
-                 "vlSymsp->_vm_contextp__->profThreadsStart();\n");
-            puts("vlSymsp->__Vm_profile_cycle_start = 0;\n");
+            puts(/****/ "vlSymsp->__Vm_profile_time_finished = "
+                        "vlSymsp->_vm_contextp__->profThreadsStart();\n");
+            puts(/****/ "vlSymsp->__Vm_profile_cycle_start = 0;\n");
+            puts(/**/ "}\n");
             puts("}\n");
+        }
+
+        const string recName = "__Vprfeval";
+        if (v3Global.opt.profThreads()) {
+            puts("VlProfileRec* " + recName + " = nullptr;\n");
+            // Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
+            puts("if (VL_UNLIKELY(vlSymsp->__Vm_profile_cycle_start)) {\n");
+            // Eval start
+            puts(/**/ recName + " = vlSymsp->__Vm_threadPoolp->profileAppend();\n");
+            puts(/**/ recName + "->startEval(VL_RDTSC_Q());\n");
             puts("}\n");
         }
 
         emitSettleLoop(modp, /* initial: */ false);
+
+        putsDecoration("// Evaluate cleanup\n");
         if (v3Global.opt.threads() == 1) {
             puts("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);\n");
         }
         if (v3Global.opt.threads()) puts("Verilated::endOfEval(vlSymsp->__Vm_evalMsgQp);\n");
-        puts("}\n");
 
+        if (v3Global.opt.profThreads()) {
+            // End eval record
+            puts("if (VL_UNLIKELY(" + recName + ")) " + recName + "->endRecord(VL_RDTSC_Q());\n");
+        }
+        puts("}\n");
+    }
+
+    void emitStandardMethods3(AstNodeModule* modp) {
+        const string topModNameProtected = prefixNameProtect(modp);
         // ::eval_end_step
         if (v3Global.needTraceDumper() && !optSystemC()) {
             puts("\nvoid " + topClassName() + "::eval_end_step() {\n");
@@ -572,7 +618,9 @@ class EmitCModel final : public EmitCFunc {
 
         emitConstructorImplementation(modp);
         emitDestructorImplementation();
-        emitStandardMethods(modp);
+        emitStandardMethods1(modp);
+        emitStandardMethods2(modp);
+        emitStandardMethods3(modp);
         if (v3Global.opt.trace()) { emitTraceMethods(modp); }
         if (v3Global.opt.savable()) { emitSerializationFunctions(); }
 
diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp
index fc430a369..76e82adf0 100644
--- a/src/V3EmitCSyms.cpp
+++ b/src/V3EmitCSyms.cpp
@@ -21,6 +21,7 @@
 #include "V3EmitC.h"
 #include "V3EmitCBase.h"
 #include "V3LanguageWords.h"
+#include "V3PartitionGraph.h"
 
 #include <algorithm>
 #include <map>
@@ -214,12 +215,13 @@ class EmitCSyms final : EmitCBaseVisitor {
                     } else {
                         varBase = whole;
                     }
-                    // UINFO(9,"For "<<scopep->name()<<" - "<<varp->name()<<"  Scp "<<scpName<<"
-                    // Var "<<varBase<<endl);
+                    // UINFO(9, "For " << scopep->name() << " - " << varp->name() << "  Scp "
+                    // << scpName << " Var " << varBase << endl);
                     const string varBasePretty = AstNode::prettyName(varBase);
                     const string scpPretty = AstNode::prettyName(scpName);
                     const string scpSym = scopeSymString(scpName);
-                    // UINFO(9," scnameins sp "<<scpName<<" sp "<<scpPretty<<" ss "<<scpSym<<endl);
+                    // UINFO(9, " scnameins sp " << scpName << " sp " << scpPretty << " ss "
+                    // << scpSym << endl);
                     if (v3Global.opt.vpi()) varHierarchyScopes(scpName);
                     if (m_scopeNames.find(scpSym) == m_scopeNames.end()) {
                         m_scopeNames.insert(std::make_pair(
@@ -393,6 +395,7 @@ void EmitCSyms::emitSymHdr() {
     if (v3Global.needTraceDumper()) {
         puts("#include \"" + v3Global.opt.traceSourceLang() + ".h\"\n");
     }
+    if (v3Global.opt.profThreads()) puts("#include \"verilated_profiler.h\"\n");
 
     puts("\n// INCLUDE MODEL CLASS\n");
     puts("\n#include \"" + topClassName() + ".h\"\n");
@@ -474,6 +477,21 @@ void EmitCSyms::emitSymHdr() {
         puts("];\n");
     }
 
+    if (v3Global.opt.profThreads()) {
+        puts("\n// PROFILING\n");
+        vluint64_t maxProfilerId = 0;
+        if (v3Global.opt.mtasks()) {
+            for (const V3GraphVertex* vxp
+                 = v3Global.rootp()->execGraphp()->depGraphp()->verticesBeginp();
+                 vxp; vxp = vxp->verticesNextp()) {
+                ExecMTask* mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
+                if (maxProfilerId < mtp->profilerId()) maxProfilerId = mtp->profilerId();
+            }
+        }
+        ++maxProfilerId;  // As size must include 0
+        puts("VerilatedProfiler<" + cvtToStr(maxProfilerId) + "> _vm_profiler;\n");
+    }
+
     if (!m_scopeNames.empty()) {  // Scope names
         puts("\n// SCOPE NAMES\n");
         for (const auto& itr : m_scopeNames) {
@@ -653,6 +671,7 @@ void EmitCSyms::emitSymImp() {
     }
 
     puts("// FUNCTIONS\n");
+
     // Destructor
     puts(symClassName() + "::~" + symClassName() + "()\n");
     puts("{\n");
@@ -662,7 +681,11 @@ void EmitCSyms::emitSymImp() {
         puts("if (__Vm_dumping) _traceDumpClose();\n");
         puts("#endif  // VM_TRACE\n");
     }
-    if (v3Global.opt.mtasks()) { puts("delete __Vm_threadPoolp;\n"); }
+    if (v3Global.opt.profThreads()) {
+        puts("_vm_profiler.write(\"" + topClassName()
+             + "\", _vm_contextp__->profVltFilename());\n");
+    }
+    if (v3Global.opt.mtasks()) puts("delete __Vm_threadPoolp;\n");
     puts("}\n\n");
 
     // Constructor
@@ -717,6 +740,19 @@ void EmitCSyms::emitSymImp() {
     }
     puts("{\n");
 
+    if (v3Global.opt.profThreads()) {
+        puts("// Configure profiling\n");
+        if (v3Global.opt.mtasks()) {
+            for (const V3GraphVertex* vxp
+                 = v3Global.rootp()->execGraphp()->depGraphp()->verticesBeginp();
+                 vxp; vxp = vxp->verticesNextp()) {
+                ExecMTask* mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
+                puts("_vm_profiler.addCounter(" + cvtToStr(mtp->profilerId()) + ", \""
+                     + mtp->hashName() + "\");\n");
+            }
+        }
+    }
+
     puts("// Configure time unit / time precision\n");
     if (!v3Global.rootp()->timeunit().isNone()) {
         puts("_vm_contextp__->timeunit(");
diff --git a/src/V3Error.h b/src/V3Error.h
index cac812106..aaa444cec 100644
--- a/src/V3Error.h
+++ b/src/V3Error.h
@@ -110,6 +110,7 @@ public:
         PINNOTFOUND,    // instance port name not found in it's module
         PKGNODECL,      // Error: Package/class needs to be predeclared
         PROCASSWIRE,    // Procedural assignment on wire
+        PROFOUTOFDATE,  // Profile data out of date
         PROTECTED,      // detected `pragma protected
         RANDC,          // Unsupported: 'randc' converted to 'rand'
         REALCVT,        // Real conversion
@@ -173,7 +174,7 @@ public:
             "LATCH", "LITENDIAN", "MODDUP",
             "MULTIDRIVEN", "MULTITOP","NOLATCH", "NULLPORT", "PINCONNECTEMPTY",
             "PINMISSING", "PINNOCONNECT",  "PINNOTFOUND", "PKGNODECL", "PROCASSWIRE",
-            "PROTECTED", "RANDC", "REALCVT", "REDEFMACRO",
+            "PROFOUTOFDATE", "PROTECTED", "RANDC", "REALCVT", "REDEFMACRO",
             "SELRANGE", "SHORTREAL", "SPLITVAR", "STMTDLY", "SYMRSVDWORD", "SYNCASYNCNET",
             "TICKCOUNT", "TIMESCALEMOD",
             "UNDRIVEN", "UNOPT", "UNOPTFLAT", "UNOPTTHREADS",
diff --git a/src/V3Hasher.cpp b/src/V3Hasher.cpp
index b83f8386a..259656989 100644
--- a/src/V3Hasher.cpp
+++ b/src/V3Hasher.cpp
@@ -81,7 +81,7 @@ private:
 
     virtual void visit(AstNode* nodep) override {
 #if VL_DEBUG
-        UINFO(0, "%Warning: Hashing node as AstNode: " << nodep);
+        UINFO(0, "%Warning: Hashing node as AstNode: " << nodep << endl);
 #endif
         m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, [=]() {});
     }
@@ -455,6 +455,9 @@ private:
             iterateNull(nodep->ftaskp());
         });
     }
+    virtual void visit(AstMTaskBody* nodep) override {  // Empty lambda: no extra fields hashed
+        m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, [=]() {});
+    }
     virtual void visit(AstNodeProcedure* nodep) override {
         m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, [=]() {});
     }
diff --git a/src/V3LinkLevel.cpp b/src/V3LinkLevel.cpp
index d83627ef3..bbecddae1 100644
--- a/src/V3LinkLevel.cpp
+++ b/src/V3LinkLevel.cpp
@@ -132,6 +132,8 @@ void V3LinkLevel::timescaling(const ModVec& mods) {
         v3Global.rootp()->timeprecisionMerge(v3Global.rootp()->fileline(),
                                              VTimescale(VTimescale::TS_DEFAULT));
     }
+
+    // Classes under package have timescale propagated in V3LinkParse
 }
 
 //######################################################################
diff --git a/src/V3LinkParse.cpp b/src/V3LinkParse.cpp
index 22cc38dc8..6dff6a029 100644
--- a/src/V3LinkParse.cpp
+++ b/src/V3LinkParse.cpp
@@ -504,7 +504,8 @@ private:
         {
             // Module: Create sim table for entire module and iterate
             cleanFileline(nodep);
-            //
+            // Classes inherit from upper package
+            if (m_modp && nodep->timeunit().isNone()) nodep->timeunit(m_modp->timeunit());
             m_modp = nodep;
             m_genblkAbove = 0;
             m_genblkNum = 0;
diff --git a/src/V3Number.cpp b/src/V3Number.cpp
index 595830d27..e4ea84f22 100644
--- a/src/V3Number.cpp
+++ b/src/V3Number.cpp
@@ -605,6 +605,24 @@ string V3Number::displayed(FileLine* fl, const string& vformat) const {
             while (bit && bitIs0(bit)) bit--;
         while ((bit % 3) != 2) bit++;
         for (; bit > 0; bit -= 3) {
+            const int numX = countX(bit - 2, 3);
+            const int numZ = countZ(bit - 2, 3);
+            if (numX == 3 || numX == width() - (bit - 2)) {
+                str += 'x';
+                continue;
+            }
+            if (numZ == 3 || numZ == width() - (bit - 2)) {
+                str += 'z';
+                continue;
+            }
+            if (numX > 0) {
+                str += 'X';
+                continue;
+            }
+            if (numZ > 0) {
+                str += 'Z';
+                continue;
+            }
             int v = bitsValue(bit - 2, 3);
             str += static_cast<char>('0' + v);
         }
@@ -617,6 +635,24 @@ string V3Number::displayed(FileLine* fl, const string& vformat) const {
             while (bit && bitIs0(bit)) bit--;
         while ((bit % 4) != 3) bit++;
         for (; bit > 0; bit -= 4) {
+            const int numX = countX(bit - 3, 4);
+            const int numZ = countZ(bit - 3, 4);
+            if (numX == 4 || numX == width() - (bit - 3)) {
+                str += 'x';
+                continue;
+            }
+            if (numZ == 4 || numZ == width() - (bit - 3)) {
+                str += 'z';
+                continue;
+            }
+            if (numX > 0) {
+                str += 'X';
+                continue;
+            }
+            if (numZ > 0) {
+                str += 'Z';
+                continue;
+            }
             int v = bitsValue(bit - 3, 4);
             if (v >= 10) {
                 str += static_cast<char>('a' + v - 10);
@@ -667,17 +703,33 @@ string V3Number::displayed(FileLine* fl, const string& vformat) const {
             if (issigned) dchars++;  // space for sign
             fmtsize = cvtToStr(int(dchars));
         }
-        if (issigned) {
-            if (width() > 64) {
-                str = toDecimalS();
+        bool hasXZ = false;
+        if (isAllX()) {
+            str = "x";
+            hasXZ = true;
+        } else if (isAllZ()) {
+            str = "z";
+            hasXZ = true;
+        } else if (isAnyX()) {
+            str = "X";
+            hasXZ = true;
+        } else if (isAnyZ()) {
+            str = "Z";
+            hasXZ = true;
+        }
+        if (!hasXZ) {
+            if (issigned) {
+                if (width() > 64) {
+                    str = toDecimalS();
+                } else {
+                    str = cvtToStr(toSQuad());
+                }
             } else {
-                str = cvtToStr(toSQuad());
-            }
-        } else {
-            if (width() > 64) {
-                str = toDecimalU();
-            } else {
-                str = cvtToStr(toUQuad());
+                if (width() > 64) {
+                    str = toDecimalU();
+                } else {
+                    str = cvtToStr(toUQuad());
+                }
             }
         }
         const bool zeropad = fmtsize.length() > 0 && fmtsize[0] == '0';
@@ -977,6 +1029,22 @@ bool V3Number::isLtXZ(const V3Number& rhs) const {
     }
     return false;
 }
+int V3Number::countX(int lsb, int nbits) const {
+    // Tally the X bits among bits lsb .. lsb+nbits-1; bits at or beyond width() are
+    // never examined, so a range hanging off the top of the number is simply cut short
+    const int endBit = (lsb + nbits < width()) ? (lsb + nbits) : width();
+    int numX = 0;
+    for (int bit = lsb; bit < endBit; ++bit) numX += bitIsX(bit) ? 1 : 0;
+    return numX;
+}
+int V3Number::countZ(int lsb, int nbits) const {
+    // Tally the Z bits among bits lsb .. lsb+nbits-1; bits at or beyond width() are
+    // never examined, so a range hanging off the top of the number is simply cut short
+    const int endBit = (lsb + nbits < width()) ? (lsb + nbits) : width();
+    int numZ = 0;
+    for (int bit = lsb; bit < endBit; ++bit) numZ += bitIsZ(bit) ? 1 : 0;
+    return numZ;
+}
 
 int V3Number::widthMin() const {
     for (int bit = width() - 1; bit > 0; bit--) {
diff --git a/src/V3Number.h b/src/V3Number.h
index 09b15e67b..9a33a6b7a 100644
--- a/src/V3Number.h
+++ b/src/V3Number.h
@@ -204,6 +204,9 @@ private:
         return v;
     }
 
+    int countX(int lsb, int nbits) const;
+    int countZ(int lsb, int nbits) const;
+
     int words() const { return ((width() + 31) / 32); }
     uint32_t hiWordMask() const { return VL_MASK_I(width()); }
 
diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp
index f12a1bce3..d9fc5f1d1 100644
--- a/src/V3Partition.cpp
+++ b/src/V3Partition.cpp
@@ -18,6 +18,7 @@
 #include "verilatedos.h"
 
 #include "V3EmitCBase.h"
+#include "V3Config.h"
 #include "V3Os.h"
 #include "V3File.h"
 #include "V3GraphAlg.h"
@@ -27,6 +28,7 @@
 #include "V3PartitionGraph.h"
 #include "V3Scoreboard.h"
 #include "V3Stats.h"
+#include "V3UniqueNames.h"
 
 #include <list>
 #include <memory>
@@ -2237,11 +2239,11 @@ public:
         std::vector<uint32_t> busyUntil(m_nThreads, 0);
 
         // MTasks ready to be assigned next. All their dependencies are already assigned.
-        std::set<const ExecMTask*, MTaskCmp> readyMTasks;
+        std::set<ExecMTask*, MTaskCmp> readyMTasks;
 
         // Build initial ready list
         for (V3GraphVertex* vxp = mtaskGraph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
-            const ExecMTask* const mtaskp = dynamic_cast<ExecMTask*>(vxp);
+            ExecMTask* const mtaskp = dynamic_cast<ExecMTask*>(vxp);
             if (isReady(schedule, mtaskp)) readyMTasks.insert(mtaskp);
         }
 
@@ -2250,9 +2252,9 @@ public:
             // on each thread (in that thread's local time frame.)
             uint32_t bestTime = 0xffffffff;
             uint32_t bestThreadId = 0;
-            const ExecMTask* bestMtaskp = nullptr;  // Todo: const ExecMTask*
+            ExecMTask* bestMtaskp = nullptr;  // Todo: const ExecMTask*
             for (uint32_t threadId = 0; threadId < m_nThreads; ++threadId) {
-                for (const ExecMTask* const mtaskp : readyMTasks) {
+                for (ExecMTask* const mtaskp : readyMTasks) {
                     uint32_t timeBegin = busyUntil[threadId];
                     if (timeBegin > bestTime) {
                         UINFO(6, "th " << threadId << " busy until " << timeBegin
@@ -2287,10 +2289,11 @@ public:
             std::vector<const ExecMTask*>& bestThread = schedule.threads[bestThreadId];
 
             // Update algorithm state
+            bestMtaskp->predictStart(bestTime);  // Only for gantt reporting
             const uint32_t bestEndTime = bestTime + bestMtaskp->cost();
             schedule.mtaskState[bestMtaskp].completionTime = bestEndTime;
             schedule.mtaskState[bestMtaskp].threadId = bestThreadId;
-            if (!bestThread.empty()) { schedule.mtaskState[bestThread.back()].nextp = bestMtaskp; }
+            if (!bestThread.empty()) schedule.mtaskState[bestThread.back()].nextp = bestMtaskp;
             busyUntil[bestThreadId] = bestEndTime;
 
             // Add the MTask to the schedule
@@ -2301,7 +2304,7 @@ public:
             UASSERT_OBJ(erased > 0, bestMtaskp, "Should have erased something?");
             for (V3GraphEdge* edgeOutp = bestMtaskp->outBeginp(); edgeOutp;
                  edgeOutp = edgeOutp->outNextp()) {
-                const ExecMTask* const nextp = dynamic_cast<ExecMTask*>(edgeOutp->top());
+                ExecMTask* const nextp = dynamic_cast<ExecMTask*>(edgeOutp->top());
                 // Dependent MTask should not yet be assigned to a thread
                 UASSERT(schedule.threadId(nextp) == ThreadSchedule::UNASSIGNED,
                         "Tasks after one being assigned should not be assigned yet");
@@ -2614,15 +2617,152 @@ void V3Partition::go(V3Graph* mtasksp) {
     }
 }
 
+// Adds 'cost' to the total kept for 'id'. NOTE(review): non-static, no caller visible here
+void add(std::unordered_map<int, vluint64_t>& cmap, int id, vluint64_t cost) { cmap[id] += cost; }
+using EstimateAndProfiled = std::pair<uint64_t, vluint64_t>;  // cost est, cost profiled
+using Costs = std::unordered_map<uint32_t, EstimateAndProfiled>;
+
+// Normalize, in place, the (estimate, profiled) cost pairs: put the estimates on the
+// profiled data's scale, then shrink all costs until the largest is at most 10000000
+static void normalizeCosts(Costs& costs) {
+    // Multiply a cost by a factor, never letting a nonzero cost truncate down to zero
+    const auto rescale = [](vluint64_t cost, double factor) {
+        const double product = static_cast<double>(cost) * factor;
+        return static_cast<uint64_t>((cost && product < 1) ? 1 : product);
+    };
+
+    // Total the estimates and profiles, counting only entries that have profile data
+    vluint64_t profiledTotal = 0;
+    vluint64_t estimateTotal = 0;
+    for (const auto& entry : costs) {
+        const EstimateAndProfiled& pairRef = entry.second;
+        if (!pairRef.second) continue;
+        estimateTotal += pairRef.first;
+        profiledTotal += pairRef.second;
+    }
+
+    // Rescale every estimate by the profiled/estimated ratio, so that entries
+    // lacking profile data end up on the same relative scale as those having it
+    // (this improves results when only a few profiles are missing). Skipped when
+    // no estimate could be paired with a profile, as then there is no ratio.
+    if (estimateTotal) {
+        const double ratio
+            = static_cast<double>(profiledTotal) / static_cast<double>(estimateTotal);
+        UINFO(5, "Estimated data needs scaling by "
+                     << ratio << ", sumCostProfiled=" << profiledTotal
+                     << " sumCostEstimate=" << estimateTotal << endl);
+        for (auto& entry : costs) entry.second.first = rescale(entry.second.first, ratio);
+    }
+
+    // Costs can overflow a uint32, so find the largest cost of either kind...
+    vluint64_t largest = 0;
+    for (const auto& entry : costs) {
+        const EstimateAndProfiled& pairRef = entry.second;
+        if (largest < pairRef.first) largest = pairRef.first;
+        if (largest < pairRef.second) largest = pairRef.second;
+        UINFO(9, "Post uint scale: ce = " << pairRef.first << " cp=" << pairRef.second << endl);
+    }
+    // ... and if it exceeds the ceiling, shrink everything proportionally to fit
+    const vluint64_t ceiling = 10000000;  // Extra room for future algorithms to add costs
+    if (largest > ceiling) {
+        const double shrink = static_cast<double>(ceiling) / static_cast<double>(largest);
+        UINFO(5, "Scaling data to within 32-bits by multiply by=" << shrink << ", maxCost="
+                                                                  << largest << endl);
+        for (auto& entry : costs) {
+            EstimateAndProfiled& pairRef = entry.second;
+            pairRef.first = rescale(pairRef.first, shrink);
+            pairRef.second = rescale(pairRef.second, shrink);
+        }
+    }
+}
+
+void V3Partition::selfTestNormalizeCosts() {
+    {  // Test that omitted profile data correctly scales estimates
+        Costs costs({// id  est  prof
+                     {1, {10, 1000}},
+                     {2, {20, 0}},  // Note no profile
+                     {3, {30, 3000}}});
+        normalizeCosts(costs);
+        UASSERT_SELFTEST(uint64_t, costs[1].first, 1000);  // Ratio is (1000+3000)/(10+30) = 100
+        UASSERT_SELFTEST(uint64_t, costs[1].second, 1000);
+        UASSERT_SELFTEST(uint64_t, costs[2].first, 2000);  // Unprofiled estimate is scaled too
+        UASSERT_SELFTEST(uint64_t, costs[2].second, 0);  // Missing profile stays 0
+        UASSERT_SELFTEST(uint64_t, costs[3].first, 3000);
+        UASSERT_SELFTEST(uint64_t, costs[3].second, 3000);
+    }
+    {  // Test that very large profile data properly scales
+        Costs costs({// id  est  prof
+                     {1, {10, 100000000000}},
+                     {2, {20, 200000000000}},
+                     {3, {30, 1}}});  // Make sure doesn't underflow
+        normalizeCosts(costs);  // Largest cost exceeds 10000000, so every cost is shrunk
+        UASSERT_SELFTEST(uint64_t, costs[1].first, 2500000);
+        UASSERT_SELFTEST(uint64_t, costs[1].second, 5000000);
+        UASSERT_SELFTEST(uint64_t, costs[2].first, 5000000);
+        UASSERT_SELFTEST(uint64_t, costs[2].second, 10000000);  // Largest cost lands on the cap
+        UASSERT_SELFTEST(uint64_t, costs[3].first, 7500000);
+        UASSERT_SELFTEST(uint64_t, costs[3].second, 1);  // Nonzero cost is clamped up to 1
+    }
+}
+
+static void fillinCosts(V3Graph* execMTaskGraphp) {
+    // Assign each ExecMTask its hash name, cost and priority, using profiled cost if present
+    V3UniqueNames uniqueNames;  // For generating unique mtask profile hash names
+    // Pass 1: See what profiling data applies
+    Costs costs;  // For each mtask, costs
+
+    for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;
+         vxp = vxp->verticesNextp()) {
+        ExecMTask* mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
+        // Compute name of mtask, for hash lookup
+        mtp->hashName(uniqueNames.get(mtp->bodyp()));
+
+        // This estimate is 64 bits, but the final mtask graph algorithm needs 32 bits
+        vluint64_t costEstimate = V3InstrCount::count(mtp->bodyp(), false);
+        vluint64_t costProfiled = V3Config::getProfileData(v3Global.opt.prefix(), mtp->hashName());
+        if (costProfiled) {
+            UINFO(5, "Profile data for mtask " << mtp->id() << " " << mtp->hashName()
+                                               << " cost override " << costProfiled << endl);
+        }
+        costs[mtp->id()] = std::make_pair(costEstimate, costProfiled);
+    }
+
+    normalizeCosts(costs /*ref*/);
+    // Pass 2: Apply the normalized costs, counting mtasks that have no profile data
+    int totalEstimates = 0;
+    int missingProfiles = 0;
+    for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;
+         vxp = vxp->verticesNextp()) {
+        ExecMTask* mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
+        const uint64_t costEstimate = costs[mtp->id()].first;
+        const uint64_t costProfiled = costs[mtp->id()].second;
+        UINFO(9, "ce = " << costEstimate << " cp=" << costProfiled << endl);
+        // Range-check the full 64-bit values before narrowing; a uint32 would hide overflow
+        UASSERT(costEstimate <= (1ULL << 31), "cost scaling math would overflow uint32");
+        UASSERT(costProfiled <= (1ULL << 31), "cost scaling math would overflow uint32");
+        const uint32_t costEstimate32 = static_cast<uint32_t>(costEstimate);
+        const uint32_t costProfiled32 = static_cast<uint32_t>(costProfiled);
+        // Prefer the measured cost; fall back to the estimate when no profile exists
+        const uint32_t costToUse = costProfiled32 ? costProfiled32 : costEstimate32;
+        if (!costProfiled32 && costEstimate32) ++missingProfiles;
+        if (costEstimate32) ++totalEstimates;
+        mtp->cost(costToUse);
+        mtp->priority(costToUse);
+    }
+
+    if (missingProfiles) {
+        if (FileLine* fl = V3Config::getProfileDataFileLine()) {
+            fl->v3warn(PROFOUTOFDATE, "Profile data for mtasks may be out of date. "
+                                          << missingProfiles << " of " << totalEstimates
+                                          << " mtasks had no data");
+        }
+    }
+}
+
 static void finalizeCosts(V3Graph* execMTaskGraphp) {
     GraphStreamUnordered ser(execMTaskGraphp, GraphWay::REVERSE);
-
     while (const V3GraphVertex* vxp = ser.nextp()) {
         ExecMTask* mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
-        uint32_t costCount = V3InstrCount::count(mtp->bodyp(), false);
-        mtp->cost(costCount);
-        mtp->priority(costCount);
-
         // "Priority" is the critical path from the start of the mtask, to
         // the end of the graph reachable from this mtask.  Given the
         // choice among several ready mtasks, we'll want to start the
@@ -2661,6 +2801,14 @@ static void finalizeCosts(V3Graph* execMTaskGraphp) {
         }
     }
 
+    // Assign profiler IDs
+    vluint64_t profilerId = 0;
+    for (const V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp;
+         vxp = vxp->verticesNextp()) {
+        ExecMTask* mtp = dynamic_cast<ExecMTask*>(const_cast<V3GraphVertex*>(vxp));
+        mtp->profilerId(profilerId++);
+    }
+
     // Removing tasks may cause edges that were formerly non-transitive to
     // become transitive. Also we just created new edges around the removed
     // tasks, which could be transitive. Prune out all transitive edges.
@@ -2711,12 +2859,17 @@ static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t th
         // Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
         addStrStmt("if (VL_UNLIKELY(vlSymsp->__Vm_profile_cycle_start)) {\n" +  //
                    recName + " = vlSymsp->__Vm_threadPoolp->profileAppend();\n" +  //
-                   recName + "->startRecord(VL_RDTSC_Q() - vlSymsp->__Vm_profile_cycle_start,"
-                   +  //
+                   recName + "->startRecord(VL_RDTSC_Q()," +  //
                    " " + cvtToStr(mtaskp->id()) + "," +  //
+                   " " + cvtToStr(mtaskp->predictStart()) + "," +  //
                    " " + cvtToStr(mtaskp->cost()) + ");\n" +  //
                    "}\n");
     }
+    if (v3Global.opt.profThreads()) {
+        // No lock around startCounter, as counter numbers are unique per thread
+        addStrStmt("vlSymsp->_vm_profiler.startCounter(" + cvtToStr(mtaskp->profilerId())
+                   + ");\n");
+    }
 
     //
     addStrStmt("Verilated::mtaskId(" + cvtToStr(mtaskp->id()) + ");\n");
@@ -2725,10 +2878,12 @@ static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t th
     funcp->addStmtsp(mtaskp->bodyp()->unlinkFrBack());
 
     if (v3Global.opt.profThreads()) {
-        // Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
-        addStrStmt("if (VL_UNLIKELY(" + recName + ")) {\n" +  //
-                   recName + "->endRecord(VL_RDTSC_Q() - vlSymsp->__Vm_profile_cycle_start);\n"
-                   + "}\n");
+        // No lock around stopCounter, as counter numbers are unique per thread
+        addStrStmt("vlSymsp->_vm_profiler.stopCounter(" + cvtToStr(mtaskp->profilerId()) + ");\n");
+    }
+    if (v3Global.opt.profThreads()) {
+        addStrStmt("if (VL_UNLIKELY(" + recName + ")) "  //
+                   + recName + "->endRecord(VL_RDTSC_Q());\n");
     }
 
     // Flush message queue
@@ -2852,9 +3007,10 @@ void V3Partition::finalize() {
     // V3LifePost) that can change the cost of logic within each mtask.
     // Now that logic is final, recompute the cost and priority of each
     // ExecMTask.
+    fillinCosts(execGraphp->mutableDepGraphp());
     finalizeCosts(execGraphp->mutableDepGraphp());
 
-    // Replace the graph body with it's multi-threaded implementation.
+    // Replace the graph body with its multi-threaded implementation.
     implementExecGraph(execGraphp);
 }
 
diff --git a/src/V3Partition.h b/src/V3Partition.h
index 0c9ca80d2..8d22d740a 100644
--- a/src/V3Partition.h
+++ b/src/V3Partition.h
@@ -50,6 +50,7 @@ public:
     void go(V3Graph* mtasksp);
 
     static void selfTest();
+    static void selfTestNormalizeCosts();
 
     // Print out a hash of the shape of graphp.  Only needed to debug the
     // origin of some nondeterminism; otherwise this is pretty useless.
diff --git a/src/V3PartitionGraph.h b/src/V3PartitionGraph.h
index 24e081d90..d4518bb23 100644
--- a/src/V3PartitionGraph.h
+++ b/src/V3PartitionGraph.h
@@ -56,11 +56,14 @@ class ExecMTask final : public AbstractMTask {
 private:
     AstMTaskBody* const m_bodyp;  // Task body
     const uint32_t m_id;  // Unique id of this mtask.
+    string m_hashName;  // Hashed name for profile-driven optimization
     uint32_t m_priority = 0;  // Predicted critical path from the start of
                               // this mtask to the ends of the graph that are reachable from this
                               // mtask. In abstract time units.
     uint32_t m_cost = 0;  // Predicted runtime of this mtask, in the same
                           // abstract time units as priority().
+    uint64_t m_predictStart = 0;  // Predicted start time of task
+    uint64_t m_profilerId = 0;  // VerilatedCounter number for profiling
     VL_UNCOPYABLE(ExecMTask);
 
 public:
@@ -74,11 +77,17 @@ public:
     void priority(uint32_t pri) { m_priority = pri; }
     virtual uint32_t cost() const override { return m_cost; }
     void cost(uint32_t cost) { m_cost = cost; }
+    void predictStart(vluint64_t time) { m_predictStart = time; }
+    vluint64_t predictStart() const { return m_predictStart; }
+    void profilerId(vluint64_t id) { m_profilerId = id; }
+    vluint64_t profilerId() const { return m_profilerId; }
     string cFuncName() const {
         // If this MTask maps to a C function, this should be the name
         return string("__Vmtask") + "__" + cvtToStr(m_id);
     }
     virtual string name() const override { return string("mt") + cvtToStr(id()); }
+    string hashName() const { return m_hashName; }
+    void hashName(const string& name) { m_hashName = name; }
     void dump(std::ostream& str) const {
         str << name() << "." << cvtToHex(this);
         if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]";
diff --git a/src/V3PreProc.cpp b/src/V3PreProc.cpp
index 02dbeeaa4..c3a43f880 100644
--- a/src/V3PreProc.cpp
+++ b/src/V3PreProc.cpp
@@ -824,9 +824,7 @@ void V3PreProcImp::openFile(FileLine*, VInFilter* filterp, const string& filenam
         for (const char* cp = sp; cp < ep; cp++) {
             if (VL_UNLIKELY(*cp == '\r' || *cp == '\0')) {
                 strip = true;
-                break;
-            }
-            if (VL_UNLIKELY(*cp == '\n')) {
+            } else if (VL_UNLIKELY(*cp == '\n')) {
                 eof_newline = 0;
                 ++eof_lineno;
             } else {
@@ -853,7 +851,7 @@ void V3PreProcImp::openFile(FileLine*, VInFilter* filterp, const string& filenam
         FileLine* fl = new FileLine{flsp};
         fl->contentLineno(eof_lineno);
         fl->column(eof_newline + 1, eof_newline + 1);
-        fl->v3warn(EOFNEWLINE, "Missing newline at end of file (POSIX 3.206)."
+        fl->v3warn(EOFNEWLINE, "Missing newline at end of file (POSIX 3.206).\n"
                                    << fl->warnMore() << "... Suggest add newline.");
     }
 }
diff --git a/src/V3Waiver.cpp b/src/V3Waiver.cpp
index 7e96d7ca3..2e08c57c8 100644
--- a/src/V3Waiver.cpp
+++ b/src/V3Waiver.cpp
@@ -24,8 +24,11 @@
 void V3Waiver::addEntry(V3ErrorCode errorCode, const std::string& filename,
                         const std::string& str) {
     std::stringstream entry;
+    const size_t pos = str.find('\n');
     entry << "lint_off -rule " << errorCode.ascii() << " -file \"*" << filename << "\" -match \""
-          << str << "\"";
+          << str.substr(0, pos);
+    if (pos != std::string::npos) entry << "*";
+    entry << "\"";
     s_waiverList.push_back(entry.str());
 }
 
diff --git a/src/V3Width.cpp b/src/V3Width.cpp
index 24b8e9c12..a3b912670 100644
--- a/src/V3Width.cpp
+++ b/src/V3Width.cpp
@@ -1179,7 +1179,6 @@ private:
                 return;
             }
         }
-        nodep->backp()->dumpTree(cout, "-FIXME-tr ");
         nodep->v3warn(E_UNSUPPORTED, "Unsupported/illegal unbounded ('$') in this context.");
     }
     virtual void visit(AstIsUnbounded* nodep) override {
@@ -2196,12 +2195,13 @@ private:
     virtual void visit(AstInitArray* nodep) override {
         // InitArray has type of the array; children are array values
         if (m_vup->prelim()) {  // First stage evaluation
-            AstNodeDType* vdtypep = m_vup->dtypep();
+            AstNodeDType* const vdtypep = m_vup->dtypeNullp();
             UASSERT_OBJ(vdtypep, nodep, "InitArray type not assigned by AstPattern/Var visitor");
             nodep->dtypep(vdtypep);
-            if (AstNodeArrayDType* arrayp = VN_CAST(vdtypep->skipRefp(), NodeArrayDType)) {
+            if (AstNodeArrayDType* const arrayp = VN_CAST(vdtypep->skipRefp(), NodeArrayDType)) {
                 userIterateChildren(nodep, WidthVP(arrayp->subDTypep(), BOTH).p());
             } else {
+                UINFO(1, "dtype object " << vdtypep->skipRefp() << endl);
                 nodep->v3fatalSrc("InitArray on non-array");
             }
         }
@@ -3191,7 +3191,7 @@ private:
     }
 
     virtual void visit(AstNew* nodep) override {
-        if (nodep->didWidthAndSet()) return;
+        if (nodep->didWidth()) return;
         AstClassRefDType* refp
             = m_vup ? VN_CAST(m_vup->dtypeNullSkipRefp(), ClassRefDType) : nullptr;
         if (!refp) {  // e.g. int a = new;
diff --git a/src/Verilator.cpp b/src/Verilator.cpp
index 28400d9e4..6bde19f70 100644
--- a/src/Verilator.cpp
+++ b/src/Verilator.cpp
@@ -590,6 +590,7 @@ static void verilate(const string& argString) {
         V3TSP::selfTest();
         V3ScoreboardBase::selfTest();
         V3Partition::selfTest();
+        V3Partition::selfTestNormalizeCosts();
         V3Broken::selfTest();
     }
 
diff --git a/src/astgen b/src/astgen
index a2cf43088..6328e4adc 100755
--- a/src/astgen
+++ b/src/astgen
@@ -516,52 +516,23 @@ def write_visitor(filename):
 def write_impl(filename):
     with open_file(filename) as fh:
         fh.write("\n")
-        fh.write("    // These for use by VN_IS only\n")
+        fh.write("// For internal use. They assume argument is not nullptr.\n")
         for typen in sorted(Classes.keys()):
-            fh.write("template<> inline bool AstNode::privateIs<Ast" + typen +
-                     ">(const AstNode* nodep) { ")
+            fh.write("template<> inline bool AstNode::privateTypeTest<Ast" +
+                     typen + ">(const AstNode* nodep) { ")
             if typen == "Node":
-                fh.write("return nodep != NULL; ")
+                fh.write("return true; ")
             else:
-                fh.write("return nodep && ")
+                fh.write("return ")
                 if re.search(r'^Node', typen):
                     fh.write(
-                        "(static_cast<int>(nodep->type()) >= static_cast<int>(AstType::first"
-                        + typen + ")) && ")
+                        "static_cast<int>(nodep->type()) >= static_cast<int>(AstType::first"
+                        + typen + ") && ")
                     fh.write(
-                        "(static_cast<int>(nodep->type()) <= static_cast<int>(AstType::last"
-                        + typen + ")); ")
+                        "static_cast<int>(nodep->type()) <= static_cast<int>(AstType::last"
+                        + typen + "); ")
                 else:
-                    fh.write(
-                        "(static_cast<int>(nodep->type()) == static_cast<int>(AstType::at"
-                        + typen + ")); ")
-            fh.write("}\n")
-
-        fh.write("    // These for use by VN_CAST macro only\n")
-        for typen in sorted(Classes.keys()):
-            fh.write("template<> inline Ast" + typen +
-                     "* AstNode::privateCast<Ast" + typen +
-                     ">(AstNode* nodep) { ")
-            if typen == "Node":
-                fh.write("return nodep; ")
-            else:
-                fh.write("return AstNode::privateIs<Ast" + typen +
-                         ">(nodep) ? ")
-                fh.write("reinterpret_cast<Ast" + typen + "*>(nodep) : NULL; ")
-            fh.write("}\n")
-
-        fh.write("    // These for use by VN_CAST_CONST macro only\n")
-        for typen in sorted(Classes.keys()):
-            fh.write("template<> inline const Ast" + typen +
-                     "* AstNode::privateConstCast<Ast" + typen +
-                     ">(const AstNode* nodep) { ")
-            if typen == "Node":
-                fh.write("return nodep; ")
-            else:
-                fh.write("return AstNode::privateIs<Ast" + typen +
-                         ">(nodep) ? ")
-                fh.write("reinterpret_cast<const Ast" + typen +
-                         "*>(nodep) : NULL; ")
+                    fh.write("nodep->type() == AstType::at" + typen + "; ")
             fh.write("}\n")
 
 
diff --git a/src/verilog.l b/src/verilog.l
index 3c831abea..166d9b0f3 100644
--- a/src/verilog.l
+++ b/src/verilog.l
@@ -121,6 +121,7 @@ vnum    {vnum1}|{vnum2}|{vnum3}|{vnum4}|{vnum5}
   "no_clocker"          { FL; return yVLT_NO_CLOCKER; }
   "no_inline"           { FL; return yVLT_NO_INLINE; }
   "parallel_case"       { FL; return yVLT_PARALLEL_CASE; }
+  "profile_data"        { FL; return yVLT_PROFILE_DATA; }
   "public"              { FL; return yVLT_PUBLIC; }
   "public_flat"         { FL; return yVLT_PUBLIC_FLAT; }
   "public_flat_rd"      { FL; return yVLT_PUBLIC_FLAT_RD; }
@@ -133,12 +134,15 @@ vnum    {vnum1}|{vnum2}|{vnum3}|{vnum4}|{vnum5}
   "tracing_on"          { FL; return yVLT_TRACING_ON; }
 
   -?"-block"            { FL; return yVLT_D_BLOCK; }
+  -?"-cost"             { FL; return yVLT_D_COST; }
   -?"-file"             { FL; return yVLT_D_FILE; }
   -?"-function"         { FL; return yVLT_D_FUNCTION; }
   -?"-lines"            { FL; return yVLT_D_LINES; }
   -?"-match"            { FL; return yVLT_D_MATCH; }
+  -?"-model"            { FL; return yVLT_D_MODEL; }
   -?"-module"           { FL; return yVLT_D_MODULE; }
   -?"-msg"              { FL; return yVLT_D_MSG; }
+  -?"-mtask"            { FL; return yVLT_D_MTASK; }
   -?"-rule"             { FL; return yVLT_D_RULE; }
   -?"-task"             { FL; return yVLT_D_TASK; }
   -?"-var"              { FL; return yVLT_D_VAR; }
diff --git a/src/verilog.y b/src/verilog.y
index 2d56fa9a7..39f45bb57 100644
--- a/src/verilog.y
+++ b/src/verilog.y
@@ -103,7 +103,7 @@ public:
         return nodep;
     }
     AstNode* createGatePin(AstNode* exprp) {
-        AstRange* rangep = m_gateRangep;
+        AstRange* const rangep = m_gateRangep;
         if (!rangep) {
             return exprp;
         } else {
@@ -112,14 +112,14 @@ public:
     }
     AstNode* createTypedef(FileLine* fl, const string& name, AstNode* attrsp, AstNodeDType* basep,
                            AstNodeRange* rangep) {
-        AstNode* nodep = new AstTypedef(fl, name, attrsp, VFlagChildDType(),
-                                        GRAMMARP->createArray(basep, rangep, false));
+        AstNode* const nodep = new AstTypedef{fl, name, attrsp, VFlagChildDType{},
+                                              GRAMMARP->createArray(basep, rangep, false)};
         SYMP->reinsert(nodep);
         PARSEP->tagNodep(nodep);
         return nodep;
     }
     AstNode* createTypedefFwd(FileLine* fl, const string& name) {
-        AstNode* nodep = new AstTypedefFwd(fl, name);
+        AstNode* const nodep = new AstTypedefFwd{fl, name};
         SYMP->reinsert(nodep);
         PARSEP->tagNodep(nodep);
         return nodep;
@@ -166,12 +166,12 @@ public:
                     finalp->unlinkFrBack();
                     rangearraysp = rangesp;
                 }
-                if (AstRange* finalRangep = VN_CAST(finalp, Range)) {  // not an UnsizedRange
+                if (AstRange* const finalRangep = VN_CAST(finalp, Range)) {  // not an UnsizedRange
                     if (dtypep->implicit()) {
                         // It's no longer implicit but a wire logic type
-                        AstBasicDType* newp = new AstBasicDType(
+                        AstBasicDType* const newp = new AstBasicDType{
                             dtypep->fileline(), AstBasicDTypeKwd::LOGIC, dtypep->numeric(),
-                            dtypep->width(), dtypep->widthMin());
+                            dtypep->width(), dtypep->widthMin()};
                         VL_DO_DANGLING(dtypep->deleteTree(), dtypep);
                         dtypep = newp;
                     }
@@ -261,7 +261,7 @@ int V3ParseGrammar::s_modTypeImpNum = 0;
 static void ERRSVKWD(FileLine* fileline, const string& tokname) {
     static int toldonce = 0;
     fileline->v3error(
-        string("Unexpected '") + tokname + "': '" + tokname
+        std::string{"Unexpected '"} + tokname + "': '" + tokname
         + "' is a SystemVerilog keyword misused as an identifier."
         + (!toldonce++ ? "\n" + V3Error::warnMore()
                              + "... Suggest modify the Verilog-2001 code to avoid SV keywords,"
@@ -278,19 +278,6 @@ static void UNSUPREAL(FileLine* fileline) {
 
 void yyerror(const char* errmsg) { PARSEP->bisonLastFileline()->v3error(errmsg); }
 
-void yyerrorf(const char* format, ...) {
-    const int maxlen = 2000;
-    char msg[maxlen];
-
-    va_list ap;
-    va_start(ap, format);
-    VL_VSNPRINTF(msg, maxlen, format, ap);
-    msg[maxlen - 1] = '\0';
-    va_end(ap);
-
-    yyerror(msg);
-}
-
 //======================================================================
 
 class AstSenTree;
@@ -363,6 +350,7 @@ BISONPRE_VERSION(3.7,%define api.header.include {"V3ParseBison.h"})
 %token<fl>              yVLT_NO_CLOCKER             "no_clocker"
 %token<fl>              yVLT_NO_INLINE              "no_inline"
 %token<fl>              yVLT_PARALLEL_CASE          "parallel_case"
+%token<fl>              yVLT_PROFILE_DATA           "profile_data"
 %token<fl>              yVLT_PUBLIC                 "public"
 %token<fl>              yVLT_PUBLIC_FLAT            "public_flat"
 %token<fl>              yVLT_PUBLIC_FLAT_RD         "public_flat_rd"
@@ -375,12 +363,15 @@ BISONPRE_VERSION(3.7,%define api.header.include {"V3ParseBison.h"})
 %token<fl>              yVLT_TRACING_ON             "tracing_on"
 
 %token<fl>              yVLT_D_BLOCK    "--block"
+%token<fl>              yVLT_D_COST     "--cost"
 %token<fl>              yVLT_D_FILE     "--file"
 %token<fl>              yVLT_D_FUNCTION "--function"
 %token<fl>              yVLT_D_LINES    "--lines"
-%token<fl>              yVLT_D_MODULE   "--module"
 %token<fl>              yVLT_D_MATCH    "--match"
+%token<fl>              yVLT_D_MODEL    "--model"
+%token<fl>              yVLT_D_MODULE   "--module"
 %token<fl>              yVLT_D_MSG      "--msg"
+%token<fl>              yVLT_D_MTASK    "--mtask"
 %token<fl>              yVLT_D_RULE     "--rule"
 %token<fl>              yVLT_D_TASK     "--task"
 %token<fl>              yVLT_D_VAR      "--var"
@@ -1127,7 +1118,7 @@ package_import_itemList<nodep>:
 package_import_item<nodep>:	// ==IEEE: package_import_item
 		idCC/*package_identifier*/ yP_COLONCOLON package_import_itemObj
 			{
-			  if (!VN_CAST($<scp>1, Package)) {
+			  if (!VN_IS($<scp>1, Package)) {
 			      $$ = nullptr;
 			      $<fl>1->v3error("Importing from missing package '" << *$<strp>1 << "'");
 			  } else {
@@ -1137,8 +1128,8 @@ package_import_item<nodep>:	// ==IEEE: package_import_item
 	;
 
 package_import_itemObj<strp>:	// IEEE: part of package_import_item
-		idAny/*package_identifier*/		{ $<fl>$=$<fl>1; $$=$1; }
-	|	'*'					{ $<fl>$=$<fl>1; static string star="*"; $$=&star; }
+		idAny/*package_identifier*/		{ $<fl>$ = $<fl>1; $$ = $1; }
+	|	'*'					{ $<fl>$ = $<fl>1; static string star = "*"; $$ = &star; }
 	;
 
 package_export_declaration<nodep>: // IEEE: package_export_declaration
@@ -1166,7 +1157,8 @@ module_declaration:		// ==IEEE: module_declaration
 		modFront importsAndParametersE portsStarE ';'
 	/*cont*/    module_itemListE yENDMODULE endLabelE
 			{ $1->modTrace(GRAMMARP->allTracingOn($1->fileline()));  // Stash for implicit wires, etc
-			  if ($2) $1->addStmtp($2); if ($3) $1->addStmtp($3);
+			  if ($2) $1->addStmtp($2);
+			  if ($3) $1->addStmtp($3);
 			  if ($5) $1->addStmtp($5);
 			  GRAMMARP->m_modp = nullptr;
 			  SYMP->popScope($1);
@@ -1174,7 +1166,8 @@ module_declaration:		// ==IEEE: module_declaration
 	|	udpFront parameter_port_listE portsStarE ';'
 	/*cont*/    module_itemListE yENDPRIMITIVE endLabelE
 			{ $1->modTrace(false);  // Stash for implicit wires, etc
-			  if ($2) $1->addStmtp($2); if ($3) $1->addStmtp($3);
+			  if ($2) $1->addStmtp($2);
+			  if ($3) $1->addStmtp($3);
 			  if ($5) $1->addStmtp($5);
 			  GRAMMARP->m_tracingParse = true;
 			  GRAMMARP->m_modp = nullptr;
@@ -1285,7 +1278,7 @@ list_of_ports<nodep>:		// IEEE: list_of_ports + list_of_port_declarations
 portAndTagE<nodep>:
 		/* empty */
 			{ int p = PINNUMINC();
-			   const string name = "__pinNumber" + cvtToStr(p);
+			  const string name = "__pinNumber" + cvtToStr(p);
 			  $$ = new AstPort{CRELINE(), p, name};
 			  AstVar* varp = new AstVar{CRELINE(), AstVarType::PORT, name, VFlagChildDType{},
 			                            new AstBasicDType{CRELINE(), LOGIC_IMPLICIT}};
@@ -1377,13 +1370,13 @@ port<nodep>:			// ==IEEE: port
 			{ $$=$2; /*VARDTYPE-same*/ $$->addNextNull(VARDONEP($$,$3,$4)); }
 	//
 	|	portDirNetE data_type           portSig variable_dimensionListE sigAttrListE '=' constExpr
-			{ $$=$3; VARDTYPE($2); if (AstVar* vp=VARDONEP($$,$4,$5)) { $$->addNextNull(vp); vp->valuep($7); } }
+			{ $$=$3; VARDTYPE($2); if (AstVar* vp = VARDONEP($$, $4, $5)) { $$->addNextNull(vp); vp->valuep($7); } }
 	|	portDirNetE yVAR data_type      portSig variable_dimensionListE sigAttrListE '=' constExpr
-			{ $$=$4; VARDTYPE($3); if (AstVar* vp=VARDONEP($$,$5,$6)) { $$->addNextNull(vp); vp->valuep($8); } }
+			{ $$=$4; VARDTYPE($3); if (AstVar* vp = VARDONEP($$, $5, $6)) { $$->addNextNull(vp); vp->valuep($8); } }
 	|	portDirNetE yVAR implicit_typeE portSig variable_dimensionListE sigAttrListE '=' constExpr
-			{ $$=$4; VARDTYPE($3); if (AstVar* vp=VARDONEP($$,$5,$6)) { $$->addNextNull(vp); vp->valuep($8); } }
+			{ $$=$4; VARDTYPE($3); if (AstVar* vp = VARDONEP($$, $5, $6)) { $$->addNextNull(vp); vp->valuep($8); } }
 	|	portDirNetE /*implicit*/        portSig variable_dimensionListE sigAttrListE '=' constExpr
-			{ $$=$2; /*VARDTYPE-same*/ if (AstVar* vp=VARDONEP($$,$3,$4)) { $$->addNextNull(vp); vp->valuep($6); } }
+			{ $$=$2; /*VARDTYPE-same*/ if (AstVar* vp = VARDONEP($$, $3, $4)) { $$->addNextNull(vp); vp->valuep($6); } }
 	;
 
 portDirNetE:			// IEEE: part of port, optional net type and/or direction
@@ -1474,7 +1467,7 @@ interface_or_generate_item<nodep>:  // ==IEEE: interface_or_generate_item
 anonymous_program<nodep>:	// ==IEEE: anonymous_program
 	//			// See the spec - this doesn't change the scope, items still go up "top"
 		yPROGRAM ';' anonymous_program_itemListE yENDPROGRAM
-			{ BBUNSUP($<fl>1, "Unsupported: Anonymous programs"); $$ = nullptr; }
+			{ $$ = nullptr; BBUNSUP($<fl>1, "Unsupported: Anonymous programs"); }
 	;
 
 anonymous_program_itemListE<nodep>:	// IEEE: { anonymous_program_item }
@@ -1501,7 +1494,8 @@ program_declaration:		// IEEE: program_declaration + program_nonansi_header + pr
 		pgmFront parameter_port_listE portsStarE ';'
 	/*cont*/    program_itemListE yENDPROGRAM endLabelE
 			{ $1->modTrace(GRAMMARP->allTracingOn($1->fileline()));  // Stash for implicit wires, etc
-			  if ($2) $1->addStmtp($2); if ($3) $1->addStmtp($3);
+			  if ($2) $1->addStmtp($2);
+			  if ($3) $1->addStmtp($3);
 			  if ($5) $1->addStmtp($5);
 			  GRAMMARP->m_modp = nullptr;
 			  SYMP->popScope($1);
@@ -1628,7 +1622,8 @@ list_of_genvar_identifiers<nodep>:	// IEEE: list_of_genvar_identifiers (for decl
 
 genvar_identifierDecl<varp>:		// IEEE: genvar_identifier (for declaration)
 		id/*new-genvar_identifier*/ sigAttrListE
-			{ VARRESET_NONLIST(GENVAR); VARDTYPE(new AstBasicDType($<fl>1,AstBasicDTypeKwd::INTEGER));
+			{ VARRESET_NONLIST(GENVAR);
+			  VARDTYPE(new AstBasicDType($<fl>1, AstBasicDTypeKwd::INTEGER));
 			  $$ = VARDONEA($<fl>1, *$1, nullptr, $2); }
 	;
 
@@ -1725,11 +1720,11 @@ varParamReset:
 port_direction:			// ==IEEE: port_direction + tf_port_direction
 	//			// IEEE 19.8 just "input" FIRST forces type to wire - we'll ignore that here
 	//			// Only used for ANSI declarations
-		yINPUT					{ GRAMMARP->m_pinAnsi=true; VARIO(INPUT); }
-	|	yOUTPUT					{ GRAMMARP->m_pinAnsi=true; VARIO(OUTPUT); }
-	|	yINOUT					{ GRAMMARP->m_pinAnsi=true; VARIO(INOUT); }
-	|	yREF					{ GRAMMARP->m_pinAnsi=true; VARIO(REF); }
-	|	yCONST__REF yREF			{ GRAMMARP->m_pinAnsi=true; VARIO(CONSTREF); }
+		yINPUT					{ GRAMMARP->m_pinAnsi = true; VARIO(INPUT); }
+	|	yOUTPUT					{ GRAMMARP->m_pinAnsi = true; VARIO(OUTPUT); }
+	|	yINOUT					{ GRAMMARP->m_pinAnsi = true; VARIO(INOUT); }
+	|	yREF					{ GRAMMARP->m_pinAnsi = true; VARIO(REF); }
+	|	yCONST__REF yREF			{ GRAMMARP->m_pinAnsi = true; VARIO(CONSTREF); }
 	;
 
 port_directionReset:		// IEEE: port_direction that starts a port_declaraiton
@@ -1765,7 +1760,7 @@ port_declaration<nodep>:	// ==IEEE: port_declaration
 	/*mid*/		{ VARDTYPE_NDECL(new AstBasicDType($<fl>3, LOGIC_IMPLICIT, $3)); }
 	/*cont*/    list_of_variable_decl_assignments			{ $$ = $5; }
 	|	port_directionReset port_declNetE /*implicit*/
-	/*mid*/		{ VARDTYPE_NDECL(nullptr);/*default_nettype*/}
+	/*mid*/		{ VARDTYPE_NDECL(nullptr); /*default_nettype*/ }
 	/*cont*/    list_of_variable_decl_assignments			{ $$ = $4; }
 	//			// IEEE: interface_declaration
 	//			// Looks just like variable declaration unless has a period
@@ -1826,7 +1821,7 @@ simple_type<dtypep>:		// ==IEEE: simple_type
 	//			// Even though we looked up the type and have a AstNode* to it,
 	//			// we can't fully resolve it because it may have been just a forward definition.
 	|	packageClassScopeE idType
-			{ AstRefDType* refp = new AstRefDType($<fl>2, *$2, $1, nullptr);
+			{ AstRefDType* const refp = new AstRefDType{$<fl>2, *$2, $1, nullptr};
 			  $$ = refp; }
 	//
 	//			// { generate_block_identifer ... } '.'
@@ -1844,10 +1839,10 @@ data_type<dtypep>:		// ==IEEE: data_type
 	//			// IEEE: ps_covergroup_identifier
 	//			// Don't distinguish between types and classes so all these combined
 	|	packageClassScopeE idType packed_dimensionListE
-			{ AstRefDType* refp = new AstRefDType($<fl>2, *$2, $1, nullptr);
+			{ AstRefDType* const refp = new AstRefDType{$<fl>2, *$2, $1, nullptr};
 			  $$ = GRAMMARP->createArray(refp, $3, true); }
 	|	packageClassScopeE idType parameter_value_assignmentClass packed_dimensionListE
-			{ AstRefDType* refp = new AstRefDType($<fl>2, *$2, $1, $3);
+			{ AstRefDType* const refp = new AstRefDType{$<fl>2, *$2, $1, $3};
 			  $$ = GRAMMARP->createArray(refp, $4, true); }
 	;
 
@@ -1904,11 +1899,11 @@ struct_unionDecl<uorstructp>:	// IEEE: part of data_type
 		ySTRUCT        packedSigningE '{'
 	/*mid*/ 	{ $<uorstructp>$ = new AstStructDType($1, $2); SYMP->pushNew($<uorstructp>$); }
 	/*cont*/    struct_union_memberList '}'
-			{ $$=$<uorstructp>4; $$->addMembersp($5); SYMP->popScope($$); }
+			{ $$ = $<uorstructp>4; $$->addMembersp($5); SYMP->popScope($$); }
 	|	yUNION taggedE packedSigningE '{'
 	/*mid*/		{ $<uorstructp>$ = new AstUnionDType($1, $3); SYMP->pushNew($<uorstructp>$); }
 	/*cont*/    struct_union_memberList '}'
-			{ $$=$<uorstructp>5; $$->addMembersp($6); SYMP->popScope($$); }
+			{ $$ = $<uorstructp>5; $$->addMembersp($6); SYMP->popScope($$); }
 	;
 
 struct_union_memberList<nodep>:	// IEEE: { struct_union_member }
@@ -2493,13 +2488,14 @@ loop_generate_construct<nodep>:	// ==IEEE: loop_generate_construct
 			  AstBegin* lowerBegp = VN_CAST($9, Begin);
 			  UASSERT_OBJ(!($9 && !lowerBegp), $9, "Child of GENFOR should have been begin");
 
-			  if (!lowerBegp) lowerBegp = new AstBegin($1, "", nullptr, true, false);  // Empty body
-			  AstNode* lowerNoBegp = lowerBegp->stmtsp();
+			  if (!lowerBegp) lowerBegp = new AstBegin{$1, "", nullptr, true, false};  // Empty body
+			  AstNode* const lowerNoBegp = lowerBegp->stmtsp();
 			  if (lowerNoBegp) lowerNoBegp->unlinkFrBackWithNext();
 			  //
-			  AstBegin* blkp = new AstBegin($1, lowerBegp->name(), nullptr, true, true);
+			  AstBegin* const blkp = new AstBegin{$1, lowerBegp->name(), nullptr, true, true};
 			  // V3LinkDot detects BEGIN(GENFOR(...)) as a special case
-			  AstNode* initp = $3;  AstNode* varp = $3;
+			  AstNode* initp = $3;
+			  AstNode* const varp = $3;
 			  if (VN_IS(varp, Var)) {  // Genvar
 				initp = varp->nextp();
 				initp->unlinkFrBackWithNext();  // Detach 2nd from varp, make 1st init
@@ -2550,8 +2546,8 @@ case_generate_itemListE<nodep>:	// IEEE: [{ case_generate_itemList }]
 	;
 
 case_generate_itemList<nodep>:	// IEEE: { case_generate_itemList }
-		~c~case_generate_item			{ $$=$1; }
-	|	~c~case_generate_itemList ~c~case_generate_item	{ $$=$1; $1->addNext($2); }
+		~c~case_generate_item			{ $$ = $1; }
+	|	~c~case_generate_itemList ~c~case_generate_item		{ $$ = $1; $1->addNext($2); }
 	;
 
 //UNSUPc_case_generate_itemList<nodep>:  // IEEE: { case_generate_item } (for checkers)
@@ -2559,9 +2555,9 @@ case_generate_itemList<nodep>:	// IEEE: { case_generate_itemList }
 //UNSUP	;
 
 case_generate_item<nodep>:	// ==IEEE: case_generate_item
-		caseCondList colon generate_block_or_null		{ $$ = new AstCaseItem($2,$1,$3); }
-	|	yDEFAULT colon generate_block_or_null		{ $$ = new AstCaseItem($1,nullptr,$3); }
-	|	yDEFAULT generate_block_or_null			{ $$ = new AstCaseItem($1,nullptr,$2); }
+		caseCondList colon generate_block_or_null	{ $$ = new AstCaseItem{$2, $1, $3}; }
+	|	yDEFAULT colon generate_block_or_null		{ $$ = new AstCaseItem{$1, nullptr, $3}; }
+	|	yDEFAULT generate_block_or_null			{ $$ = new AstCaseItem{$1, nullptr, $2}; }
 	;
 
 //UNSUPc_case_generate_item<nodep>:  // IEEE: case_generate_item (for checkers)
@@ -2635,8 +2631,8 @@ netSig<varp>:			// IEEE: net_decl_assignment -  one element from list_of_port_id
 	;
 
 netId<strp>:
-		id/*new-net*/				{ $$ = $1; $<fl>$=$<fl>1; }
-	|	idSVKwd					{ $$ = $1; $<fl>$=$<fl>1; }
+		id/*new-net*/				{ $$ = $1; $<fl>$ = $<fl>1; }
+	|	idSVKwd					{ $$ = $1; $<fl>$ = $<fl>1; }
 	;
 
 sigAttrListE<nodep>:
@@ -2677,7 +2673,7 @@ rangeList<rangep>:		// IEEE: {packed_dimension}
 
 //UNSUPbit_selectE<fl>:  // IEEE: constant_bit_select (IEEE included empty)
 //UNSUP		/* empty */				{ $$ = nullptr; }
-//UNSUP	|	'[' constExpr ']'			{ $<fl>$=$<fl>1; $$ = "["+$2+"]"; }
+//UNSUP	|	'[' constExpr ']'			{ $<fl>$ = $<fl>1; $$ = "[" + $2 + "]"; }
 //UNSUP	;
 
 // IEEE: select
@@ -2712,12 +2708,13 @@ param_assignment<varp>:		// ==IEEE: param_assignment
 		id/*new-parameter*/ variable_dimensionListE sigAttrListE exprOrDataTypeEqE
 			{ // To handle  #(type A=int, B=A) and properly imply B
                           // as a type (for parsing) we need to detect "A" is a type
-			  if (AstNodeDType* refp = VN_CAST($4, NodeDType)) {
-			    if (VSymEnt* foundp = SYMP->symCurrentp()->findIdFallback(refp->name())) {
+			  if (AstNodeDType* const refp = VN_CAST($4, NodeDType)) {
+			    if (VSymEnt* const foundp = SYMP->symCurrentp()->findIdFallback(refp->name())) {
 				UINFO(9, "declaring type via param assignment" << foundp->nodep() << endl);
-				VARDTYPE(new AstParseTypeDType($<fl>1))
+				VARDTYPE(new AstParseTypeDType{$<fl>1})
 				SYMP->reinsert(foundp->nodep()->cloneTree(false), nullptr, *$1); }}
-			  $$ = VARDONEA($<fl>1, *$1, $2, $3); if ($4) $$->valuep($4); }
+			  $$ = VARDONEA($<fl>1, *$1, $2, $3);
+			  if ($4) $$->valuep($4); }
 	;
 
 list_of_param_assignments<varp>:	// ==IEEE: list_of_param_assignments
@@ -2765,8 +2762,9 @@ instDecl<nodep>:
 	//      		// Currently disambiguated from data_declaration based on
 	//			// VARs being type, and cells non-type.
 	//			// IEEE requires a '(' to disambiguate, we need TODO force this
-		id parameter_value_assignmentE {INSTPREP($<fl>1,*$1,$2);} instnameList ';'
-			{ $$ = $4; GRAMMARP->m_impliedDecl=false;
+		id parameter_value_assignmentE {INSTPREP($<fl>1, *$1, $2);} instnameList ';'
+			{ $$ = $4;
+			  GRAMMARP->m_impliedDecl = false;
 			  if (GRAMMARP->m_instParamp) {
 			      VL_DO_CLEAR(GRAMMARP->m_instParamp->deleteTree(),
 					  GRAMMARP->m_instParamp = nullptr);
@@ -2851,10 +2849,10 @@ cellparamItemE<pinp>:		// IEEE: named_parameter_assignment + empty
 	|	yP_DOTSTAR				{ $$ = new AstPin($1,PINNUMINC(),".*",nullptr); }
 	|	'.' idSVKwd				{ $$ = new AstPin($<fl>2,PINNUMINC(), *$2,
 									  new AstParseRef($<fl>2,VParseRefExp::PX_TEXT,*$2,nullptr,nullptr));
-									  $$->svImplicit(true);}
+									  $$->svImplicit(true); }
 	|	'.' idAny				{ $$ = new AstPin($<fl>2,PINNUMINC(), *$2,
 									  new AstParseRef($<fl>2,VParseRefExp::PX_TEXT,*$2,nullptr,nullptr));
-									  $$->svImplicit(true);}
+									  $$->svImplicit(true); }
 	|	'.' idAny '(' ')'			{ $$ = new AstPin($<fl>2,PINNUMINC(),*$2,nullptr); }
 	//			// mintypmax is expanded here, as it might be a UDP or gate primitive
 	//			// data_type for 'parameter type' hookups
@@ -3057,7 +3055,7 @@ block_item_declaration<nodep>:	// ==IEEE: block_item_declaration
 
 stmtList<nodep>:
 		stmtBlock				{ $$ = $1; }
-	|	stmtList stmtBlock			{ $$ = ($2==nullptr)?($1):($1->addNext($2)); }
+	|	stmtList stmtBlock			{ $$ = $2 ? $1->addNext($2) : $1; }
 	;
 
 stmt<nodep>:			// IEEE: statement_or_null == function_statement_or_null
@@ -3101,26 +3099,27 @@ statement_item<nodep>:		// IEEE: statement_item
 							  if ($1 == uniq_UNIQUE0) $2->unique0Pragma(true);
 							  if ($1 == uniq_PRIORITY) $2->priorityPragma(true); }
 	//UNSUP	caseStart caseAttrE yMATCHES case_patternListE yENDCASE	{ }
-	|	unique_priorityE caseStart caseAttrE yINSIDE case_insideListE yENDCASE	{ $$ = $2; if ($5) $2->addItemsp($5);
-							  if (!$2->caseSimple()) $2->v3error("Illegal to have inside on a casex/casez");
-							  $2->caseInsideSet();
-							  if ($1 == uniq_UNIQUE) $2->uniquePragma(true);
-							  if ($1 == uniq_UNIQUE0) $2->unique0Pragma(true);
-							  if ($1 == uniq_PRIORITY) $2->priorityPragma(true); }
+	|	unique_priorityE caseStart caseAttrE yINSIDE case_insideListE yENDCASE
+			{ $$ = $2; if ($5) $2->addItemsp($5);
+			  if (!$2->caseSimple()) $2->v3error("Illegal to have inside on a casex/casez");
+			  $2->caseInsideSet();
+			  if ($1 == uniq_UNIQUE) $2->uniquePragma(true);
+			  if ($1 == uniq_UNIQUE0) $2->unique0Pragma(true);
+			  if ($1 == uniq_PRIORITY) $2->priorityPragma(true); }
 	//
 	//			// IEEE: conditional_statement
 	|	unique_priorityE yIF '(' expr ')' stmtBlock	%prec prLOWER_THAN_ELSE
-							{ AstIf* newp = new AstIf($2,$4,$6,nullptr);
-							  $$ = newp;
-							  if ($1 == uniq_UNIQUE) newp->uniquePragma(true);
-							  if ($1 == uniq_UNIQUE0) newp->unique0Pragma(true);
-							  if ($1 == uniq_PRIORITY) newp->priorityPragma(true); }
+			{ AstIf* const newp = new AstIf{$2, $4, $6, nullptr};
+			  $$ = newp;
+			  if ($1 == uniq_UNIQUE) newp->uniquePragma(true);
+			  if ($1 == uniq_UNIQUE0) newp->unique0Pragma(true);
+			  if ($1 == uniq_PRIORITY) newp->priorityPragma(true); }
 	|	unique_priorityE yIF '(' expr ')' stmtBlock yELSE stmtBlock
-							{ AstIf* newp = new AstIf($2,$4,$6,$8);
-							  $$ = newp;
-							  if ($1 == uniq_UNIQUE) newp->uniquePragma(true);
-							  if ($1 == uniq_UNIQUE0) newp->unique0Pragma(true);
-							  if ($1 == uniq_PRIORITY) newp->priorityPragma(true); }
+			{ AstIf* const newp = new AstIf{$2, $4, $6, $8};
+			  $$ = newp;
+			  if ($1 == uniq_UNIQUE) newp->uniquePragma(true);
+			  if ($1 == uniq_UNIQUE0) newp->unique0Pragma(true);
+			  if ($1 == uniq_PRIORITY) newp->priorityPragma(true); }
 	//
 	|	finc_or_dec_expression ';'		{ $$ = $1; }
 	//			// IEEE: inc_or_dec_expression
@@ -3133,15 +3132,15 @@ statement_item<nodep>:		// IEEE: statement_item
 	//			// so parse as if task
 	//			// Alternative would be shim with new AstVoidStmt.
 	|	yVOID yP_TICK '(' task_subroutine_callNoMethod ')' ';'
-							{ $$ = $4;
-							  FileLine* newfl = new FileLine($$->fileline());
-							  newfl->warnOff(V3ErrorCode::IGNOREDRETURN, true);
-							  $$->fileline(newfl); }
+			{ $$ = $4;
+			  FileLine* const newfl = new FileLine{$$->fileline()};
+			  newfl->warnOff(V3ErrorCode::IGNOREDRETURN, true);
+			  $$->fileline(newfl); }
 	|	yVOID yP_TICK '(' expr '.' task_subroutine_callNoMethod ')' ';'
-							{ $$ = new AstDot($5, false, $4, $6);
-							  FileLine* newfl = new FileLine($6->fileline());
-							  newfl->warnOff(V3ErrorCode::IGNOREDRETURN, true);
-							  $6->fileline(newfl); }
+			{ $$ = new AstDot{$5, false, $4, $6};
+			  FileLine* const newfl = new FileLine{$6->fileline()};
+			  newfl->warnOff(V3ErrorCode::IGNOREDRETURN, true);
+			  $6->fileline(newfl); }
 	//			// Expr included here to resolve our not knowing what is a method call
 	//			// Expr here must result in a subroutine_call
 	|	task_subroutine_callNoMethod ';'	{ $$ = $1; }
@@ -3171,8 +3170,8 @@ statement_item<nodep>:		// IEEE: statement_item
 	//
 	//			// IEEE: loop_statement
 	|	yFOREVER stmtBlock			{ $$ = new AstWhile($1,new AstConst($1, AstConst::BitTrue()), $2); }
-	|	yREPEAT '(' expr ')' stmtBlock		{ $$ = new AstRepeat($1,$3,$5);}
-	|	yWHILE '(' expr ')' stmtBlock		{ $$ = new AstWhile($1,$3,$5);}
+	|	yREPEAT '(' expr ')' stmtBlock		{ $$ = new AstRepeat{$1, $3, $5}; }
+	|	yWHILE '(' expr ')' stmtBlock		{ $$ = new AstWhile{$1, $3, $5}; }
 	//			// for's first ';' is in for_initialization
 	|	statementFor				{ $$ = $1; }
 	|	yDO stmtBlock yWHILE '(' expr ')' ';'	{ if ($2) {
@@ -3275,11 +3274,15 @@ foperator_assignment<nodep>:	// IEEE: operator_assignment (for first part of exp
 
 inc_or_dec_expression<nodep>:	// ==IEEE: inc_or_dec_expression
 	//			// Need fexprScope instead of variable_lvalue to prevent conflict
-		~l~exprScope yP_PLUSPLUS		{ $<fl>$=$<fl>1; $$ = new AstPostAdd($2, new AstConst($2, AstConst::StringToParse(), "'b1"), $1, $1->cloneTree(true)); }
-	|	~l~exprScope yP_MINUSMINUS		{ $<fl>$=$<fl>1; $$ = new AstPostSub($2, new AstConst($2, AstConst::StringToParse(), "'b1"), $1, $1->cloneTree(true)); }
+		~l~exprScope yP_PLUSPLUS
+			{ $<fl>$ = $<fl>1; $$ = new AstPostAdd{$2, new AstConst{$2, AstConst::StringToParse(), "'b1"}, $1, $1->cloneTree(true)}; }
+	|	~l~exprScope yP_MINUSMINUS
+			{ $<fl>$ = $<fl>1; $$ = new AstPostSub{$2, new AstConst{$2, AstConst::StringToParse(), "'b1"}, $1, $1->cloneTree(true)}; }
 	//			// Need expr instead of variable_lvalue to prevent conflict
-	|	yP_PLUSPLUS	expr			{ $<fl>$=$<fl>1; $$ = new AstPreAdd($1, new AstConst($1, AstConst::StringToParse(), "'b1"), $2, $2->cloneTree(true)); }
-	|	yP_MINUSMINUS	expr			{ $<fl>$=$<fl>1; $$ = new AstPreSub($1, new AstConst($1, AstConst::StringToParse(), "'b1"), $2, $2->cloneTree(true)); }
+	|	yP_PLUSPLUS	expr
+			{ $<fl>$ = $<fl>1; $$ = new AstPreAdd{$1, new AstConst{$1, AstConst::StringToParse(), "'b1"}, $2, $2->cloneTree(true)}; }
+	|	yP_MINUSMINUS	expr
+			{ $<fl>$ = $<fl>1; $$ = new AstPreSub{$1, new AstConst{$1, AstConst::StringToParse(), "'b1"}, $2, $2->cloneTree(true)}; }
 	;
 
 finc_or_dec_expression<nodep>:	// ==IEEE: inc_or_dec_expression
@@ -3352,26 +3355,26 @@ case_insideListE<caseitemp>:	// IEEE: [ { case_inside_item } ]
 	;
 
 case_itemList<caseitemp>:	// IEEE: { case_item + ... }
-		caseCondList colon stmtBlock		{ $$ = new AstCaseItem($2,$1,$3); }
-	|	yDEFAULT colon stmtBlock			{ $$ = new AstCaseItem($1,nullptr,$3); }
-	|	yDEFAULT stmtBlock			{ $$ = new AstCaseItem($1,nullptr,$2); }
-	|	case_itemList caseCondList colon stmtBlock	{ $$ = $1;$1->addNext(new AstCaseItem($3,$2,$4)); }
-	|       case_itemList yDEFAULT stmtBlock		{ $$ = $1;$1->addNext(new AstCaseItem($2,nullptr,$3)); }
-	|	case_itemList yDEFAULT colon stmtBlock		{ $$ = $1;$1->addNext(new AstCaseItem($2,nullptr,$4)); }
+		caseCondList colon stmtBlock			{ $$ = new AstCaseItem{$2, $1, $3}; }
+	|	yDEFAULT colon stmtBlock			{ $$ = new AstCaseItem{$1, nullptr, $3}; }
+	|	yDEFAULT stmtBlock				{ $$ = new AstCaseItem{$1, nullptr, $2}; }
+	|	case_itemList caseCondList colon stmtBlock	{ $$ = $1; $1->addNext(new AstCaseItem{$3, $2, $4}); }
+	|       case_itemList yDEFAULT stmtBlock		{ $$ = $1; $1->addNext(new AstCaseItem{$2, nullptr, $3}); }
+	|	case_itemList yDEFAULT colon stmtBlock		{ $$ = $1; $1->addNext(new AstCaseItem{$2, nullptr, $4}); }
 	;
 
 case_inside_itemList<caseitemp>:	// IEEE: { case_inside_item + open_range_list ... }
-		open_range_list colon stmtBlock		{ $$ = new AstCaseItem($2,$1,$3); }
-	|	yDEFAULT colon stmtBlock			{ $$ = new AstCaseItem($1,nullptr,$3); }
-	|	yDEFAULT stmtBlock			{ $$ = new AstCaseItem($1,nullptr,$2); }
-	|	case_inside_itemList open_range_list colon stmtBlock { $$ = $1;$1->addNext(new AstCaseItem($3,$2,$4)); }
-	|       case_inside_itemList yDEFAULT stmtBlock		{ $$ = $1;$1->addNext(new AstCaseItem($2,nullptr,$3)); }
-	|	case_inside_itemList yDEFAULT colon stmtBlock	{ $$ = $1;$1->addNext(new AstCaseItem($2,nullptr,$4)); }
+		open_range_list colon stmtBlock			{ $$ = new AstCaseItem{$2, $1, $3}; }
+	|	yDEFAULT colon stmtBlock			{ $$ = new AstCaseItem{$1, nullptr, $3}; }
+	|	yDEFAULT stmtBlock				{ $$ = new AstCaseItem{$1, nullptr, $2}; }
+	|	case_inside_itemList open_range_list colon stmtBlock { $$ = $1; $1->addNext(new AstCaseItem{$3, $2, $4}); }
+	|       case_inside_itemList yDEFAULT stmtBlock		{ $$ = $1; $1->addNext(new AstCaseItem{$2, nullptr, $3}); }
+	|	case_inside_itemList yDEFAULT colon stmtBlock	{ $$ = $1; $1->addNext(new AstCaseItem{$2, nullptr, $4}); }
 	;
 
 open_range_list<nodep>:		// ==IEEE: open_range_list + open_value_range
 		open_value_range			{ $$ = $1; }
-	|	open_range_list ',' open_value_range	{ $$ = $1;$1->addNext($3); }
+	|	open_range_list ',' open_value_range	{ $$ = $1; $1->addNext($3); }
 	;
 
 open_value_range<nodep>:	// ==IEEE: open_value_range
@@ -3390,7 +3393,7 @@ value_range<nodep>:		// ==IEEE: value_range
 
 caseCondList<nodep>:		// IEEE: part of case_item
 		expr 					{ $$ = $1; }
-	|	caseCondList ',' expr			{ $$ = $1;$1->addNext($3); }
+	|	caseCondList ',' expr			{ $$ = $1; $1->addNext($3); }
 	;
 
 patternNoExpr<nodep>:		// IEEE: pattern **Excluding Expr*
@@ -3411,8 +3414,9 @@ patternList<nodep>:		// IEEE: part of pattern
 	;
 
 patternOne<nodep>:		// IEEE: part of pattern
-		expr					{ if ($1) { $$ = new AstPatMember($1->fileline(),$1,nullptr,nullptr); } else { $$=nullptr; } }
-	|	expr '{' argsExprList '}'		{ $$ = new AstPatMember($2,$3,nullptr,$1); }
+		expr
+			{ if ($1) $$ = new AstPatMember{$1->fileline(), $1, nullptr, nullptr}; else $$ = nullptr; }
+	|	expr '{' argsExprList '}'		{ $$ = new AstPatMember{$2, $3, nullptr, $1}; }
 	|	patternNoExpr				{ $$ = $1; }
 	;
 
@@ -3434,7 +3438,7 @@ patternKey<nodep>:		// IEEE: merge structure_pattern_key, array_pattern_key, ass
 	//			// id/*member*/ is part of constExpr below
 	//UNSUP	constExpr				{ $$ = $1; }
 	//			// IEEE: assignment_pattern_key
-	//UNSUP	simple_type				{ $1->v3error("Unsupported: '{} with data type as key"); $$=$1; }
+	//UNSUP	simple_type				{ $1->v3error("Unsupported: '{} with data type as key"); $$ = $1; }
 	//			// simple_type reference looks like constExpr
 	//			// Verilator:
 	//			//   The above expressions cause problems because "foo" may be a constant identifier
@@ -3518,7 +3522,7 @@ for_step_assignment<nodep>:  // ==IEEE: for_step_assignment
 
 loop_variables<nodep>:		// IEEE: loop_variables
 		varRefBase				{ $$ = $1; }
-	|	loop_variables ',' varRefBase		{ $$ = $1;$1->addNext($3); }
+	|	loop_variables ',' varRefBase		{ $$ = $1; $1->addNext($3); }
 	;
 
 //************************************************
@@ -3704,7 +3708,7 @@ system_t_call<nodep>:		// IEEE: system_tf_call (as task)
 	|	yD_WRITEMEMH '(' expr ',' idClassSel ',' expr ',' expr ')'	{ $$ = new AstWriteMem($1, true,  $3, $5, $7, $9); }
 	//
 	|	yD_CAST '(' expr ',' expr ')'
-			{ FileLine* fl_nowarn = new FileLine($1);
+			{ FileLine* const fl_nowarn = new FileLine{$1};
 			  fl_nowarn->warnOff(V3ErrorCode::WIDTH, true);
 			  $$ = new AstAssertIntrinsic(fl_nowarn, new AstCastDynamic(fl_nowarn, $5, $3), nullptr, nullptr, true); }
 	//
@@ -3892,8 +3896,10 @@ task_declaration<ftaskp>:	// ==IEEE: task_declaration
 	;
 
 task_prototype<ftaskp>:		// ==IEEE: task_prototype
-		yTASK taskId '(' tf_port_listE ')'	{ $$=$2; $$->addStmtsp($4); $$->prototype(true); SYMP->popScope($$); }
-	|	yTASK taskId				{ $$=$2; $$->prototype(true); SYMP->popScope($$); }
+		yTASK taskId '(' tf_port_listE ')'
+			{ $$ = $2; $$->addStmtsp($4); $$->prototype(true); SYMP->popScope($$); }
+	|	yTASK taskId
+			{ $$ = $2; $$->prototype(true); SYMP->popScope($$); }
 	;
 
 function_declaration<ftaskp>:	// IEEE: function_declaration + function_body_declaration
@@ -3910,13 +3916,17 @@ function_declaration<ftaskp>:	// IEEE: function_declaration + function_body_decl
 	;
 
 function_prototype<ftaskp>:	// IEEE: function_prototype
-		yFUNCTION funcId '(' tf_port_listE ')'	{ $$=$2; $$->addStmtsp($4); $$->prototype(true); SYMP->popScope($$); }
-	|	yFUNCTION funcId			{ $$=$2; $$->prototype(true); SYMP->popScope($$); }
+		yFUNCTION funcId '(' tf_port_listE ')'
+			{ $$ = $2; $$->addStmtsp($4); $$->prototype(true); SYMP->popScope($$); }
+	|	yFUNCTION funcId
+			{ $$ = $2; $$->prototype(true); SYMP->popScope($$); }
 	;
 
 class_constructor_prototype<ftaskp>:	// ==IEEE: class_constructor_prototype
-		yFUNCTION funcIdNew '(' tf_port_listE ')' ';'	{ $$ = $2; $$->addStmtsp($4); $$->prototype(true); SYMP->popScope($$); }
-	|	yFUNCTION funcIdNew ';'				{ $$ = $2; $$->prototype(true); SYMP->popScope($$); }
+		yFUNCTION funcIdNew '(' tf_port_listE ')' ';'
+			{ $$ = $2; $$->addStmtsp($4); $$->prototype(true); SYMP->popScope($$); }
+	|	yFUNCTION funcIdNew ';'
+			{ $$ = $2; $$->prototype(true); SYMP->popScope($$); }
 	;
 
 funcIsolateE<cint>:
@@ -4137,7 +4147,7 @@ dpi_import_export<nodep>:	// ==IEEE: dpi_import_export
 
 dpi_importLabelE<strp>:		// IEEE: part of dpi_import_export
 		/* empty */				{ static string s; $$ = &s; }
-	|	idAny/*c_identifier*/ '='		{ $$ = $1; $<fl>$=$<fl>1; }
+	|	idAny/*c_identifier*/ '='		{ $$ = $1; $<fl>$ = $<fl>1; }
 	;
 
 dpi_tf_import_propertyE<iprop>:	// IEEE: [ dpi_function_import_property + dpi_task_import_property ]
@@ -4189,7 +4199,7 @@ expr<nodep>:			// IEEE: part of expression/constant_expression/primary
 	|	yP_XNOR ~r~expr	%prec prREDUCTION	{ $$ = new AstLogNot($1, new AstRedXor($1, $2)); }
 	//
 	//			// IEEE: inc_or_dec_expression
-	|	~l~inc_or_dec_expression		{ $<fl>$=$<fl>1; $$ = $1; }
+	|	~l~inc_or_dec_expression		{ $<fl>$ = $<fl>1; $$ = $1; }
 	//
 	//			// IEEE: '(' operator_assignment ')'
 	//			// Need exprScope of variable_lvalue to prevent conflict
@@ -4239,7 +4249,7 @@ expr<nodep>:			// IEEE: part of expression/constant_expression/primary
 	//			// Conflicts with constraint_expression:"expr yP_MINUSGT constraint_set"
 	//			// To duplicating expr for constraints, just allow the more general form
 	//			// Later Ast processing must ignore constraint terms where inappropriate
-	//UNSUP	~l~expr yP_MINUSGT constraint_set		{ $<fl>$=$<fl>1; $$ = $1+$2+$3; }
+	//UNSUP	~l~expr yP_MINUSGT constraint_set		{ $<fl>$ = $<fl>1; $$ = $1 + $2 + $3; }
 	//UNSUP remove line below
 	|	~l~expr yP_MINUSGT ~r~expr		{ $$ = new AstLogIf($2, $1, $3); }
 	//
@@ -4368,10 +4378,10 @@ fexpr<nodep>:			// For use as first part of statement (disambiguates <=)
 //UNSUP	//			// IEEE: '(' event_expression ')'
 //UNSUP	//			// expr:'(' x ')' conflicts with event_expression:'(' event_expression ')'
 //UNSUP	//			// so we use a special expression class
-//UNSUP	|	'(' event_expression ')'		{ $<fl>$=$<fl>1; $$ = "(...)"; }
+//UNSUP	|	'(' event_expression ')'		{ $<fl>$ = $<fl>1; $$ = "(...)"; }
 //UNSUP	//			// IEEE: From normal expr: '(' expr ':' expr ':' expr ')'
 //UNSUP	//			// But must avoid conflict
-//UNSUP	|	'(' event_expression ':' expr ':' expr ')'	{ $<fl>$=$<fl>1; $$ = "(...)"; }
+//UNSUP	|	'(' event_expression ':' expr ':' expr ')'	{ $<fl>$ = $<fl>1; $$ = "(...)"; }
 //UNSUP	;
 
 exprNoStr<nodep>:		// expression with string removed
@@ -4421,7 +4431,7 @@ fexprOkLvalue<nodep>:		// exprOkLValue, For use as first part of statement (disa
 //UNSUP	;
 
 fexprLvalue<nodep>:		// For use as first part of statement (disambiguates <=)
-		fexprOkLvalue				{ $<fl>$=$<fl>1; $$ = $1; }
+		fexprOkLvalue				{ $<fl>$ = $<fl>1; $$ = $1; }
 	;
 
 exprScope<nodep>:		// scope and variable for use to inside an expression
@@ -4472,7 +4482,7 @@ exprStrText<nodep>:
 
 cStrList<nodep>:
 		exprStrText				{ $$ = $1; }
-	|	exprStrText ',' cStrList		{ $$ = $1;$1->addNext($3); }
+	|	exprStrText ',' cStrList		{ $$ = $1; $1->addNext($3); }
 	;
 
 cateList<nodep>:
@@ -4488,7 +4498,7 @@ exprListE<nodep>:
 
 exprList<nodep>:
 		expr					{ $$ = $1; }
-	|	exprList ',' expr			{ $$ = $1;$1->addNext($3); }
+	|	exprList ',' expr			{ $$ = $1; $1->addNext($3); }
 	;
 
 exprDispList<nodep>:		// exprList for within $display
@@ -4501,7 +4511,7 @@ exprDispList<nodep>:		// exprList for within $display
 
 vrdList<nodep>:
 		idClassSel				{ $$ = $1; }
-	|	vrdList ',' idClassSel			{ $$ = $1;$1->addNext($3); }
+	|	vrdList ',' idClassSel			{ $$ = $1; $1->addNext($3); }
 	;
 
 commaVRDListE<nodep>:
@@ -4852,24 +4862,24 @@ junkToSemi:
 // IDs
 
 id<strp>:
-		yaID__ETC				{ $$ = $1; $<fl>$=$<fl>1; }
-	|	idRandomize				{ $$ = $1; $<fl>$=$<fl>1; }
+		yaID__ETC				{ $$ = $1; $<fl>$ = $<fl>1; }
+	|	idRandomize				{ $$ = $1; $<fl>$ = $<fl>1; }
 	;
 
 idAny<strp>:			// Any kind of identifier
-		yaID__ETC				{ $$ = $1; $<fl>$=$<fl>1; }
-	|	yaID__aTYPE				{ $$ = $1; $<fl>$=$<fl>1; }
-	|	idRandomize				{ $$ = $1; $<fl>$=$<fl>1; }
+		yaID__ETC				{ $$ = $1; $<fl>$ = $<fl>1; }
+	|	yaID__aTYPE				{ $$ = $1; $<fl>$ = $<fl>1; }
+	|	idRandomize				{ $$ = $1; $<fl>$ = $<fl>1; }
 	;
 
 idType<strp>:			// IEEE: class_identifier or other type identifier
 	//			// Used where reference is needed
-		yaID__aTYPE				{ $$ = $1; $<fl>$=$<fl>1; }
+		yaID__aTYPE				{ $$ = $1; $<fl>$ = $<fl>1; }
 	;
 
 idCC<strp>:			// IEEE: class/package then ::
 				// lexer matches this:  yaID_LEX [ '#' '(' ... ')' ] yP_COLONCOLON
-		yaID__CC				{ $$ = $1; $<fl>$=$<fl>1; }
+		yaID__CC				{ $$ = $1; $<fl>$ = $<fl>1; }
 	;
 
 idRandomize<strp>:		// Keyword as an identifier
@@ -4878,8 +4888,10 @@ idRandomize<strp>:		// Keyword as an identifier
 
 idSVKwd<strp>:			// Warn about non-forward compatible Verilog 2001 code
 	//			// yBIT, yBYTE won't work here as causes conflicts
-		yDO					{ static string s = "do"   ; $$ = &s; ERRSVKWD($1,*$$); $<fl>$=$<fl>1; }
-	|	yFINAL					{ static string s = "final"; $$ = &s; ERRSVKWD($1,*$$); $<fl>$=$<fl>1; }
+		yDO					/* keyword do used as an identifier */
+			{ static string s = "do"; $$ = &s; ERRSVKWD($1, *$$); $<fl>$ = $<fl>1; }
+	|	yFINAL					/* keyword final used as an identifier */
+			{ static string s = "final"; $$ = &s; ERRSVKWD($1, *$$); $<fl>$ = $<fl>1; }
 	;
 
 variable_lvalue<nodep>:		// IEEE: variable_lvalue or net_lvalue
@@ -4993,7 +5005,8 @@ str<strp>:			// yaSTRING but with \{escapes} need decoded
 	;
 
 strAsInt<nodep>:
-		yaSTRING				{ $$ = new AstConst($<fl>1, AstConst::VerilogStringLiteral(), GRAMMARP->deQuote($<fl>1, *$1));}
+		yaSTRING
+			{ $$ = new AstConst{$<fl>1, AstConst::VerilogStringLiteral(), GRAMMARP->deQuote($<fl>1, *$1)}; }
 	;
 
 strAsIntIgnore<nodep>:		// strAsInt, but never matches for when expr shouldn't parse strings
@@ -5001,13 +5014,13 @@ strAsIntIgnore<nodep>:		// strAsInt, but never matches for when expr shouldn't p
 	;
 
 strAsText<nodep>:
-		yaSTRING				{ $$ = GRAMMARP->createTextQuoted($<fl>1,*$1);}
+		yaSTRING				{ $$ = GRAMMARP->createTextQuoted($<fl>1, *$1); }
 	;
 
 endLabelE<strp>:
-		/* empty */				{ $$ = nullptr; $<fl>$=nullptr; }
-	|	':' idAny				{ $$ = $2; $<fl>$=$<fl>2; }
-	|	':' yNEW__ETC				{ static string n = "new"; $$ = &n; $<fl>$=$<fl>2; }
+		/* empty */				{ $$ = nullptr; $<fl>$ = nullptr; }
+	|	':' idAny				{ $$ = $2; $<fl>$ = $<fl>2; }
+	|	':' yNEW__ETC				{ static string n = "new"; $$ = &n; $<fl>$ = $<fl>2; }
 	;
 
 //************************************************
@@ -5075,7 +5088,7 @@ clocking_declaration<nodep>:		// IEEE: clocking_declaration  (INCOMPLETE)
 //UNSUP	;
 
 //UNSUPclocking_skewE:  // IEEE: [clocking_skew]
-//UNSUP		/* empty */				{ $$ = nullptr;}
+//UNSUP		/* empty */				{ $$ = nullptr; }
 //UNSUP	|	clocking_skew				{ $$ = $1; }
 //UNSUP	;
 
@@ -5480,21 +5493,21 @@ pexpr<nodep>:  // IEEE: property_expr  (The name pexpr is important as regexps j
 //UNSUP	//			// As sequence_expr includes expression_or_dist, and boolean_abbrev includes sequence_abbrev:
 //UNSUP	//			// '(' sequence_expr {',' sequence_match_item } ')' [ boolean_abbrev ]
 //UNSUP	//			// "'(' sexpr ')' boolean_abbrev" matches "[sexpr:'(' expr ')'] boolean_abbrev" so we can simply drop it
-//UNSUP	|	'(' ~p~sexpr ')'			{ $<fl>$=$<fl>1; $$=$1+$2+$3; }
+//UNSUP	|	'(' ~p~sexpr ')'			{ $<fl>$ = $<fl>1; $$ = ...; }
 //UNSUP	|	'(' ~p~sexpr ',' sequence_match_itemList ')'	{ }
 //UNSUP	//
 //UNSUP	//			// AND/OR are between pexprs OR sexprs
-//UNSUP	|	~p~sexpr yAND ~p~sexpr			{ $<fl>$=$<fl>1; $$=$1+$2+$3; }
-//UNSUP	|	~p~sexpr yOR ~p~sexpr			{ $<fl>$=$<fl>1; $$=$1+$2+$3; }
+//UNSUP	|	~p~sexpr yAND ~p~sexpr			{ $<fl>$ = $<fl>1; $$ = ...; }
+//UNSUP	|	~p~sexpr yOR ~p~sexpr			{ $<fl>$ = $<fl>1; $$ = ...; }
 //UNSUP	//			// Intersect always has an sexpr rhs
-//UNSUP	|	~p~sexpr yINTERSECT sexpr		{ $<fl>$=$<fl>1; $$=$1+$2+$3; }
+//UNSUP	|	~p~sexpr yINTERSECT sexpr		{ $<fl>$ = $<fl>1; $$ = ...; }
 //UNSUP	//
 //UNSUP	|	yFIRST_MATCH '(' sexpr ')'		{ }
 //UNSUP	|	yFIRST_MATCH '(' sexpr ',' sequence_match_itemList ')'	{ }
 //UNSUP	|	~p~sexpr/*sexpression_or_dist*/ yTHROUGHOUT sexpr		{ }
 //UNSUP	//			// Below pexpr's are really sequence_expr, but avoid conflict
 //UNSUP	//			// IEEE: sexpr yWITHIN sexpr
-//UNSUP	|	~p~sexpr yWITHIN sexpr			{ $<fl>$=$<fl>1; $$=$1+$2+$3; }
+//UNSUP	|	~p~sexpr yWITHIN sexpr			{ $<fl>$ = $<fl>1; $$ = ...; }
 //UNSUP	//			// Note concurrent_assertion had duplicate rule for below
 //UNSUP	|	clocking_event ~p~sexpr %prec prSEQ_CLOCKING	{ }
 //UNSUP	//
@@ -6307,8 +6320,8 @@ dist_list<nodep>:  // ==IEEE: dist_list
 
 dist_item<nodep>:  // ==IEEE: dist_item + dist_weight
 		value_range				{ $$ = $1; /* Same as := 1 */ }
-	|	value_range yP_COLONEQ  expr		{ $$ = $1; nullptr; /*UNSUP-no-UVM*/ }
-	|	value_range yP_COLONDIV expr		{ $$ = $1; nullptr; /*UNSUP-no-UVM*/ }
+	|	value_range yP_COLONEQ  expr		{ $$ = $1; /*UNSUP-no-UVM*/ /* weight expr is parsed but not used */ }
+	|	value_range yP_COLONDIV expr		{ $$ = $1; /*UNSUP-no-UVM*/ /* weight expr is parsed but not used */ }
 	;
 
 //UNSUPextern_constraint_declaration:  // ==IEEE: extern_constraint_declaration
@@ -6350,11 +6363,11 @@ vltItem:
 	|	vltOffFront yVLT_D_FILE yaSTRING yVLT_D_LINES yaINTNUM '-' yaINTNUM
 			{ V3Config::addIgnore($1, false, *$3, $5->toUInt(), $7->toUInt()+1); }
 	|	vltOffFront yVLT_D_FILE yaSTRING yVLT_D_MATCH yaSTRING
-			{	if (($1==V3ErrorCode::I_COVERAGE) || ($1==V3ErrorCode::I_TRACING)) {
-					$<fl>1->v3error("Argument -match only supported for lint_off");
-				} else {
-					V3Config::addWaiver($1,*$3,*$5);
-				}}
+			{ if (($1 == V3ErrorCode::I_COVERAGE) || ($1 == V3ErrorCode::I_TRACING)) {
+			      $<fl>1->v3error("Argument -match only supported for lint_off");  // Coverage/tracing codes reject -match
+			  } else {
+			      V3Config::addWaiver($1, *$3, *$5);  // Args: error code, -file string, -match string
+			  } }
 	|	vltOnFront			{ V3Config::addIgnore($1, true, "*", 0, 0); }
 	|	vltOnFront yVLT_D_FILE yaSTRING
 			{ V3Config::addIgnore($1, true, *$3, 0, 0); }
@@ -6382,6 +6395,8 @@ vltItem:
 			{ V3Config::addCaseParallel(*$3, 0); }
 	|	yVLT_PARALLEL_CASE yVLT_D_FILE yaSTRING yVLT_D_LINES yaINTNUM
 			{ V3Config::addCaseParallel(*$3, $5->toUInt()); }
+	|	yVLT_PROFILE_DATA yVLT_D_MODEL yaSTRING yVLT_D_MTASK yaSTRING yVLT_D_COST yaINTNUM
+			{ V3Config::addProfileData($<fl>1, *$3, *$5, $7->toUQuad());  /* Args: fileline of first token, model string, mtask string, cost number */ }
 	;
 
 vltOffFront<errcodeen>:
@@ -6416,7 +6431,7 @@ vltDModuleE<strp>:
 	;
 
 vltDFTaskE<strp>:
-		/* empty */				{ static string empty = ""; $$ = &empty; }
+		/* empty */				{ static string empty; $$ = &empty; }
 	|	yVLT_D_FUNCTION str			{ $$ = $2; }
 	|	yVLT_D_TASK str				{ $$ = $2; }
 	;
@@ -6427,7 +6442,7 @@ vltInlineFront<cbool>:
 	;
 
 vltVarAttrVarE<strp>:
-		/* empty */				{ static string empty = ""; $$ = &empty; }
+		/* empty */				{ static string empty; $$ = &empty; }
 	|	yVLT_D_VAR str				{ $$ = $2; }
 	;
 
diff --git a/test_regress/Makefile_obj b/test_regress/Makefile_obj
index 6795aeed4..945d0cdcd 100644
--- a/test_regress/Makefile_obj
+++ b/test_regress/Makefile_obj
@@ -41,8 +41,6 @@ CPPFLAGS += $(CPPFLAGS_ADD)
 ifeq ($(CFG_WITH_LONGTESTS),yes)
   ifeq ($(DRIVER_STD),newest)
     CPPFLAGS += $(CFG_CXXFLAGS_STD_NEWEST)
-  else ifeq ($(DRIVER_STD),oldest)
-    CPPFLAGS += $(CFG_CXXFLAGS_STD_OLDEST)
   endif
 endif
 
diff --git a/test_regress/driver.pl b/test_regress/driver.pl
index 240049350..0200feddf 100755
--- a/test_regress/driver.pl
+++ b/test_regress/driver.pl
@@ -5,9 +5,10 @@
 require 5.006_001;
 use warnings;
 use Cwd;
+
 BEGIN {
     if (!$ENV{VERILATOR_ROOT} && -x "../bin/verilator") {
-        $ENV{VERILATOR_ROOT} = Cwd::getcwd()."/..";
+        $ENV{VERILATOR_ROOT} = Cwd::getcwd() . "/..";
     }
     $ENV{MAKE} ||= "make";
     $ENV{CXX} ||= "c++";
@@ -16,7 +17,7 @@ BEGIN {
 use Getopt::Long;
 use IO::File;
 use Pod::Usage;
-use Data::Dumper; $Data::Dumper::Sortkeys=1;
+use Data::Dumper; $Data::Dumper::Sortkeys = 1;
 use FindBin qw($RealBin);
 use strict;
 use vars qw($Debug %Vars $Driver $Fork);
@@ -30,7 +31,7 @@ $::Driver = 1;
 $::Have_Forker = 0;
 
 eval "use Parallel::Forker; \$Fork=Parallel::Forker->new(use_sig_child=>1, poll_interval=>10*1000); \$::Have_Forker=1;";
-$Fork = Forker->new(use_sig_child=>1) if !$Fork;
+$Fork = Forker->new(use_sig_child => 1) if !$Fork;
 my $forker_Min_Version = 1.258;
 if ($::Have_Forker && $Parallel::Forker::VERSION < $forker_Min_Version) {
     print STDERR "driver.pl: Parallel::Forker is older than $forker_Min_Version, suggest 'cpan install Parallel::Forker'\n";
@@ -62,7 +63,7 @@ autoflush STDOUT 1;
 autoflush STDERR 1;
 
 our @Orig_ARGV = @ARGV;
-our @Orig_ARGV_Sw;  foreach (@Orig_ARGV) { push @Orig_ARGV_Sw, $_ if /^-/ && !/^-j/; }
+our @Orig_ARGV_Sw; foreach (@Orig_ARGV) { push @Orig_ARGV_Sw, $_ if /^-/ && !/^-j/; }
 our $Start = time();
 our $Vltmt_threads = 3;
 
@@ -133,7 +134,7 @@ if (! GetOptions(
     die "%Error: Bad usage, try '$0 --help'\n";
 }
 
-$opt_jobs = calc_jobs() if defined $opt_jobs && $opt_jobs==0;
+$opt_jobs = calc_jobs() if defined $opt_jobs && $opt_jobs == 0;
 $Fork->max_proc($opt_jobs);
 
 if ((scalar keys %opt_scenarios) < 1) {
@@ -142,10 +143,10 @@ if ((scalar keys %opt_scenarios) < 1) {
 }
 
 our @Test_Dirs = "t";
-push @Test_Dirs, split(/:/,$ENV{VERILATOR_TESTS_SITE})
-    if (($#opt_tests<0 ? $opt_site : 1) && $ENV{VERILATOR_TESTS_SITE});
+push @Test_Dirs, split(/:/, $ENV{VERILATOR_TESTS_SITE})
+    if (($#opt_tests < 0 ? $opt_site : 1) && $ENV{VERILATOR_TESTS_SITE});
 
-if ($#opt_tests<0) {  # Run everything
+if ($#opt_tests < 0) {  # Run everything
     my %uniq;
     foreach my $dir (@Test_Dirs) {
         my @stats = stat($dir);  # Uniquify by inode, so different paths to same place get combined
@@ -155,7 +156,7 @@ if ($#opt_tests<0) {  # Run everything
 }
 @opt_tests = _calc_hashset(@opt_tests) if $opt_hashset;
 
-if ($#opt_tests>=2 && $opt_jobs>=2) {
+if ($#opt_tests >= 2 && $opt_jobs >= 2) {
     # Without this tests such as t_debug_sigsegv_bt_bad.pl will occasionally
     # block on input and cause a SIGSTOP, then a "fg" was needed to resume testing.
     if (!$::Have_Forker) {
@@ -165,7 +166,6 @@ if ($#opt_tests>=2 && $opt_jobs>=2) {
     open(STDIN, "+>/dev/null");
 }
 
-
 mkdir "obj_dist";
 my $timestart = strftime("%Y%m%d_%H%M%S", localtime);
 
@@ -185,8 +185,8 @@ my $runner;
 }
 
 if ($opt_rerun && $runner->fail_count) {
-    print("="x70,"\n");
-    print("="x70,"\n");
+    print("=" x 70, "\n");
+    print("=" x 70, "\n");
     print("RERUN  ==\n\n");
 
     # Avoid parallel run to ensure that isn't causing problems
@@ -215,7 +215,7 @@ exit(10) if $runner->fail_count;
 #----------------------------------------------------------------------
 
 sub usage {
-    pod2usage(-verbose=>2, -exitval=>0, -output=>\*STDOUT);
+    pod2usage(-verbose => 2, -exitval => 0, -output => \*STDOUT);
     exit(1);  # Unreachable
 }
 
@@ -232,7 +232,7 @@ sub parameter {
         ($param =~ /^(\d+)$/)
             or die "%Error: Expected number following $_Parameter_Next_Level: $param\n";
         push @Opt_Driver_Verilator_Flags, $param;
-        $_Parameter_Next_Level  = undef;
+        $_Parameter_Next_Level = undef;
     }
     elsif ($param =~ /\.pl/) {
         push @opt_tests, $param;
@@ -250,6 +250,7 @@ sub parameter {
 }
 
 our $_Max_Procs;
+
 sub max_procs {
     if (!defined $_Max_Procs) {
         $_Max_Procs = `python3 -c 'import multiprocessing\nprint(multiprocessing.cpu_count())'`;
@@ -272,7 +273,7 @@ sub calc_threads {
 sub calc_jobs {
     my $ok = max_procs();
     $ok && !$@ or die "%Error: Can't use -j: $@\n";
-    print "driver.pl: Found $ok cores, using -j ",$ok+1,"\n";
+    print "driver.pl: Found $ok cores, using -j ", $ok + 1, "\n";
     return $ok + 1;
 }
 
@@ -351,10 +352,10 @@ sub one_test {
                  open(STDOUT, ">/dev/null");
                  open(STDERR, ">&STDOUT");
              }
-             print("="x70,"\n");
+             print("=" x 70, "\n");
              my $test = VTest->new(@params,
                                    running_id => $process->{running_id});
-             $test->oprint("="x50,"\n");
+             $test->oprint("=" x 50, "\n");
              unlink $test->{status_filename};
              $test->_prep;
              $test->_read;
@@ -373,30 +374,30 @@ sub one_test {
              } elsif ($test->scenario_off && !$test->errors) {
              } elsif ($test->skips && !$test->errors) {
                  push @{$self->{skip_msgs}},
-                     ("\t#".$test->soprint("-Skip:  $test->{skips}\n"));
+                     ("\t#" . $test->soprint("-Skip:  $test->{skips}\n"));
                  $self->{skip_cnt}++;
              } elsif ($test->unsupporteds && !$test->errors) {
                  $self->{unsup_cnt}++;
              } else {
                  $test->oprint("FAILED: $test->{errors}\n");
-                 my $j = ($opt_jobs>1?" -j":"");
+                 my $j = ($opt_jobs > 1 ? " -j" : "");
                  my $makecmd = $ENV{VERILATOR_MAKE} || "$ENV{MAKE}$j &&";
                  my $upperdir = (Cwd::getcwd() =~ /test_regress/
                                  ? 'test_regress/' : '');
                  push @{$self->{fail_msgs}},
-                     ("\t#".$test->soprint("%Error: $test->{errors}\n")
-                      ."\t\t$makecmd "
-                      .$upperdir.$test->{pl_filename}
-                      ." ".join(' ', _manual_args())
-                      ." --".$test->{scenario}."\n");
+                     ("\t#" . $test->soprint("%Error: $test->{errors}\n")
+                      . "\t\t$makecmd "
+                      . $upperdir . $test->{pl_filename}
+                      . " " . join(' ', _manual_args())
+                      . " --" . $test->{scenario} . "\n");
                  push @{$self->{fail_tests}}, $test;
                  $self->{fail_cnt}++;
                  $self->report($self->{driver_log_filename});
                  my $other = "";
                  foreach my $proc ($::Fork->running) {
-                     $other .= "  ".$proc->{test_pl_filename};
+                     $other .= "  " . $proc->{test_pl_filename};
                  }
-                 $test->oprint("Simultaneous running tests:",$other,"\n")
+                 $test->oprint("Simultaneous running tests:", $other, "\n")
                      if $other && !$opt_quiet;
                  if ($opt_stop) { die "%Error: --stop and errors found\n"; }
              }
@@ -409,15 +410,15 @@ sub one_test {
 
 sub wait_and_report {
     my $self = shift;
-    $self->print_summary(force=>1);
+    $self->print_summary(force => 1);
     # Wait for all children to finish
     while ($::Fork->is_any_left) {
         $::Fork->poll;
         if ((time() - ($self->{_last_summary_time} || 0) >= 30)
             && (!$opt_gdb && !$opt_gdbsim)) {  # Don't show for interactive gdb etc
-            $self->print_summary(force=>1, show_running=>1);
+            $self->print_summary(force => 1, show_running => 1);
         }
-        Time::HiRes::usleep 100*1000;
+        Time::HiRes::usleep 100 * 1000;
     }
     $runner->report(undef);
     $runner->report($self->{driver_log_filename});
@@ -433,7 +434,7 @@ sub report {
     }
 
     $fh->print("\n");
-    $fh->print("="x70,"\n");
+    $fh->print("=" x 70, "\n");
     foreach my $f (sort @{$self->{fail_msgs}}) {
         chomp $f;
         $fh->print("$f\n");
@@ -445,7 +446,7 @@ sub report {
     my $sum = ($self->{fail_cnt} && "FAILED"
                || $self->{skip_cnt} && "PASSED w/SKIPS"
                || "PASSED");
-    $fh->print("TESTS DONE, $sum: ".$self->sprint_summary."\n");
+    $fh->print("TESTS DONE, $sum: " . $self->sprint_summary . "\n");
 }
 
 sub print_summary {
@@ -457,13 +458,13 @@ sub print_summary {
         || ($self->{left_cnt} < 5)
         || (time() - ($self->{_last_summary_time} || 0) >= 15)) {  # Don't show for interactive gdb etc
         $self->{_last_summary_time} = time();
-        print STDERR ("==SUMMARY: ".$self->sprint_summary."\n");
+        print STDERR ("==SUMMARY: " . $self->sprint_summary . "\n");
         if ($params{show_running}) {
             my $other;
             foreach my $proc ($::Fork->running) {
-                $other .= "  ".$proc->{test_pl_filename};
+                $other .= "  " . $proc->{test_pl_filename};
             }
-            print STDERR ("==STILL RUNNING: ".$other."\n");
+            print STDERR ("==STILL RUNNING: " . $other . "\n");
         }
     }
 }
@@ -473,7 +474,7 @@ sub sprint_summary {
 
     my $delta = time() - $::Start;
     my $leftmsg = $::Have_Forker ? $self->{left_cnt} : "NO-FORKER";
-    my $pct = int(100*($self->{left_cnt} / ($self->{all_cnt} + 0.001)) + 0.999);
+    my $pct = int(100 * ($self->{left_cnt} / ($self->{all_cnt} + 0.001)) + 0.999);
     # Fudge of 120% works out about right so ETA correctly predicts completion time
     my $eta = 1.2 * (($self->{all_cnt}
                       * ($delta / (($self->{all_cnt} - $self->{left_cnt})+0.001)))
@@ -487,8 +488,8 @@ sub sprint_summary {
     $out .= "  Failed-First $self->{fail1_cnt}" if $self->{fail1_cnt};
     $out .= "  Skipped $self->{skip_cnt}" if $self->{skip_cnt};
     $out .= "  Unsup $self->{unsup_cnt}";
-    $out .= sprintf("  Eta %d:%02d", int($eta/60), $eta%60) if $self->{left_cnt} > 10 && $eta > 10;
-    $out .= sprintf("  Time %d:%02d", int($delta/60), $delta%60);
+    $out .= sprintf("  Eta %d:%02d", int($eta / 60), $eta % 60) if $self->{left_cnt} > 10 && $eta > 10;
+    $out .= sprintf("  Time %d:%02d", int($delta / 60), $delta % 60);
     return $out;
 }
 
@@ -566,7 +567,7 @@ sub new {
         my $scen_dir = File::Spec->abs2rel("$self->{t_dir}/../obj_$self->{scenario}");
         $scen_dir =~ s!^t/\.\./!!;  # Simplify filenames on local runs
         mkdir $scen_dir;  # Not a mkpath so find out if trying to build somewhere odd
-        $self->{obj_dir} ="$scen_dir/$self->{name}";
+        $self->{obj_dir} = "$scen_dir/$self->{name}";
     }
 
     my $define_opt = defineOpt($self->{xsim});
@@ -588,26 +589,26 @@ sub new {
                             (-r 'input.vc' ? " -f input.vc " : ""))
                            .($self->{t_dir} !~ m!/test_regress!  # Don't include standard dir, only site's
                              ? " +incdir+$self->{t_dir} -y $self->{t_dir}" : "")
-                           . " ".$define_opt."TEST_OBJ_DIR=$self->{obj_dir}"
-                           .($opt_verbose ? " ".$define_opt."TEST_VERBOSE=1":"")
-                           .($opt_benchmark ? " ".$define_opt."TEST_BENCHMARK=$opt_benchmark":"")
-                           .($opt_trace ? " ".$define_opt."WAVES=1":"")
+                           . " " . $define_opt . "TEST_OBJ_DIR=$self->{obj_dir}"
+                           .($opt_verbose ? " " . $define_opt . "TEST_VERBOSE=1" : "")
+                           .($opt_benchmark ? " " . $define_opt . "TEST_BENCHMARK=$opt_benchmark" : "")
+                           .($opt_trace ? " " . $define_opt . "WAVES=1" : "")
                           ))],
         v_flags2 => [],  # Overridden in some sim files
         v_other_filenames => [],  # After the filename so we can spec multiple files
         all_run_flags => [],
         pli_flags => ["-I$ENV{VERILATOR_ROOT}/include/vltstd -fPIC -shared"
-                      .(($^O eq "darwin" )
-                        ? " -Wl,-undefined,dynamic_lookup"
-                        : " -export-dynamic")
-                      .($opt_verbose ? " -DTEST_VERBOSE=1":"")
-                      .(cfg_with_m32() ? " -m32" : "")
-                      ." -o $self->{obj_dir}/libvpi.so"],
+                      . (($^O eq "darwin")
+                         ? " -Wl,-undefined,dynamic_lookup"
+                         : " -export-dynamic")
+                      . ($opt_verbose ? " -DTEST_VERBOSE=1" : "")
+                      . (cfg_with_m32() ? " -m32" : "")
+                      . " -o $self->{obj_dir}/libvpi.so"],
         tool_c_flags => [],
         # ATSIM
         atsim => 0,
         atsim_define => 'ATSIM',
-        atsim_flags => [split(/\s+/,"-c +sv +define+ATSIM"),
+        atsim_flags => [split(/\s+/, "-c +sv +define+ATSIM"),
                         "+sv_dir+$self->{obj_dir}/.athdl_compile"],
         atsim_flags2 => [],  # Overridden in some sim files
         atsim_run_flags => [],
@@ -615,44 +616,44 @@ sub new {
         ghdl => 0,
         ghdl_define => 'GHDL',
         ghdl_work_dir => "$self->{obj_dir}/ghdl_compile",
-        ghdl_flags => [($::Debug?"-v":""),
+        ghdl_flags => [($::Debug ? "-v" : ""),
                        "--workdir=$self->{obj_dir}/ghdl_compile", ],
         ghdl_flags2 => [],  # Overridden in some sim files
         ghdl_run_flags => [],
         # IV
         iv => 0,
         iv_define => 'IVERILOG',
-        iv_flags => [split(/\s+/,"+define+IVERILOG -g2012 -o $self->{obj_dir}/simiv")],
+        iv_flags => [split(/\s+/, "+define+IVERILOG -g2012 -o $self->{obj_dir}/simiv")],
         iv_flags2 => [],  # Overridden in some sim files
         iv_pli => 0,  # need to use pli
         iv_run_flags => [],
         # VCS
         vcs => 0,
         vcs_define => 'VCS',
-        vcs_flags => [split(/\s+/,"+vcs+lic+wait +cli -debug_access +define+VCS+1 -q -sverilog -CFLAGS '-DVCS' ")],
+        vcs_flags => [split(/\s+/, "+vcs+lic+wait +cli -debug_access +define+VCS+1 -q -sverilog -CFLAGS '-DVCS' ")],
         vcs_flags2 => [],  # Overridden in some sim files
-        vcs_run_flags => [split(/\s+/,"+vcs+lic_wait")],
+        vcs_run_flags => [split(/\s+/, "+vcs+lic_wait")],
         # NC
         nc => 0,
         nc_define => 'NC',
-        nc_flags => [split(/\s+/,("+licqueue +nowarn+LIBNOU +define+NC=1 -q +assert +sv -c "
-                                  .($opt_trace ? " +access+r":"")))],
+        nc_flags => [split(/\s+/, ("+licqueue +nowarn+LIBNOU +define+NC=1 -q +assert +sv -c "
+                                   . ($opt_trace ? " +access+r" : "")))],
         nc_flags2 => [],  # Overridden in some sim files
-        nc_run_flags => [split(/\s+/,"+licqueue -q +assert +sv -R")],
+        nc_run_flags => [split(/\s+/, "+licqueue -q +assert +sv -R")],
         # ModelSim
         ms => 0,
         ms_define => 'MS',
         ms_flags => [split(/\s+/, ("-sv -work $self->{obj_dir}/work +define+MS=1 -ccflags \"-DMS=1\""))],
         ms_flags2 => [],  # Overridden in some sim files
         ms_pli => 1,  # need to use pli
-        ms_run_flags => [split(/\s+/,"-lib $self->{obj_dir}/work -c -do 'run -all;quit' ")],
+        ms_run_flags => [split(/\s+/, "-lib $self->{obj_dir}/work -c -do 'run -all;quit' ")],
         # XSim
         xsim => 0,
         xsim_define => 'XSIM',
-        xsim_flags => [split(/\s+/,("--nolog --sv --define XSIM --work $self->{name}=$self->{obj_dir}/xsim"))],
+        xsim_flags => [split(/\s+/, ("--nolog --sv --define XSIM --work $self->{name}=$self->{obj_dir}/xsim"))],
         xsim_flags2 => [],  # Overridden in some sim files
-        xsim_run_flags => [split(/\s+/,("--nolog --runall --lib $self->{name}=$self->{obj_dir}/xsim"
-                                        .($opt_trace ? " --debug all":"")))],
+        xsim_run_flags => [split(/\s+/, ("--nolog --runall --lib $self->{name}=$self->{obj_dir}/xsim"
+                                         . ($opt_trace ? " --debug all" : "")))],
         xsim_run_flags2 => [],  # Overridden in some sim files
         # Verilator
         vlt => 0,
@@ -674,15 +675,15 @@ sub new {
 
     $self->{vlt_all} = $self->{vlt} || $self->{vltmt};  # Any Verilator scenario
 
-    $self->{VM_PREFIX} ||= "V".$self->{name};
-    $self->{stats} ||= "$self->{obj_dir}/V".$self->{name}."__stats.txt";
-    $self->{status_filename} ||= "$self->{obj_dir}/V".$self->{name}.".status";
+    $self->{VM_PREFIX} ||= "V" . $self->{name};
+    $self->{stats} ||= "$self->{obj_dir}/V" . $self->{name} . "__stats.txt";
+    $self->{status_filename} ||= "$self->{obj_dir}/V" . $self->{name} . ".status";
     $self->{run_log_filename} ||= "$self->{obj_dir}/vlt_sim.log";
     $self->{coverage_filename} ||= "$self->{obj_dir}/coverage.dat";
     $self->{main_filename} ||= "$self->{obj_dir}/$self->{VM_PREFIX}__main.cpp";
     ($self->{top_filename} ||= $self->{pl_filename}) =~ s/\.pl$//;
     ($self->{golden_filename} ||= $self->{pl_filename}) =~ s/\.pl$/.out/;
-    if (-e ($self->{top_filename}.".vhd")) {  # If VHDL file exists
+    if (-e ($self->{top_filename} . ".vhd")) {  # If VHDL file exists
         $self->{vhdl} = 1;
         $self->{top_filename} .= ".vhd";
     } else {
@@ -693,13 +694,13 @@ sub new {
     } else {
         $self->{top_shell_filename} = "$self->{obj_dir}/$self->{VM_PREFIX}__top.v";
     }
-    $self->{pli_filename} ||= $self->{name}.".cpp";
+    $self->{pli_filename} ||= $self->{name} . ".cpp";
     return $self;
 }
 
 sub benchmarksim_filename {
     my $self = (ref $_[0] ? shift : $Self);
-    return $self->{obj_dir}."/$self->{name}_benchmarksim.csv";
+    return $self->{obj_dir} . "/$self->{name}_benchmarksim.csv";
 }
 
 sub init_benchmarksim {
@@ -707,16 +708,16 @@ sub init_benchmarksim {
     # Simulations with benchmarksim enabled append to the same file between runs.
     # Test files must ensure a clean benchmark data file before executing tests.
     my $filename = $self->benchmarksim_filename();
-    my $fh = IO::File->new(">".$filename) or die "%Error: $! ".$filename;
+    my $fh = IO::File->new(">" . $filename) or die "%Error: $! " . $filename;
     print $fh "# Verilator simulation benchmark data\n";
-    print $fh "# Test name: ".$self->{name}."\n";
-    print $fh "# Top file: ".$self->{top_filename}."\n";
+    print $fh "# Test name: " . $self->{name} . "\n";
+    print $fh "# Top file: " . $self->{top_filename} . "\n";
     print $fh "evals, time[s]\n";
 }
 
 sub soprint {
     my $self = (ref $_[0] ? shift : $Self);
-    my $str = "$self->{scenario}/$self->{name}: ".join('',@_);
+    my $str = "$self->{scenario}/$self->{name}: " . join('', @_);
     $str =~ s/\n\n+$/\n/s;
     return $str;
 }
@@ -728,35 +729,35 @@ sub oprint {
 
 sub error {
     my $self = (ref $_[0] ? shift : $Self);
-    my $msg = join('',@_);
+    my $msg = join('', @_);
     # Called from tests as: error("Reason message"[, ...]);
-    warn "%Warning: $self->{scenario}/$self->{name}: ".$msg."\n";
+    warn "%Warning: $self->{scenario}/$self->{name}: " . $msg . "\n";
     $self->{errors} ||= $msg;
 }
 
 sub error_keep_going {
     my $self = (ref $_[0] ? shift : $Self);
-    my $msg = join('',@_);
+    my $msg = join('', @_);
     # Called from tests as: error_keep_going("Reason message"[, ...]);
-    warn "%Warning: $self->{scenario}/$self->{name}: ".$msg."\n";
+    warn "%Warning: $self->{scenario}/$self->{name}: " . $msg . "\n";
     $self->{errors_keep_going} ||= $msg;
 }
 
 sub skip {
     my $self = (ref $_[0] ? shift : $Self);
-    my $msg = join('',@_);
+    my $msg = join('', @_);
     # Called from tests as: skip("Reason message"[, ...]);
-    warn "-Skip: $self->{scenario}/$self->{name}: ".$msg."\n";
-    $self->{skips} ||= "Skip: ".$msg;
+    warn "-Skip: $self->{scenario}/$self->{name}: " . $msg . "\n";
+    $self->{skips} ||= "Skip: " . $msg;
 }
 
 sub unsupported {
     my $self = (ref $_[0] ? shift : $Self);
-    my $msg = join('',@_);
+    my $msg = join('', @_);
     # Called from tests as: unsupported("Reason message"[, ...]);
-    warn "-Unsupported: $self->{scenario}/$self->{name}: ".$msg."\n";
+    warn "-Unsupported: $self->{scenario}/$self->{name}: " . $msg . "\n";
     if (!$::Opt_Unsupported) {
-        $self->{unsupporteds} ||= "Unsupported: ".$msg;
+        $self->{unsupporteds} ||= "Unsupported: " . $msg;
     }
 }
 
@@ -879,7 +880,7 @@ sub clean_objs {
 }
 
 sub compile_vlt_cmd {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my %param = (%{$self}, @_);  # Default arguments are from $self
     return 1 if $self->errors || $self->skips || $self->unsupporteds;
 
@@ -888,17 +889,17 @@ sub compile_vlt_cmd {
         $self->compile_vlt_flags(%param),
         $param{top_filename},
         @{$param{v_other_filenames}},
-        $param{stdout_filename}?"> ".$param{stdout_filename}:""
+        $param{stdout_filename} ? "> " . $param{stdout_filename} : ""
     );
     return @vlt_cmd;
 }
 
 sub compile_vlt_flags {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my %param = (%{$self}, @_);  # Default arguments are from $self
     return 1 if $self->errors || $self->skips || $self->unsupporteds;
 
-    my $checkflags = join(' ',@{$param{v_flags}},
+    my $checkflags = join(' ', @{$param{v_flags}},
                           @{$param{v_flags2}},
                           @{$param{verilator_flags}},
                           @{$param{verilator_flags2}},
@@ -932,24 +933,24 @@ sub compile_vlt_flags {
     unshift @verilator_flags, "--make cmake" if $param{verilator_make_cmake};
     unshift @verilator_flags, "--exe" if
         $param{make_main} && $param{verilator_make_gmake};
-    unshift @verilator_flags, "../".$self->{main_filename} if
+    unshift @verilator_flags, "../" . $self->{main_filename} if
         $param{make_main} && $param{verilator_make_gmake};
     if (defined $opt_optimize) {
         my $letters = "";
         if ($opt_optimize =~ /[a-zA-Z]/) {
             $letters = $opt_optimize;
         } else {  # Randomly turn on/off different optimizations
-            foreach my $l ('a'..'z') {
+            foreach my $l ('a' .. 'z') {
                 $letters .= ((rand() > 0.5) ? $l : uc $l);
             }
             unshift @verilator_flags, "--trace" if rand() > 0.5;
             unshift @verilator_flags, "--coverage" if rand() > 0.5;
         }
-        unshift @verilator_flags, "--O".$letters;
+        unshift @verilator_flags, "--O" . $letters;
     }
 
     my @cmdargs = (
-                   "--prefix ".$param{VM_PREFIX},
+                   "--prefix " . $param{VM_PREFIX},
                    @verilator_flags,
                    @{$param{verilator_flags2}},
                    @{$param{verilator_flags3}},
@@ -963,8 +964,8 @@ sub compile_vlt_flags {
 }
 
 sub driver_verilator_flags {
-    #my $self = (ref $_[0] ? shift : $Self);
-    return @Opt_Driver_Verilator_Flags
+    # my $self = (ref $_[0] ? shift : $Self);
+    return @Opt_Driver_Verilator_Flags;
 }
 
 sub lint {
@@ -981,7 +982,7 @@ sub lint {
 }
 
 sub compile {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my %param = (tee => 1,
                  %{$self}, @_);  # Default arguments are from $self
     return 1 if $self->errors || $self->skips || $self->unsupporteds;
@@ -994,7 +995,7 @@ sub compile {
         = $self->{top_shell_filename} = "";
     } else {
         $param{top_shell_filename}
-        = $self->{top_shell_filename} = "$self->{obj_dir}/$self->{VM_PREFIX}__top.".$self->v_suffix;
+        = $self->{top_shell_filename} = "$self->{obj_dir}/$self->{VM_PREFIX}__top." . $self->v_suffix;
     }
 
     if ($param{atsim}) {
@@ -1002,7 +1003,7 @@ sub compile {
         $self->_make_top() if $param{make_top_shell};
         $self->_run(logfile=>"$self->{obj_dir}/atsim_compile.log",
                     fails=>$param{fails},
-                    cmd=>[($ENV{VERILATOR_ATSIM}||"atsim"),
+                    cmd=>[($ENV{VERILATOR_ATSIM} || "atsim"),
                           @{$param{atsim_flags}},
                           @{$param{atsim_flags2}},
                           @{$param{v_flags}},
@@ -1018,9 +1019,9 @@ sub compile {
         $self->_make_top() if $param{make_top_shell};
         $self->_run(logfile=>"$self->{obj_dir}/ghdl_compile.log",
                     fails=>$param{fails},
-                    cmd=>[($ENV{VERILATOR_GHDL}||"ghdl"),
+                    cmd=>[($ENV{VERILATOR_GHDL} || "ghdl"),
                           # Add -c here, as having -c twice freaks it out
-                          ((($ENV{VERILATOR_GHDL}||' ') =~ / -c\b/) ? "" : "-c"),
+                          ((($ENV{VERILATOR_GHDL} || ' ') =~ / -c\b/) ? "" : "-c"),
                           @{$param{ghdl_flags}},
                           @{$param{ghdl_flags2}},
                           #@{$param{v_flags}},  # Not supported
@@ -1036,10 +1037,10 @@ sub compile {
         $self->_make_top() if $param{make_top_shell};
         $self->_run(logfile=>"$self->{obj_dir}/vcs_compile.log",
                     fails=>$param{fails},
-                    cmd=>[($ENV{VERILATOR_VCS}||"vcs"),
+                    cmd=>[($ENV{VERILATOR_VCS} || "vcs"),
                           @{$param{vcs_flags}},
                           @{$param{vcs_flags2}},
-                          ($opt_verbose ? " -CFLAGS -DTEST_VERBOSE=1":""),
+                          ($opt_verbose ? " -CFLAGS -DTEST_VERBOSE=1" : ""),
                           @{$param{v_flags}},
                           @{$param{v_flags2}},
                           $param{top_filename},
@@ -1053,12 +1054,12 @@ sub compile {
         my @more_args;
         if ($self->vhdl) {
             ((my $ts = $param{top_shell_filename}) =~ s!\.v!!);
-            $ts =~ s!.*/!!;;
+            $ts =~ s!.*/!!;
             push @more_args, "-vhdltop", $ts;
         }
         $self->_run(logfile=>"$self->{obj_dir}/nc_compile.log",
                     fails=>$param{fails},
-                    cmd=>[($ENV{VERILATOR_NCVERILOG}||"ncverilog"),
+                    cmd=>[($ENV{VERILATOR_NCVERILOG} || "ncverilog"),
                           @{$param{nc_flags}},
                           @{$param{nc_flags2}},
                           @{$param{v_flags}},
@@ -1075,7 +1076,7 @@ sub compile {
         $self->_run(logfile=>"$self->{obj_dir}/ms_compile.log",
                     fails=>$param{fails},
                     cmd=>[("vlib $self->{obj_dir}/work && "),
-                          ($ENV{VERILATOR_MODELSIM}||"vlog"),
+                          ($ENV{VERILATOR_MODELSIM} || "vlog"),
                           @{$param{ms_flags}},
                           @{$param{ms_flags2}},
                           @{$param{v_flags}},
@@ -1088,7 +1089,7 @@ sub compile {
     elsif ($param{iv}) {
         $param{tool_define} ||= $param{iv_define};
         $self->_make_top() if $param{make_top_shell};
-        my @cmd = (($ENV{VERILATOR_IVERILOG}||"iverilog"),
+        my @cmd = (($ENV{VERILATOR_IVERILOG} || "iverilog"),
                    @{$param{iv_flags}},
                    @{$param{iv_flags2}},
                    @{$param{v_flags}},
@@ -1107,7 +1108,7 @@ sub compile {
         $self->_make_top() if $param{make_top_shell};
         $self->_run(logfile=>"$self->{obj_dir}/xsim_compile.log",
                     fails=>$param{fails},
-                    cmd=>[($ENV{VERILATOR_XVLOG}||"xvlog"),
+                    cmd=>[($ENV{VERILATOR_XVLOG} || "xvlog"),
                           @{$param{xsim_flags}},
                           @{$param{xsim_flags2}},
                           @{$param{v_flags}},
@@ -1165,16 +1166,16 @@ sub compile {
                         expect => $param{expect},
                         expect_filename => $param{expect_filename},
                         verilator_run => 1,
-                        cmd => ["cd \"".$self->{obj_dir}."\" && cmake",
-                                "\"".$self->{t_dir}."/..\"",
+                        cmd => ["cd \"" . $self->{obj_dir} . "\" && cmake",
+                                "\"" . $self->{t_dir} . "/..\"",
                                 "-DTEST_VERILATOR_ROOT=$ENV{VERILATOR_ROOT}",
                                 "-DTEST_NAME=$self->{name}",
                                 "-DTEST_CSOURCES=\"@csources\"",
                                 "-DTEST_VERILATOR_ARGS=\"@vlt_args\"",
                                 "-DTEST_VERILATOR_SOURCES=\"$param{top_filename} @{$param{v_other_filenames}}\"",
-                                "-DTEST_VERBOSE=\"".($self->{verbose} ? 1 : 0)."\"",
-                                "-DTEST_SYSTEMC=\"" .($self->sc ? 1 : 0). "\"",
-                                "-DCMAKE_PREFIX_PATH=\"".(($ENV{SYSTEMC_INCLUDE}||$ENV{SYSTEMC}||'')."/..\""),
+                                "-DTEST_VERBOSE=\"" . ($self->{verbose} ? 1 : 0) . "\"",
+                                "-DTEST_SYSTEMC=\"" . ($self->sc ? 1 : 0) . "\"",
+                                "-DCMAKE_PREFIX_PATH=\"" . (($ENV{SYSTEMC_INCLUDE} || $ENV{SYSTEMC} || '') . "/..\""),
                                 "-DTEST_OPT_FAST=\"" . ($param{benchmark} ? "-Os" : "-O0") . "\"",
                                 "-DTEST_OPT_GLOBAL=\"" . ($param{benchmark} ? "-Os" : "-O0") . "\"",
                                 "-DTEST_VERILATION=\"" . $::Opt_Verilation . "\"",
@@ -1187,17 +1188,17 @@ sub compile {
             $self->_run(logfile => "$self->{obj_dir}/vlt_gcc.log",
                         entering => "$self->{obj_dir}",
                         cmd => [$ENV{MAKE},
-                                "-C ".$self->{obj_dir},
-                                "-f ".$FindBin::RealBin."/Makefile_obj",
+                                "-C " . $self->{obj_dir},
+                                "-f " . $FindBin::RealBin . "/Makefile_obj",
                                 ($self->{verbose} ? "" : "--no-print-directory"),
                                 "VM_PREFIX=$self->{VM_PREFIX}",
                                 "TEST_OBJ_DIR=$self->{obj_dir}",
-                                "CPPFLAGS_DRIVER=-D".uc($self->{name}),
-                                ($self->{verbose} ? "CPPFLAGS_DRIVER2=-DTEST_VERBOSE=1":""),
+                                "CPPFLAGS_DRIVER=-D" . uc($self->{name}),
+                                ($self->{verbose} ? "CPPFLAGS_DRIVER2=-DTEST_VERBOSE=1" : ""),
                                 ($param{benchmark} ? "" : "OPT_FAST=-O0"),
                                 ($param{benchmark} ? "" : "OPT_GLOBAL=-O0"),
                                 "$self->{VM_PREFIX}",  # bypass default rule, as we don't need archive
-                                ($param{make_flags}||""),
+                                ($param{make_flags} || ""),
                         ]);
         }
 
@@ -1206,7 +1207,7 @@ sub compile {
             $self->_run(logfile => "$self->{obj_dir}/vlt_cmake_build.log",
                         cmd => ["cmake",
                                 "--build", $self->{obj_dir},
-                                ($self->{verbose}?"--verbose":""),
+                                ($self->{verbose} ? "--verbose" : ""),
                         ]);
         }
     }
@@ -1217,8 +1218,8 @@ sub compile {
     if ($param{make_pli}) {
         $self->oprint("Compile vpi\n") if $self->{verbose};
         my @cmd = ($ENV{CXX}, @{$param{pli_flags}},
-                   "-D".$param{tool_define},
-                   "-DIS_VPI", ($ENV{CFLAGS}||''),
+                   "-D" . $param{tool_define},
+                   "-DIS_VPI", ($ENV{CFLAGS} || ''),
                    "$self->{t_dir}/$self->{pli_filename}");
 
         $self->_run(logfile=>"$self->{obj_dir}/pli_compile.log",
@@ -1230,7 +1231,7 @@ sub compile {
 }
 
 sub execute {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     return 1 if $self->errors || $self->skips || $self->unsupporteds;
     my %param = (%{$self}, @_);  # Default arguments are from $self
     # params may be expect or {tool}_expect
@@ -1245,7 +1246,7 @@ sub execute {
     if ($param{atsim}) {
         $self->_run(logfile=>"$self->{obj_dir}/atsim_sim.log",
                     fails=>$param{fails},
-                    cmd=>["echo q | ".$run_env."$self->{obj_dir}/athdl_sv",
+                    cmd=>["echo q | " . $run_env . "$self->{obj_dir}/athdl_sv",
                           @{$param{atsim_run_flags}},
                           @{$param{all_run_flags}},
                           ],
@@ -1257,7 +1258,7 @@ sub execute {
     elsif ($param{ghdl}) {
         $self->_run(logfile=>"$self->{obj_dir}/ghdl_sim.log",
                     fails=>$param{fails},
-                    cmd=>[$run_env."$self->{obj_dir}/simghdl",
+                    cmd=>[$run_env . "$self->{obj_dir}/simghdl",
                           @{$param{ghdl_run_flags}},
                           @{$param{all_run_flags}},
                           ],
@@ -1267,7 +1268,7 @@ sub execute {
                     );
     }
     elsif ($param{iv}) {
-        my @cmd = ($run_env."$self->{obj_dir}/simiv",
+        my @cmd = ($run_env . "$self->{obj_dir}/simiv",
                    @{$param{iv_run_flags}},
                    @{$param{all_run_flags}},
                           );
@@ -1284,13 +1285,13 @@ sub execute {
                     );
     }
     elsif ($param{ms}) {
-        my @pli_opt=();
+        my @pli_opt = ();
         if ($param{use_libvpi}) {
             unshift @pli_opt, "-pli $self->{obj_dir}/libvpi.so";
         }
         $self->_run(logfile=>"$self->{obj_dir}/ms_sim.log",
                     fails=>$param{fails},
-                    cmd=>["echo q | ".$run_env.($ENV{VERILATOR_MODELSIM}||"vsim"),
+                    cmd=>["echo q | " . $run_env . ($ENV{VERILATOR_MODELSIM} || "vsim"),
                           @{$param{ms_run_flags}},
                           @{$param{all_run_flags}},
                           @{pli_opt},
@@ -1304,7 +1305,7 @@ sub execute {
     elsif ($param{nc}) {
         $self->_run(logfile=>"$self->{obj_dir}/nc_sim.log",
                     fails=>$param{fails},
-                    cmd=>["echo q | ".$run_env.($ENV{VERILATOR_NCVERILOG}||"ncverilog"),
+                    cmd=>["echo q | " . $run_env . ($ENV{VERILATOR_NCVERILOG} || "ncverilog"),
                           @{$param{nc_run_flags}},
                           @{$param{all_run_flags}},
                           ],
@@ -1317,7 +1318,7 @@ sub execute {
         #my $fh = IO::File->new(">simv.key") or die "%Error: $! simv.key,";
         #$fh->print("quit\n"); $fh->close;
         $self->_run(logfile=>"$self->{obj_dir}/vcs_sim.log",
-                    cmd=>["echo q | ".$run_env."./simv",
+                    cmd=>["echo q | " . $run_env . "./simv",
                           @{$param{vcs_run_flags}},
                           @{$param{all_run_flags}},
                           ],
@@ -1329,7 +1330,7 @@ sub execute {
     elsif ($param{xsim}) {
         $self->_run(logfile=>"$self->{obj_dir}/xsim_sim.log",
                     fails=>$param{fails},
-                    cmd=>[$run_env.($ENV{VERILATOR_XELAB}||"xelab"),
+                    cmd=>[$run_env . ($ENV{VERILATOR_XELAB} || "xelab"),
                           @{$param{xsim_run_flags}},
                           @{$param{xsim_run_flags2}},
                           @{$param{all_run_flags}},
@@ -1346,7 +1347,7 @@ sub execute {
         $param{executable} ||= "$self->{obj_dir}/$param{VM_PREFIX}";
         my $debugger = "";
         if ($opt_gdbsim) {
-            $debugger = ($ENV{VERILATOR_GDB}||"gdb")." ";
+            $debugger = ($ENV{VERILATOR_GDB} || "gdb") . " ";
         } elsif ($opt_rrsim) {
             $debugger = "rr record ";
         }
@@ -1361,7 +1362,6 @@ sub execute {
                     %param,
                     expect=>$param{expect},  # backward compatible name
                     expect_filename=>$param{expect_filename},  # backward compatible name
-                    verilator_run => 1,
                     );
     }
     else {
@@ -1370,7 +1370,7 @@ sub execute {
 }
 
 sub setenv {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $var = shift;
     my $val = shift;
     print "\texport $var='$val'\n";
@@ -1378,7 +1378,7 @@ sub setenv {
 }
 
 sub inline_checks {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     return 1 if $self->errors || $self->skips || $self->unsupporteds;
     return 1 if !$self->{vlt_all};
 
@@ -1392,21 +1392,21 @@ sub inline_checks {
     while (defined(my $line = $fh->getline)) {
         if ($line =~ /CHECK/) {
             if ($line =~ /CHECK_COVER *\( *([---0-9]+) *, *"([^"]+)" *, *("([^"]+)" *,|) *(\d+) *\)/) {
-                my $lineno = ($. + $1); my $hier=$2; my $comment=$4; my $count=$5;
-                my $regexp = "\001l\002".$lineno;
-                $regexp .= ".*\001o\002".quotemeta($comment) if $comment;
-                $regexp .= ".*\001h\002".quotemeta($hier) if $hier;
-                $regexp .= ".*' ".$count;
+                my $lineno = ($. + $1); my $hier = $2; my $comment = $4; my $count = $5;
+                my $regexp = "\001l\002" . $lineno;
+                $regexp .= ".*\001o\002" . quotemeta($comment) if $comment;
+                $regexp .= ".*\001h\002" . quotemeta($hier) if $hier;
+                $regexp .= ".*' " . $count;
                 if ($contents !~ /$regexp/) {
-                    $self->error("CHECK_COVER: $covfn: Regexp not found: $regexp\n".
+                    $self->error("CHECK_COVER: $covfn: Regexp not found: $regexp\n" .
                                  "From $self->{top_filename}:$.: $line");
                 }
             }
             elsif ($line =~ /CHECK_COVER_MISSING *\( *([---0-9]+) *\)/) {
                 my $lineno = ($. + $1);
-                my $regexp = "\001l\002".$lineno;
+                my $regexp = "\001l\002" . $lineno;
                 if ($contents =~ /$regexp/) {
-                    $self->error("CHECK_COVER_MISSING: $covfn: Regexp found: $regexp\n".
+                    $self->error("CHECK_COVER_MISSING: $covfn: Regexp found: $regexp\n" .
                                  "From $self->{top_filename}:$.: $line");
                 }
             }
@@ -1422,51 +1422,51 @@ sub inline_checks {
 # Accessors
 
 sub ok {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     $self->{ok} = $_[0] if defined $_[0];
     $self->{ok} = 0 if $self->{errors} || $self->{errors_keep_going} || $self->{skips} || $self->unsupporteds;
     return $self->{ok};
 }
 
 sub continuing {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     return !($self->errors || $self->skips || $self->unsupporteds);
 }
 
 sub errors {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     return $self->{errors};
 }
 
 sub golden_filename {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     $self->{golden_filename} = shift if defined $_[0];
     return $self->{golden_filename};
 }
 
 sub scenario_off {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     return $self->{scenario_off};
 }
 
 sub skips {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     return $self->{skips};
 }
 
 sub unsupporteds {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     return $self->{unsupporteds};
 }
 
 sub top_filename {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     $self->{top_filename} = shift if defined $_[0];
     return $self->{top_filename};
 }
 
 sub vhdl {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     $self->{vhdl} = shift if defined $_[0];
     if ($self->{vhdl}) {
         $self->{top_filename} =~ s/\.v$/\.vhdl/;
@@ -1475,18 +1475,18 @@ sub vhdl {
 }
 
 sub v_suffix {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     # Suffix for file type, e.g. .vhdl or .v
     return $self->{vhdl} ? "vhdl" : "v";
 }
 
 sub sc {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     return $self->{sc};
 }
 
 sub have_sc {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     return 1 if (defined $ENV{SYSTEMC} || defined $ENV{SYSTEMC_INCLUDE} || $ENV{CFG_HAVE_SYSTEMC});
     return 1 if $self->verilator_version =~ /systemc found *= *1/i;
     return 0;
@@ -1534,7 +1534,7 @@ sub get_default_vltmt_threads {
 }
 
 sub pli_filename {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     $self->{pli_filename} = shift if defined $_[0];
     return $self->{pli_filename};
 }
@@ -1545,7 +1545,7 @@ sub too_few_cores {
 }
 
 sub skip_if_too_few_cores {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     if (too_few_cores()) {
         $self->skip("Skipping due to too few cores\n");
     }
@@ -1560,7 +1560,7 @@ sub wno_unopthreads_for_few_cores {
 }
 
 sub VM_PREFIX {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     $self->{VM_PREFIX} = shift if defined $_[0];
     return $self->{VM_PREFIX};
 }
@@ -1568,17 +1568,18 @@ sub VM_PREFIX {
 #----------------------------------------------------------------------
 
 sub run {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     $self->_run(@_);
 }
+
 sub _run {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my %param = (tee => 1,
                  #entering =>  # Print entering directory information
                  #verilator_run =>  # Move gcov data to parallel area
                  @_);
 
-    my $command = join(' ',@{$param{cmd}});
+    my $command = join(' ', @{$param{cmd}});
     $command = "time $command" if $opt_benchmark && $command !~ /^cd /;
 
     if ($param{verilator_run}) {
@@ -1610,15 +1611,15 @@ sub _run {
         if ($param{logfile}) {
             $logfh = IO::File->new(">$param{logfile}") or die "%Error: Can't open $param{logfile}";
         }
-        my $pid=fork();
+        my $pid = fork();
         if ($pid) {  # Parent
             close CHILDWR;
             print "driver: Entering directory '",
                 File::Spec->rel2abs($param{entering}), "'\n" if $param{entering};
             while (1) {
                 my $buf = '';
-                my $got = sysread PARENTRD,$buf,10000;
-                last if defined $got && $got==0;
+                my $got = sysread PARENTRD, $buf, 10000;
+                last if defined $got && $got == 0;
                 print $buf if $param{tee};
                 print $logfh $buf if $logfh;
             }
@@ -1647,7 +1648,7 @@ sub _run {
             }
             exit($? ? 10 : 0);  # $?>>8 misses coredumps
         }
-        waitpid($pid,0);
+        waitpid($pid, 0);
         $status = $? || 0;
     }
     flush STDOUT;
@@ -1702,7 +1703,7 @@ sub _run {
                     #print "**BAD  $self->{name} $param{logfile} MT $moretry  $try\n";
                     next if $moretry;
                     $self->error("Miscompares in output from $param{cmd}[0]\n");
-                    $self->error("Might be error in regexp format\n") if $ok<1;
+                    $self->error("Might be error in regexp format\n") if $ok < 1;
                     print "GOT:\n";
                     print $wholefile;
                     print "ENDGOT\n";
@@ -1753,7 +1754,7 @@ sub _make_main {
     my $fh = IO::File->new(">$filename") or die "%Error: $! $filename,";
 
     print $fh "// Test defines\n";
-    print $fh "#define MAIN_TIME_MULTIPLIER ".($self->{main_time_multiplier} || 1)."\n";
+    print $fh "#define MAIN_TIME_MULTIPLIER " . ($self->{main_time_multiplier} || 1) . "\n";
 
     print $fh "#include <memory>\n";
     print $fh "#include <fstream>\n" if $self->{benchmarksim};
@@ -1802,7 +1803,7 @@ sub _make_main {
         print $fh "extern int sc_main(int argc, char** argv);\n";
         print $fh "int sc_main(int argc, char** argv) {\n";
         print $fh "    sc_signal<bool> fastclk;\n" if $self->{inputs}{fastclk};
-        print $fh "    sc_signal<bool> clk;\n"  if $self->{inputs}{clk};
+        print $fh "    sc_signal<bool> clk;\n" if $self->{inputs}{clk};
         print $fh "    sc_set_time_resolution(1, $Self->{sc_time_resolution});\n";
         print $fh "    sc_time sim_time($self->{sim_time}, $Self->{sc_time_resolution});\n";
     } else {
@@ -1812,9 +1813,10 @@ sub _make_main {
 
     print $fh "    const std::unique_ptr<VerilatedContext> contextp{new VerilatedContext};\n";
     print $fh "    contextp->commandArgs(argc, argv);\n";
-    print $fh "    contextp->debug(".($self->{verilated_debug}?1:0).");\n";
+    print $fh "    contextp->debug(" . ($self->{verilated_debug} ? 1 : 0) . ");\n";
     print $fh "    srand48(5);\n";  # Ensure determinism
-    print $fh "    contextp->randReset(".$self->{verilated_randReset}.");\n" if defined $self->{verilated_randReset};
+    print $fh "    contextp->randReset(" . $self->{verilated_randReset} . ");\n"
+        if defined $self->{verilated_randReset};
     print $fh "    topp.reset(new $VM_PREFIX(\"top\"));\n";
     print $fh "    contextp->internalsDump()\n;" if $self->{verilated_debug};
 
@@ -1843,7 +1845,8 @@ sub _make_main {
         $fh->print("    std::unique_ptr<VerilatedVcdC> tfp{new VerilatedVcdC};\n") if $self->{trace_format} eq 'vcd-c';
         $fh->print("    std::unique_ptr<VerilatedVcdSc> tfp{new VerilatedVcdSc};\n") if $self->{trace_format} eq 'vcd-sc';
         $fh->print("    topp->trace(tfp.get(), 99);\n");
-        $fh->print("    tfp->open(\"".$self->trace_filename."\");\n");
+        $fh->print("    tfp->open(\"" . $self->trace_filename . "\");\n");
+
         if ($self->{trace} && !$self->sc) {
             $fh->print("    if (tfp) tfp->dump(contextp->time());\n");
         }
@@ -1874,13 +1877,13 @@ sub _make_main {
     print $fh "    while ((${time} < sim_time * MAIN_TIME_MULTIPLIER)\n";
     print $fh "           && !contextp->gotFinish()) {\n";
 
-    for (my $i=0; $i<5; $i++) {
+    for (my $i = 0; $i < 5; $i++) {
         my $action = 0;
         if ($self->{inputs}{fastclk}) {
             print $fh "        ${set}fastclk = !${set}fastclk;\n";
             $action = 1;
         }
-        if ($i==0 && $self->{inputs}{clk}) {
+        if ($i == 0 && $self->{inputs}{clk}) {
             print $fh "        ${set}clk = !${set}clk;\n";
             $action = 1;
         }
@@ -1906,20 +1909,20 @@ sub _make_main {
     if ($self->{benchmarksim}) {
         $fh->print("    {\n");
         $fh->print("        const std::chrono::duration<double> exec_s =  std::chrono::steady_clock::now() - starttime;\n");
-        $fh->print("        std::ofstream benchfile(\"".$self->benchmarksim_filename()."\", std::ofstream::out | std::ofstream::app);\n");
+        $fh->print("        std::ofstream benchfile(\"" . $self->benchmarksim_filename() . "\", std::ofstream::out | std::ofstream::app);\n");
         $fh->print("        benchfile << std::fixed << std::setprecision(9) << n_evals << \",\" << exec_s.count() << std::endl;\n");
         $fh->print("        benchfile.close();\n");
         $fh->print("    }\n");
     }
 
     print $fh "    if (!contextp->gotFinish()) {\n";
-    print $fh '        vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish");',"\n";
+    print $fh '        vl_fatal(__FILE__, __LINE__, "main", "%Error: Timeout; never got a $finish");', "\n";
     print $fh "    }\n";
     print $fh "    topp->final();\n";
 
     if ($self->{coverage}) {
         $fh->print("#if VM_COVERAGE\n");
-        $fh->print("    VerilatedCov::write(\"",$self->{coverage_filename},"\");\n");
+        $fh->print("    VerilatedCov::write(\"", $self->{coverage_filename}, "\");\n");
         $fh->print("#endif  // VM_COVERAGE\n");
     }
     if ($self->{trace}) {
@@ -1985,10 +1988,10 @@ sub _make_top_v {
     }
     # Inst
     print $fh "    t t (\n";
-    my $comma="";
+    my $comma = "";
     foreach my $inp (sort (keys %{$self->{inputs}})) {
         print $fh "      ${comma}.${inp} (${inp})\n";
-        $comma=",";
+        $comma = ",";
     }
     print $fh "    );\n";
 
@@ -1996,8 +1999,8 @@ sub _make_top_v {
     print $fh "\n";
     print $fh "`ifdef WAVES\n";
     print $fh "   initial begin\n";
-    print $fh "      \$display(\"-Tracing Waves to Dumpfile: ".$self->trace_filename."\");\n";
-    print $fh "      \$dumpfile(\"".$self->trace_filename."\");\n";
+    print $fh "      \$display(\"-Tracing Waves to Dumpfile: " . $self->trace_filename . "\");\n";
+    print $fh "      \$dumpfile(\"" . $self->trace_filename . "\");\n";
     print $fh "      \$dumpvars(0, top);\n";
     print $fh "   end\n";
     print $fh "`endif\n";
@@ -2011,10 +2014,10 @@ sub _make_top_v {
     print $fh "        fastclk = 1;\n" if $self->{inputs}{fastclk};
     print $fh "        clk = 1;\n" if $self->{inputs}{clk};
     print $fh "        while (\$time < $self->{sim_time}) begin\n";
-    for (my $i=0; $i<5; $i++) {
+    for (my $i = 0; $i < 5; $i++) {
         print $fh "          #1;\n";
         print $fh "          fastclk = !fastclk;\n" if $self->{inputs}{fastclk};
-        print $fh "          clk = !clk;\n" if $i==4 && $self->{inputs}{clk};
+        print $fh "          clk = !clk;\n" if $i == 4 && $self->{inputs}{clk};
     }
     print $fh "        end\n";
     print $fh "    end\n";
@@ -2039,7 +2042,7 @@ sub _make_top_vhdl {
         my $semi = "";
         foreach my $inp (@ports) {
             print $fh "        ${semi}${inp} : in std_logic\n";
-            $semi=";";
+            $semi = ";";
         }
         print $fh "    );\n";
     }
@@ -2060,10 +2063,10 @@ sub _make_top_vhdl {
     print $fh "    t : t_ent\n";
     if ($#ports >= 0) {
         print $fh "       port map(\n";
-        my $comma="";
+        my $comma = "";
         foreach my $inp (@ports) {
             print $fh "\t${comma}${inp} => ${inp}\n";
-            $comma=",";
+            $comma = ",";
         }
         print $fh "    )\n";
     }
@@ -2085,7 +2088,7 @@ sub _read_inputs_v {
     my $filename = $self->top_filename;
     $filename = "$self->{t_dir}/$filename" if !-r $filename;
     my $fh = IO::File->new("<$filename") or die "%Error: $! $filename,";
-    my $get_sigs=1;
+    my $get_sigs = 1;
     my %inputs;
     while (defined(my $line = $fh->getline)) {
         if ($get_sigs) {
@@ -2134,9 +2137,9 @@ sub verilator_version {
     # Returns verbose version, line 1 contains actual version
     if (!defined $_Verilator_Version) {
         my @args = ("perl", "$ENV{VERILATOR_ROOT}/bin/verilator", "-V");
-        my $args = join(' ',@args);
+        my $args = join(' ', @args);
         $_Verilator_Version = `$args`;
-        $_Verilator_Version or die "can't fork: $! ".join(' ',@args);
+        $_Verilator_Version or die "can't fork: $! " . join(' ', @args);
         chomp $_Verilator_Version;
     }
     return $_Verilator_Version if defined $_Verilator_Version;
@@ -2146,7 +2149,7 @@ sub verilator_version {
 # File utilities
 
 sub files_identical {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $fn1 = shift;
     my $fn2 = shift;
     my $fn1_is_logfile = shift;
@@ -2179,6 +2182,8 @@ sub files_identical {
                     && !/^dot [^\n]+\n/
                     && !/^In file: .*\/sc_.*:\d+/
                     && !/^libgcov.*/
+                    && !/--- \/tmp\//  # t_difftree.pl
+                    && !/\+\+\+ \/tmp\//  # t_difftree.pl
             } @l1;
             @l1 = map {
                 s/(Internal Error: [^\n]+\.cpp):[0-9]+:/$1:#:/;
@@ -2199,17 +2204,17 @@ sub files_identical {
                 }
             }
         }
-        my $nl = $#l1;  $nl = $#l2 if ($#l2 > $nl);
+        my $nl = $#l1; $nl = $#l2 if ($#l2 > $nl);
         for (my $l=0; $l<=$nl; ++$l) {
-            if (($l1[$l]||"") ne ($l2[$l]||"")) {
+            if (($l1[$l] || "") ne ($l2[$l] || "")) {
                 next try if $moretry;
-                $self->error_keep_going("Line ".($l+1)." miscompares; $fn1 != $fn2");
-                warn("F1: ".($l1[$l]||"*EOF*\n")
-                     ."F2: ".($l2[$l]||"*EOF*\n"));
+                $self->error_keep_going("Line " . ($l+1) . " miscompares; $fn1 != $fn2");
+                warn("F1: " . ($l1[$l] || "*EOF*\n")
+                     . "F2: " . ($l2[$l] || "*EOF*\n"));
                 if ($ENV{HARNESS_UPDATE_GOLDEN}) {  # Update golden files with current
                     warn "%Warning: HARNESS_UPDATE_GOLDEN set: cp $fn1 $fn2\n";
                     my $fhw = IO::File->new(">$fn2") or $self->error("Files_identical $! $fn2\n");
-                    $fhw->print(join('',@l1));
+                    $fhw->print(join('', @l1));
                 } else {
                     warn "To update reference: HARNESS_UPDATE_GOLDEN=1 {command} or --golden\n";
                 }
@@ -2221,7 +2226,7 @@ sub files_identical {
 }
 
 sub files_identical_sorted {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $fn1 = shift;
     my $fn2 = shift;
     my $fn1_is_logfile = shift;
@@ -2235,7 +2240,7 @@ sub files_identical_sorted {
 }
 
 sub copy_if_golden {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $fn1 = shift;
     my $fn2 = shift;
     if ($ENV{HARNESS_UPDATE_GOLDEN}) {  # Update golden files with current
@@ -2246,7 +2251,7 @@ sub copy_if_golden {
 }
 
 sub vcd_identical {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $fn1 = shift;
     my $fn2 = shift;
     return 0 if $self->errors || $self->skips || $self->unsupporteds;
@@ -2288,7 +2293,7 @@ sub vcd_identical {
 }
 
 sub fst2vcd {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $fn1 = shift;
     my $fn2 = shift;
     if (!-r $fn1) { $self->error("File does not exist $fn1\n"); return 0; }
@@ -2304,17 +2309,17 @@ sub fst2vcd {
 }
 
 sub fst_identical {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $fn1 = shift;
     my $fn2 = shift;
     return 0 if $self->errors || $self->skips || $self->unsupporteds;
-    my $tmp = $fn1.".vcd";
+    my $tmp = $fn1 . ".vcd";
     fst2vcd($fn1, $tmp);
     return vcd_identical($tmp, $fn2);
 }
 
 sub _vcd_read {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $filename = shift;
     my $data = {};
     my $fh = IO::File->new("<$filename");
@@ -2327,7 +2332,7 @@ sub _vcd_read {
             push @hier, $hier[$#hier]->{$1};
             $lasthier = $hier[$#hier];
         } elsif ($line =~ /(\$var \S+\s+\d+\s+)\S+\s+(\S+)/) {
-            $hier[$#hier]->{$1.$2} ||= {};
+            $hier[$#hier]->{$1 . $2} ||= {};
             $lasthier = $hier[$#hier];
         } elsif ($line =~ /(\$attrbegin .* \$end)/) {
             if ($lasthier) { $lasthier->{$1} ||= 1; }
@@ -2343,6 +2348,7 @@ sub _vcd_read {
 }
 
 our $_Cxx_Version;
+
 sub cxx_version {
     $_Cxx_Version ||= `$ENV{MAKE} -C $ENV{VERILATOR_ROOT}/test_regress -f Makefile print-cxx-version`;
     return $_Cxx_Version;
@@ -2353,12 +2359,14 @@ sub cfg_with_threaded {
 }
 
 our $_Cfg_with_ccache;
+
 sub cfg_with_ccache {
     $_Cfg_with_ccache ||= `grep "OBJCACHE \?= ccache" "$ENV{VERILATOR_ROOT}/include/verilated.mk"` ne "";
     return $_Cfg_with_ccache;
 }
 
 our $_Cfg_with_m32;
+
 sub cfg_with_m32 {
     $_Cfg_with_m32 ||= `grep "CXX.*=.*-m32" "$ENV{VERILATOR_ROOT}/include/verilated.mk"` ne "";
     return $_Cfg_with_m32;
@@ -2373,34 +2381,34 @@ sub tries {
 }
 
 sub glob_all {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $pattern = shift;
 
     return glob($pattern);
 }
 
 sub glob_one {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $pattern = shift;
     return if $self->errors || $self->skips || $self->unsupporteds;
 
     my @files = glob($pattern);
     my $n = scalar @files;
     if ($n == 0) {
-      $self->error("glob_one: pattern '$pattern' does not match any files\n");
+        $self->error("glob_one: pattern '$pattern' does not match any files\n");
     } elsif ($n != 1) {
-      my $msg = "glob_one: pattern '$pattern' matches multiple files:\n";
-      foreach my $file (@files) {
-        $msg .= $file."\n";
-      }
-      $self->error($msg);
+        my $msg = "glob_one: pattern '$pattern' matches multiple files:\n";
+        foreach my $file (@files) {
+            $msg .= $file . "\n";
+        }
+        $self->error($msg);
     } else {
-      return $files[0];
+        return $files[0];
     }
 }
 
 sub file_grep_not {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $filename = shift;
     my $regexp = shift;
     my $expvalue = shift;
@@ -2415,7 +2423,7 @@ sub file_grep_not {
 }
 
 sub file_grep {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $filename = shift;
     my $regexp = shift;
     my $expvalue = shift;
@@ -2438,18 +2446,18 @@ sub file_grep_any {
     return if $self->errors || $self->skips || $self->unsupporteds;
 
     foreach my $filename (@filenames) {
-      my $contents = $self->file_contents($filename);
-      return if ($contents eq "_Already_Errored_");
-      if ($contents =~ /$regexp/) {
-        if ($expvalue && $expvalue ne $1) {
-          $self->error("file_grep: $filename: Got='$1' Expected='$expvalue' in regexp: $regexp\n");
+        my $contents = $self->file_contents($filename);
+        return if ($contents eq "_Already_Errored_");
+        if ($contents =~ /$regexp/) {
+            if ($expvalue && $expvalue ne $1) {
+                $self->error("file_grep: $filename: Got='$1' Expected='$expvalue' in regexp: $regexp\n");
+            }
+            return;
         }
-        return;
-      }
     }
     my $msg = "file_grep_any: Regexp '$regexp' not found in any of the following files:\n";
     foreach my $filename (@filenames) {
-      $msg .= $filename."\n";
+        $msg .= $filename . "\n";
     }
     $self->error($msg);
 }
@@ -2457,14 +2465,14 @@ sub file_grep_any {
 my %_File_Contents_Cache;
 
 sub file_contents {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $filename = shift;
 
     if (!$_File_Contents_Cache{$filename}) {
         my $fh = IO::File->new("<$filename");
         if (!$fh) {
             $_File_Contents_Cache{$filename} = "_Already_Errored_";
-            $self->error("File_grep file not found: ".$filename."\n");
+            $self->error("File_grep file not found: " . $filename . "\n");
             return $_File_Contents_Cache{$filename};
         }
         local $/; undef $/;
@@ -2477,7 +2485,7 @@ sub file_contents {
 }
 
 sub write_wholefile {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $filename = shift;
     my $contents = shift;
     my $fh = IO::File->new(">$filename") or die "%Error: $! writing $filename,";
@@ -2487,7 +2495,7 @@ sub write_wholefile {
 }
 
 sub file_sed {
-    my $self = (ref $_[0]? shift : $Self);
+    my $self = (ref $_[0] ? shift : $Self);
     my $infilename = shift;
     my $outfilename = shift;
     my $editcb = shift;
@@ -2501,8 +2509,8 @@ sub file_sed {
 }
 
 sub extract {
-    my $self = (ref $_[0]? shift : $Self);
-    my %param = (#in =>,
+    my $self = (ref $_[0] ? shift : $Self);
+    my %param = (  #in =>,
         #out =>
         regexp => qr/.*/,
         lineno_adjust => -9999,
@@ -2548,6 +2556,7 @@ sub extract {
         $fhw->print("   :linenos:\n") if $lang && $#out > 0;
         $fhw->print("   :emphasize-lines: ${emph}\n") if $emph;
         $fhw->print("\n");
+
         foreach my $line (@out) {
             $fhw->print($line);
         }
@@ -2588,6 +2597,7 @@ sub new {
     bless $self, $class;
     return $self;
 }
+
 sub schedule {
     my $self = shift;
     my %params = (@_);
@@ -2602,16 +2612,17 @@ sub schedule {
     $params{run_on_finish}->($self);
     return $self;
 }
-sub max_proc {}
-sub sig_child {}
-sub kill_tree_all {}
-sub wait_all {}
-sub ready {}
-sub running {}
+sub max_proc { }
+sub sig_child { }
+sub kill_tree_all { }
+sub wait_all { }
+sub ready { }
+sub running { }
 sub is_any_left { return 0; }
 
 #######################################################################
 1;
+
 package main;
 __END__
 
diff --git a/test_regress/t/t_a4_examples.pl b/test_regress/t/t_a4_examples.pl
index 0859499b3..c2b374144 100755
--- a/test_regress/t/t_a4_examples.pl
+++ b/test_regress/t/t_a4_examples.pl
@@ -14,7 +14,7 @@ $Self->{clean_command} = 'rm -rf ../examples/*/build ../examples/*/obj*';
 
 my @examples = sort(glob("../examples/*"));
 for my $example (@examples) {
-    run(cmd=>["$ENV{MAKE} -C $example"]);
+    run(cmd => ["$ENV{MAKE} -C $example"]);
 }
 
 ok(1);
diff --git a/test_regress/t/t_alw_noreorder.pl b/test_regress/t/t_alw_noreorder.pl
index 7d6eceb88..46d021e6b 100755
--- a/test_regress/t/t_alw_noreorder.pl
+++ b/test_regress/t/t_alw_noreorder.pl
@@ -24,7 +24,7 @@ file_grep_any(\@files, qr/dly__t__DOT__v1/i);
 file_grep_any(\@files, qr/dly__t__DOT__v2/i);
 
 execute(
-    check_finished=>1,
+    check_finished => 1,
     );
 
 ok(1);
diff --git a/test_regress/t/t_alw_nosplit.pl b/test_regress/t/t_alw_nosplit.pl
index ae8fcb03f..a1d66aaad 100755
--- a/test_regress/t/t_alw_nosplit.pl
+++ b/test_regress/t/t_alw_nosplit.pl
@@ -19,7 +19,7 @@ if ($Self->{vlt_all}) {
 }
 
 execute(
-    check_finished=>1,
+    check_finished => 1,
     );
 
 ok(1);
diff --git a/test_regress/t/t_alw_reorder.pl b/test_regress/t/t_alw_reorder.pl
index dce2f101f..90a7126da 100755
--- a/test_regress/t/t_alw_reorder.pl
+++ b/test_regress/t/t_alw_reorder.pl
@@ -28,7 +28,7 @@ foreach my $file (
 }
 
 execute(
-    check_finished=>1,
+    check_finished => 1,
     );
 
 ok(1);
diff --git a/test_regress/t/t_assert_cover.pl b/test_regress/t/t_assert_cover.pl
index e72a962af..0ea346fe6 100755
--- a/test_regress/t/t_assert_cover.pl
+++ b/test_regress/t/t_assert_cover.pl
@@ -33,7 +33,7 @@ if ($Self->{nc}) {
     }
     run(logfile => "$Self->{obj_dir}/${name}__nccover.log",
         tee => 0,
-        cmd => [($ENV{VERILATOR_ICCR}||'iccr'),
+        cmd => [($ENV{VERILATOR_ICCR} || 'iccr'),
                 "-test ${name} ${cf}"]);
 }
 
diff --git a/test_regress/t/t_benchmarksim.pl b/test_regress/t/t_benchmarksim.pl
index d8a8bb18f..59fed2993 100755
--- a/test_regress/t/t_benchmarksim.pl
+++ b/test_regress/t/t_benchmarksim.pl
@@ -16,7 +16,7 @@ top_filename("t/t_gen_alw.v");
 init_benchmarksim();
 
 # As an example, compile and simulate the top file with varying optimization level
-my @l_opt = (1,2,3);
+my @l_opt = (1, 2, 3);
 
 foreach my $l_opt (@l_opt) {
     compile(
@@ -29,7 +29,7 @@ foreach my $l_opt (@l_opt) {
         );
 }
 
-my $fh = IO::File->new("<".benchmarksim_filename()) or error("Benchmark data file not found");
+my $fh = IO::File->new("<" . benchmarksim_filename()) or error("Benchmark data file not found");
 my $lines = 0;
 while (defined(my $line = $fh->getline)) {
     if ($line =~ /^#/) { next; }
@@ -37,16 +37,17 @@ while (defined(my $line = $fh->getline)) {
         error("Expected header but found $line") if $line ne "evals, time[s]\n";
     } else {
         my @data = grep {$_ != ""} ($line =~ /(\d*\.?\d*)/g);
-        error("Expected 2 tokens on line ".$lines." but got ".scalar(@data)) if scalar(@data) != 2;
+        error("Expected 2 tokens on line " . $lines . " but got " . scalar(@data)) if scalar(@data) != 2;
         my $cycles = $data[0];
         my $time = $data[1];
-        error("Invalid data on line ".$lines) if $cycles <= 0.0 || $time <= 0.0;
+        error("Invalid data on line " . $lines) if $cycles <= 0.0 || $time <= 0.0;
     }
     $lines += 1;
 }
 my $n_lines_expected = scalar(@l_opt) + 1;
 
-error("Expected ".$n_lines_expected." lines but found ".$lines) if int($lines) != int($n_lines_expected);
+error("Expected " . $n_lines_expected . " lines but found " . $lines)
+    if int($lines) != int($n_lines_expected);
 
 1;
 ok(1);
diff --git a/test_regress/t/t_bigmem_bad.out b/test_regress/t/t_bigmem_bad.out
new file mode 100644
index 000000000..ec7bfe78a
--- /dev/null
+++ b/test_regress/t/t_bigmem_bad.out
@@ -0,0 +1,7 @@
+%Warning-SELRANGE: t/t_bigmem_bad.v:14:19: Selection index out of range: (nodep->declElWidth() == 0) -1:0 outside 268435455:0
+                                         : ... In instance t_bigmem
+   14 |       if (wen) mem[addr] <= data;
+      |                   ^
+                   ... For warning description see https://verilator.org/warn/SELRANGE?v=latest
+                   ... Use "/* verilator lint_off SELRANGE */" and lint_on around source to disable this message.
+%Error: Exiting due to
diff --git a/test_regress/t/t_bigmem_bad.pl b/test_regress/t/t_bigmem_bad.pl
new file mode 100755
index 000000000..8e592a491
--- /dev/null
+++ b/test_regress/t/t_bigmem_bad.pl
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2021 by filamoon. This program is free software; you can
+# redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(linter => 1);
+
+lint(
+    fails => 1,
+    expect_filename => $Self->{golden_filename},
+    );
+
+ok(1);
+1;
diff --git a/test_regress/t/t_bigmem_bad.v b/test_regress/t/t_bigmem_bad.v
new file mode 100644
index 000000000..eadfb3f1e
--- /dev/null
+++ b/test_regress/t/t_bigmem_bad.v
@@ -0,0 +1,16 @@
+// This test shall generate a warning, but not an internal error.
+//
+// This file ONLY is placed under the Creative Commons Public Domain, for
+// any use, without warranty, 2021 by Zhanglei Wang.
+// SPDX-License-Identifier: CC0-1.0
+module t_bigmem(
+   input wire clk,
+   input wire [27:0] addr,
+   input wire [255:0] data,
+   input wire wen
+);
+   reg [(1<<28)-1:0][255:0] mem;
+   always @(posedge clk) begin
+      if (wen) mem[addr] <= data;
+   end
+endmodule
diff --git a/test_regress/t/t_ccache_report.pl b/test_regress/t/t_ccache_report.pl
index d874fd27c..a17c2b02e 100755
--- a/test_regress/t/t_ccache_report.pl
+++ b/test_regress/t/t_ccache_report.pl
@@ -11,13 +11,13 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(vlt => 1);
 
 if (!$Self->cfg_with_ccache) {
-  skip("Requires configuring with ccache");
+    skip("Requires configuring with ccache");
 }
 
 top_filename("t_a1_first_cc.v");
 
 # This test requires rebuilding the object files to check the ccache log
-foreach my $filename (glob ("$Self->{obj_dir}/*.o")) {
+foreach my $filename (glob("$Self->{obj_dir}/*.o")) {
     print "rm $filename\n" if $Self->{verbose};
     unlink $filename;
 }
diff --git a/test_regress/t/t_cellarray.pl b/test_regress/t/t_cellarray.pl
index 78f9ca462..710539001 100755
--- a/test_regress/t/t_cellarray.pl
+++ b/test_regress/t/t_cellarray.pl
@@ -20,7 +20,7 @@ execute(
 
 if ($Self->{vlt_all}) {
     file_grep($Self->{stats}, qr/Optimizations, Gate assign merged\s+(\d+)/i, 28);
-};
+}
 
 ok(1);
 1;
diff --git a/test_regress/t/t_class_extends_protect_ids.pl b/test_regress/t/t_class_extends_protect_ids.pl
index b48534f3c..883b58df0 100755
--- a/test_regress/t/t_class_extends_protect_ids.pl
+++ b/test_regress/t/t_class_extends_protect_ids.pl
@@ -12,8 +12,8 @@ scenarios(vlt_all => 1);
 
 # This test makes randomly named .cpp/.h files, which tend to collect, so remove them first
 foreach my $filename (glob ("$Self->{obj_dir}/*_PS*.cpp"
-                            ." $Self->{obj_dir}/*_PS*.h"
-                            ." $Self->{obj_dir}/*.d" )) {
+                            . " $Self->{obj_dir}/*_PS*.h"
+                            . " $Self->{obj_dir}/*.d" )) {
     print "rm $filename\n" if $Self->{verbose};
     unlink $filename;
 }
diff --git a/test_regress/t/t_class_extends_this_protect_ids.pl b/test_regress/t/t_class_extends_this_protect_ids.pl
index 47bee151d..6e7e4d2b2 100755
--- a/test_regress/t/t_class_extends_this_protect_ids.pl
+++ b/test_regress/t/t_class_extends_this_protect_ids.pl
@@ -12,8 +12,8 @@ scenarios(vlt_all => 1);
 
 # This test makes randomly named .cpp/.h files, which tend to collect, so remove them first
 foreach my $filename (glob ("$Self->{obj_dir}/*_PS*.cpp"
-                            ." $Self->{obj_dir}/*_PS*.h"
-                            ." $Self->{obj_dir}/*.d" )) {
+                            . " $Self->{obj_dir}/*_PS*.h"
+                            . " $Self->{obj_dir}/*.d" )) {
     print "rm $filename\n" if $Self->{verbose};
     unlink $filename;
 }
diff --git a/test_regress/t/t_class_format.out b/test_regress/t/t_class_format.out
index f620c0e3f..e4886448b 100644
--- a/test_regress/t/t_class_format.out
+++ b/test_regress/t/t_class_format.out
@@ -1,3 +1,4 @@
-''{b:'h1, i:'h2a, carray4:'{'h11, 'h22, 'h33, 'h44} }'
-''{b:'h1, i:'h2a, carray4:'{'h911, 'h922, 'h933, 'h944} }'
+''{b:'h1, i:'h2a, carray4:'{'h11, 'h22, 'h33, 'h44} , name:"object_name"}'
+''{b:'h1, i:'h2a, carray4:'{'h911, 'h922, 'h933, 'h944} , name:"object_name"}'
+DEBUG: object_name (@0) message
 *-* All Finished *-*
diff --git a/test_regress/t/t_class_format.v b/test_regress/t/t_class_format.v
index 001a5284e..15a67bca9 100644
--- a/test_regress/t/t_class_format.v
+++ b/test_regress/t/t_class_format.v
@@ -15,6 +15,10 @@ class Cls;
    bit b;
    int i;
    bit [15:0] carray4 [4];
+   string     name;
+   task debug();
+      $display("DEBUG: %s (@%0t) %s", this.name, $realtime, "message");
+   endtask
 endclass
 
 module t (/*AUTOARG*/);
@@ -23,6 +27,7 @@ module t (/*AUTOARG*/);
       c = new;
       c.b = '1;
       c.i = 42;
+      c.name = "object_name";
 
       c.carray4[0] = 16'h11;
       c.carray4[1] = 16'h22;
@@ -33,6 +38,8 @@ module t (/*AUTOARG*/);
       c.carray4 = '{16'h911, 16'h922, 16'h933, 16'h944};
       $display("'%p'", c);
 
+      c.debug();
+
       $write("*-* All Finished *-*\n");
       $finish;
    end
diff --git a/test_regress/t/t_class_local_protect_ids.pl b/test_regress/t/t_class_local_protect_ids.pl
index a1bab0242..724eb9525 100755
--- a/test_regress/t/t_class_local_protect_ids.pl
+++ b/test_regress/t/t_class_local_protect_ids.pl
@@ -12,8 +12,8 @@ scenarios(vlt_all => 1);
 
 # This test makes randomly named .cpp/.h files, which tend to collect, so remove them first
 foreach my $filename (glob ("$Self->{obj_dir}/*_PS*.cpp"
-                            ." $Self->{obj_dir}/*_PS*.h"
-                            ." $Self->{obj_dir}/*.d" )) {
+                            . " $Self->{obj_dir}/*_PS*.h"
+                            . " $Self->{obj_dir}/*.d")) {
     print "rm $filename\n" if $Self->{verbose};
     unlink $filename;
 }
diff --git a/test_regress/t/t_class_new.v b/test_regress/t/t_class_new.v
index 811693dc5..f15328470 100644
--- a/test_regress/t/t_class_new.v
+++ b/test_regress/t/t_class_new.v
@@ -26,10 +26,27 @@ class ClsArg;
    endfunction
 endclass
 
+class Cls2Arg;
+   int imembera;
+   int imemberb;
+   function new(int i, int j);
+      imembera = i + 1;
+      imemberb = j + 2;
+   endfunction
+
+   function Cls2Arg clone();
+      Cls2Arg ret;
+      ret = new(imembera, imemberb);
+      return ret;
+   endfunction
+endclass
+
 module t (/*AUTOARG*/);
    initial begin
       ClsNoArg c1;
-      ClsArg c2;
+      ClsArg   c2;
+      Cls2Arg  c3;
+      Cls2Arg  c4;
 
       c1 = new;
       if (c1.imembera != 5) $stop;
@@ -42,6 +59,14 @@ module t (/*AUTOARG*/);
       if (c2.imembera != 6) $stop;
       if (c2.geta() != 6) $stop;
 
+      c3 = new(4, 5);
+      if (c3.imembera != 5) $stop;
+      if (c3.imemberb != 7) $stop;
+
+      c4 = c3.clone();
+      if (c4.imembera != 6) $stop;
+      if (c4.imemberb != 9) $stop;
+
       $write("*-* All Finished *-*\n");
       $finish;
    end
diff --git a/test_regress/t/t_class_new_bad.out b/test_regress/t/t_class_new_bad.out
index ac1b98bf5..d85240aab 100644
--- a/test_regress/t/t_class_new_bad.out
+++ b/test_regress/t/t_class_new_bad.out
@@ -1,12 +1,13 @@
 %Error: t/t_class_new_bad.v:31:16: Too many arguments in function call to FUNC 'new'
+                                 : ... In instance t
    31 |       c1 = new(3);   
       |                ^
 %Error: t/t_class_new_bad.v:32:16: Too many arguments in function call to FUNC 'new'
+                                 : ... In instance t
    32 |       c2 = new(3);   
       |                ^
 %Error: t/t_class_new_bad.v:33:12: Missing argument on non-defaulted argument 'i' in function call to FUNC 'new'
+                                 : ... In instance t
    33 |       c3 = new();   
       |            ^~~
-%Error: Internal Error: t/t_class_new_bad.v:33:12: ../V3Broken.cpp:#: Width != WidthMin
-   33 |       c3 = new();   
-      |            ^~~
+%Error: Exiting due to
diff --git a/test_regress/t/t_class_static_method_protect_ids.pl b/test_regress/t/t_class_static_method_protect_ids.pl
index 876725638..208380b3b 100755
--- a/test_regress/t/t_class_static_method_protect_ids.pl
+++ b/test_regress/t/t_class_static_method_protect_ids.pl
@@ -12,8 +12,8 @@ scenarios(vlt_all => 1);
 
 # This test makes randomly named .cpp/.h files, which tend to collect, so remove them first
 foreach my $filename (glob ("$Self->{obj_dir}/*_PS*.cpp"
-                            ." $Self->{obj_dir}/*_PS*.h"
-                            ." $Self->{obj_dir}/*.d" )) {
+                            . " $Self->{obj_dir}/*_PS*.h"
+                            . " $Self->{obj_dir}/*.d")) {
     print "rm $filename\n" if $Self->{verbose};
     unlink $filename;
 }
diff --git a/test_regress/t/t_class_virtual_protect_ids.pl b/test_regress/t/t_class_virtual_protect_ids.pl
index ff478e073..940170b06 100755
--- a/test_regress/t/t_class_virtual_protect_ids.pl
+++ b/test_regress/t/t_class_virtual_protect_ids.pl
@@ -12,8 +12,8 @@ scenarios(vlt_all => 1);
 
 # This test makes randomly named .cpp/.h files, which tend to collect, so remove them first
 foreach my $filename (glob ("$Self->{obj_dir}/*_PS*.cpp"
-                            ." $Self->{obj_dir}/*_PS*.h"
-                            ." $Self->{obj_dir}/*.d" )) {
+                            . " $Self->{obj_dir}/*_PS*.h"
+                            . " $Self->{obj_dir}/*.d")) {
     print "rm $filename\n" if $Self->{verbose};
     unlink $filename;
 }
diff --git a/test_regress/t/t_clk_2in.pl b/test_regress/t/t_clk_2in.pl
index aa918c4cb..9f3664937 100755
--- a/test_regress/t/t_clk_2in.pl
+++ b/test_regress/t/t_clk_2in.pl
@@ -13,7 +13,7 @@ scenarios(simulator => 1);
 compile(
     make_top_shell => 0,
     make_main => 0,
-    verilator_flags2 => ["--exe","$Self->{t_dir}/$Self->{name}.cpp"],
+    verilator_flags2 => ["--exe", "$Self->{t_dir}/$Self->{name}.cpp"],
     vcs_flags2 => ['-assert'],
     );
 
diff --git a/test_regress/t/t_clk_condflop_nord.pl b/test_regress/t/t_clk_condflop_nord.pl
index 57e493fa0..0c03448c5 100755
--- a/test_regress/t/t_clk_condflop_nord.pl
+++ b/test_regress/t/t_clk_condflop_nord.pl
@@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(simulator => 1);
 
 compile(
-    verilator_flags2=>["-no-order-clock-delay"],
+    verilator_flags2 => ["-no-order-clock-delay"],
     );
 
 execute(
diff --git a/test_regress/t/t_const_opt.pl b/test_regress/t/t_const_opt.pl
index 5dbe1ff36..ee51e8030 100755
--- a/test_regress/t/t_const_opt.pl
+++ b/test_regress/t/t_const_opt.pl
@@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(simulator => 1);
 
 compile(
-    verilator_flags2=>["-Wno-UNOPTTHREADS", "--stats"],
+    verilator_flags2 => ["-Wno-UNOPTTHREADS", "--stats"],
     );
 
 execute(
diff --git a/test_regress/t/t_const_opt_cov.pl b/test_regress/t/t_const_opt_cov.pl
index a052e9321..211e1d79a 100755
--- a/test_regress/t/t_const_opt_cov.pl
+++ b/test_regress/t/t_const_opt_cov.pl
@@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(simulator => 1);
 
 compile(
-    verilator_flags2=>["-Wno-UNOPTTHREADS", "--stats", "--coverage", "--trace"],
+    verilator_flags2 => ["-Wno-UNOPTTHREADS", "--stats", "--coverage", "--trace"],
     );
 
 execute(
diff --git a/test_regress/t/t_const_opt_red.pl b/test_regress/t/t_const_opt_red.pl
index 666e5d478..a9e16fbf5 100755
--- a/test_regress/t/t_const_opt_red.pl
+++ b/test_regress/t/t_const_opt_red.pl
@@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(simulator => 1);
 
 compile(
-    verilator_flags2=>["-Wno-UNOPTTHREADS", "--stats"],
+    verilator_flags2 => ["-Wno-UNOPTTHREADS", "--stats"],
     );
 
 execute(
diff --git a/test_regress/t/t_const_opt_shortcut.pl b/test_regress/t/t_const_opt_shortcut.pl
index 89c1226dd..8955f94f0 100755
--- a/test_regress/t/t_const_opt_shortcut.pl
+++ b/test_regress/t/t_const_opt_shortcut.pl
@@ -12,7 +12,7 @@ scenarios(simulator => 1);
 
 compile(
     v_flags2 => ["t/$Self->{name}.cpp"],
-    verilator_flags2=>["-Wno-UNOPTTHREADS", "--stats"],
+    verilator_flags2 => ["-Wno-UNOPTTHREADS", "--stats"],
     );
 
 execute(
diff --git a/test_regress/t/t_cover_lib.pl b/test_regress/t/t_cover_lib.pl
index f54ac29df..986f0e572 100755
--- a/test_regress/t/t_cover_lib.pl
+++ b/test_regress/t/t_cover_lib.pl
@@ -13,7 +13,7 @@ scenarios(vlt => 1);
 compile(
     v_flags2 => ["--coverage t/t_cover_lib_c.cpp"],
     verilator_flags2 => ["--exe -Wall -Wno-DECLFILENAME"],
-    make_flags => 'CPPFLAGS_ADD=-DTEST_OBJ_DIR="'.$Self->{obj_dir}.'"',
+    make_flags => 'CPPFLAGS_ADD=-DTEST_OBJ_DIR="' . $Self->{obj_dir} . '"',
     make_top_shell => 0,
     make_main => 0,
     );
diff --git a/test_regress/t/t_cover_lib_legacy.pl b/test_regress/t/t_cover_lib_legacy.pl
index 9d53f269e..d63014f09 100755
--- a/test_regress/t/t_cover_lib_legacy.pl
+++ b/test_regress/t/t_cover_lib_legacy.pl
@@ -15,7 +15,7 @@ top_filename("t/t_cover_lib.v");
 compile(
     v_flags2 => ["--coverage t/t_cover_lib_c.cpp"],
     verilator_flags2 => ["--exe -Wall -Wno-DECLFILENAME"],
-    make_flags => 'CPPFLAGS_ADD=-DTEST_OBJ_DIR="'.$Self->{obj_dir}.'"',
+    make_flags => 'CPPFLAGS_ADD=-DTEST_OBJ_DIR="' . $Self->{obj_dir} . '"',
     make_top_shell => 0,
     make_main => 0,
     );
diff --git a/test_regress/t/t_cover_sva_notflat.pl b/test_regress/t/t_cover_sva_notflat.pl
index 5effab8e8..e4839f963 100755
--- a/test_regress/t/t_cover_sva_notflat.pl
+++ b/test_regress/t/t_cover_sva_notflat.pl
@@ -24,7 +24,7 @@ execute(
 # Check that the hierarchy doesn't include __PVT__
 # Otherwise our coverage reports would look really ugly
 if ($Self->{vlt_all}) {
-    file_grep($Self->{coverage_filename}, qr/(top\.t\.sub.*.cyc_eq_5)/)
+    file_grep($Self->{coverage_filename}, qr/(top\.t\.sub.*.cyc_eq_5)/);
 }
 
 ok(1);
diff --git a/test_regress/t/t_dedupe_clk_gate.pl b/test_regress/t/t_dedupe_clk_gate.pl
index 631bd3480..c4a09eb65 100755
--- a/test_regress/t/t_dedupe_clk_gate.pl
+++ b/test_regress/t/t_dedupe_clk_gate.pl
@@ -21,6 +21,5 @@ if ($Self->{vlt_all}) {
     file_grep($Self->{stats}, qr/Optimizations, Gate sigs deduped\s+(\d+)/i, 4);
 }
 
-
 ok(1);
 1;
diff --git a/test_regress/t/t_difftree.a.tree b/test_regress/t/t_difftree.a.tree
new file mode 100644
index 000000000..5d6c8ea90
--- /dev/null
+++ b/test_regress/t/t_difftree.a.tree
@@ -0,0 +1,11 @@
+Verilator Tree Dump (format 0x3900) from <e0> to <e663>
+     NETLIST 0x555556bb6000 <e1#> {a0aa}  $root [1ps/1ps]
+    1: MODULE 0x555556bc0120 <e661#> {d19ai}  t  L2 [1ps]
+    1:2: PORT 0x555556bc60d0 <e8#> {d21ae}  clk
+    1:2: VAR 0x555556bbe180 <e572#> {d23ak} @dt=0@  clk INPUT PORT
+    1:2:1: BASICDTYPE 0x555556bc61a0 <e12#> {d23ak} @dt=this@(nw0)  LOGIC_IMPLICIT kwd=LOGIC_IMPLICIT
+    3: TYPETABLE 0x555556bbc000 <e2#> {a0aa}
+                   logic  -> BASICDTYPE 0x555556c71a00 <e426#> {d55ap} @dt=this@(G/nw1)  logic [GENERIC] kwd=logic
+    3: CONSTPOOL 0x555556bbe000 <e6#> {a0aa}
+    3:1: MODULE 0x555556bc0000 <e4#> {a0aa}  @CONST-POOL@  L0 [NONE]
+    3:1:2: SCOPE 0x555556bb60f0 <e5#> {a0aa}  @CONST-POOL@ [abovep=0] [cellp=0] [modp=0x555556bc0000]
diff --git a/test_regress/t/t_difftree.b.tree b/test_regress/t/t_difftree.b.tree
new file mode 100644
index 000000000..8fa15a731
--- /dev/null
+++ b/test_regress/t/t_difftree.b.tree
@@ -0,0 +1,11 @@
+Verilator Tree Dump (format 0x3900) from <e0> to <e663>
+     NETLIST 0x55d6994da000 <e1#> {a0aa}  $root [1ps/1ps]
+    1: MODULE 0x55d6994e4120 <e661#> {d19ai}  t  L2 [1ps]
+    1:2: PORT 0x55d6994ea0d0 <e8#> {d21ae}  clk
+    1:2: VAR 0x55d6994e2180 <e572#> {d23ak} @dt=0@  clkmod INPUT PORT
+    1:2:1: BASICDTYPE 0x55d6994ea1a0 <e12#> {d23ak} @dt=this@(nw0)  LOGIC_IMPLICIT kwd=LOGIC_IMPLICIT
+    3: TYPETABLE 0x55d6994e0000 <e2#> {a0aa}
+                   logic  -> BASICDTYPE 0x55d699595a00 <e426#> {d55ap} @dt=this@(G/nw1)  logic [GENERIC] kwd=logic
+    3: CONSTPOOL 0x55d6994e2000 <e6#> {a0aa}
+    3:1: MODULE 0x55d6994e4000 <e4#> {a0aa}  @CONST-POOL@  L0 [NONE]
+    3:1:2: SCOPE 0x55d6994da0f0 <e5#> {a0aa}  @CONST-POOL@ [abovep=0] [cellp=0] [modp=0x55d6994e4000]
diff --git a/test_regress/t/t_difftree.out b/test_regress/t/t_difftree.out
new file mode 100644
index 000000000..505086b2f
--- /dev/null
+++ b/test_regress/t/t_difftree.out
@@ -0,0 +1,8 @@
+@@ -2,7 +2,7 @@
+      NETLIST 0x <e> {a0aa}  $root [1ps/1ps]
+     1: MODULE 0x <e> {d19ai}  t  L2 [1ps]
+     1:2: PORT 0x <e> {d21ae}  clk
++    1:2: VAR 0x <e> {d23ak} @dt=0@  clkmod INPUT PORT
+     1:2:1: BASICDTYPE 0x <e> {d23ak} @dt=this@(nw0)  LOGIC_IMPLICIT kwd=LOGIC_IMPLICIT
+     3: TYPETABLE 0x <e> {a0aa}
+                    logic  -> BASICDTYPE 0x <e> {d55ap} @dt=this@(G/nw1)  logic [GENERIC] kwd=logic
diff --git a/test_regress/t/t_difftree.pl b/test_regress/t/t_difftree.pl
new file mode 100755
index 000000000..26d88577e
--- /dev/null
+++ b/test_regress/t/t_difftree.pl
@@ -0,0 +1,21 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(dist => 1);
+
+run(cmd => ["cd $Self->{obj_dir} && $ENV{VERILATOR_ROOT}/bin/verilator_difftree"
+            . " $Self->{t_dir}/t_difftree.a.tree $Self->{t_dir}/t_difftree.b.tree > diff.log"],
+    check_finished => 0);
+
+files_identical("$Self->{obj_dir}/diff.log", $Self->{golden_filename}, 'logfile');
+
+ok(1);
+
+1;
diff --git a/test_regress/t/t_display.out b/test_regress/t/t_display.out
index 9b2b995ce..0475a1952 100644
--- a/test_regress/t/t_display.out
+++ b/test_regress/t/t_display.out
@@ -74,4 +74,12 @@ multiline
 '    beep'
 'beep    '
 log10(2) =                    2
+x
+xxXa
+XXX 1x5X
+         x
+         z
+         X
+         Z
+ZzX
 *-* All Finished *-*
diff --git a/test_regress/t/t_display.v b/test_regress/t/t_display.v
index 073268617..3f191f9b1 100644
--- a/test_regress/t/t_display.v
+++ b/test_regress/t/t_display.v
@@ -179,6 +179,16 @@ multiline", $time);
       $display("log10(2) = %d", $log10(100));
       // verilator lint_on REALCVT
 
+      // unknown and high-impedance values
+      $display("%d", 1'bx);
+      $display("%h", 14'bx01010);
+      $display("%h %o", 12'b001xxx101x01, 12'b001xxx101x01);
+      $display("%d", 32'bx);
+      $display("%d", 32'bz);
+      $display("%d", 32'b11x11z111);
+      $display("%d", 32'b11111z111);
+      $display("%h", 12'b1zz1_zzzz_1x1z);
+
       $write("*-* All Finished *-*\n");
       $finish;
    end
diff --git a/test_regress/t/t_display_merge.pl b/test_regress/t/t_display_merge.pl
index 66642748d..75570aba2 100755
--- a/test_regress/t/t_display_merge.pl
+++ b/test_regress/t/t_display_merge.pl
@@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(simulator => 1);
 
 compile(
-    verilator_flags2=>["--stats"],
+    verilator_flags2 => ["--stats"],
     );
 
 execute(
diff --git a/test_regress/t/t_dist_cinclude.pl b/test_regress/t/t_dist_cinclude.pl
index 873380dce..90b0723b7 100755
--- a/test_regress/t/t_dist_cinclude.pl
+++ b/test_regress/t/t_dist_cinclude.pl
@@ -51,7 +51,7 @@ if (!-r "$root/.git") {
     }
 
     if (keys %names) {
-        error("Files like stdint.h instead of cstdint: ",join(' ',sort keys %names));
+        error("Files like stdint.h instead of cstdint: ", join(' ', sort keys %names));
     }
 }
 
diff --git a/test_regress/t/t_dist_contributors.pl b/test_regress/t/t_dist_contributors.pl
index 527d3e157..a4b4db5a4 100755
--- a/test_regress/t/t_dist_contributors.pl
+++ b/test_regress/t/t_dist_contributors.pl
@@ -38,9 +38,9 @@ sub check {
         print "Check: $author\n" if $Self->{verbose};
         if (!$Contributors{$author}) {
             error("Certify your contribution by sorted-inserting '$author' into docs/CONTRIBUTORS.\n"
-                  ."   If '$author' is not your real name, please fix 'name=' in ~/.gitconfig\n"
-                  ."   Also check your https://github.com account's Settings->Profile->Name\n"
-                  ."   matches your ~/.gitconfig 'name='.\n");
+                  . "   If '$author' is not your real name, please fix 'name=' in ~/.gitconfig\n"
+                  . "   Also check your https://github.com account's Settings->Profile->Name\n"
+                  . "   matches your ~/.gitconfig 'name='.\n");
         }
     }
 }
diff --git a/test_regress/t/t_dist_error_format.pl b/test_regress/t/t_dist_error_format.pl
index 654a7c70f..30f8cebd4 100755
--- a/test_regress/t/t_dist_error_format.pl
+++ b/test_regress/t/t_dist_error_format.pl
@@ -58,7 +58,7 @@ sub formats {
                     }
                     else {
                         #print "FF $file $line\n";
-                        $warns{$file.":".$lineno} =
+                        $warns{$file . ":" . $lineno} =
                             "Non-standard warning/error: $file:$lineno: $line";
                     }
                 }
@@ -68,7 +68,7 @@ sub formats {
     $lnmatch or error("Check line number regexp is correct, no matches");
     if (keys %warns) {
         # First warning lists everything as that's shown in the driver summary
-        error($summary." ",join(' ',sort keys %warns));
+        error($summary . " ", join(' ', sort keys %warns));
         foreach my $file (sort keys %warns) {
             error($warns{$file});
         }
diff --git a/test_regress/t/t_dist_fixme.pl b/test_regress/t/t_dist_fixme.pl
index cbceb52c9..f722de3fe 100755
--- a/test_regress/t/t_dist_fixme.pl
+++ b/test_regress/t/t_dist_fixme.pl
@@ -21,16 +21,31 @@ if (!-r "$root/.git") {
     ### Must trim output before and after our file list
     my $files = `cd $root && git ls-files --exclude-standard`;
     print "ST $files\n" if $Debug;
+    my %names;
+
     $files =~ s/\s+/ /g;
-    my $cmd = "cd $root && grep -n -P '(FIX"."ME|BO"."ZO)' $files | sort";
-    my $grep = `$cmd`;
-    print "$grep\n";
-    if ($grep ne "") {
-        my %names;
-        foreach my $line (split /\n/, $grep) {
-            $names{$1} = 1 if $line =~ /^([^:]+)/;
+    # Split the file list into batches of roughly 10000 characters, so each
+    # grep below gets a bounded command line
+    my @batch;
+    my $n = 0;
+    foreach my $file (split /\s+/, $files) {
+        $batch[$n] .= $file . " ";
+        ++$n if (length($batch[$n]) > 10000);
+    }
+
+    foreach my $bfiles (@batch) {
+        my $cmd = "cd $root && grep -n -P '(FIX" . "ME|BO" . "ZO)' $bfiles | sort";
+        my $grep = `$cmd`;
+        if ($grep ne "") {
+            print "$grep\n";
+            foreach my $line (split /\n/, $grep) {
+                print "L $line\n" if $Debug;
+                $names{$1} = 1 if $line =~ /^([^:]+)/;
+            }
         }
-        error("Files with FIX"."MEs: ",join(' ',sort keys %names));
+    }
+    if (keys %names) {
+        error("Files with FIX" . "MEs: ", join(' ', sort keys %names));
     }
 }
 
diff --git a/test_regress/t/t_dist_inctree.pl b/test_regress/t/t_dist_inctree.pl
index 8b579db84..acbbaa079 100755
--- a/test_regress/t/t_dist_inctree.pl
+++ b/test_regress/t/t_dist_inctree.pl
@@ -34,7 +34,7 @@ sub inctree {
     my $grep = `$cmd`;
     foreach my $line (split /\n/, $grep) {
         if ($line =~ /^(\S+):(\d+):#\s*include\s*(\S+)/) {
-            my $filename = $1; my $line = $2+0; my $inc = $3;
+            my $filename = $1; my $line = $2 + 0; my $inc = $3;
             (my $base = $filename) =~ s!.*/(.*?)!$1!;
             $inc =~ s/[<>"]//g;
             $Files{$base}{filename} = $filename;
diff --git a/test_regress/t/t_dist_install.pl b/test_regress/t/t_dist_install.pl
index 67c6d5b2a..1c92e19ac 100755
--- a/test_regress/t/t_dist_install.pl
+++ b/test_regress/t/t_dist_install.pl
@@ -19,7 +19,7 @@ if (!-r "$root/.git") {
     skip("Not in a git repository");
 } else {
     my $cwd = getcwd();
-    my $destdir = "$cwd/".$Self->{obj_dir};
+    my $destdir = "$cwd/" . $Self->{obj_dir};
     # Start clean
     run(cmd => ["rm -rf $destdir && mkdir -p $destdir"],
         check_finished => 0);
@@ -48,7 +48,7 @@ if (!-r "$root/.git") {
         push @files, $file;
     }
     if ($#files >= 0) {
-        error("Uninstall missed files: ",join(' ',@files));
+        error("Uninstall missed files: ", join(' ', @files));
     }
 }
 
diff --git a/test_regress/t/t_dist_portability.pl b/test_regress/t/t_dist_portability.pl
index 99fa2f164..b436e5cb5 100755
--- a/test_regress/t/t_dist_portability.pl
+++ b/test_regress/t/t_dist_portability.pl
@@ -48,7 +48,7 @@ sub uint {
         }
     }
     if (keys %names) {
-        error("Files with uint32*_t instead of vluint32s: ",join(' ',sort keys %names));
+        error("Files with uint32*_t instead of vluint32s: ", join(' ', sort keys %names));
     }
 }
 
@@ -69,7 +69,7 @@ sub printfll {
         }
     }
     if (keys %names) {
-        error("Files with %ll instead of VL_PRI64: ",join(' ',sort keys %names));
+        error("Files with %ll instead of VL_PRI64: ", join(' ', sort keys %names));
     }
 }
 
@@ -87,7 +87,7 @@ sub cstr {
         }
     }
     if (keys %names) {
-        error("Files with potential c_str() lifetime issue: ",join(' ',sort keys %names));
+        error("Files with potential c_str() lifetime issue: ", join(' ', sort keys %names));
     }
 }
 
@@ -106,7 +106,7 @@ sub vsnprintf {
         }
     }
     if (keys %names) {
-        error("Files with vsnprintf, use VL_VSNPRINTF: ",join(' ',sort keys %names));
+        error("Files with vsnprintf, use VL_VSNPRINTF: ", join(' ', sort keys %names));
     }
 }
 
@@ -127,7 +127,7 @@ sub final {
         }
     }
     if (keys %names) {
-        error("Files with classes without final/VL_NOT_FINAL: ",join(' ',sort keys %names));
+        error("Files with classes without final/VL_NOT_FINAL: ", join(' ', sort keys %names));
     }
 }
 
diff --git a/test_regress/t/t_dist_tabs.pl b/test_regress/t/t_dist_tabs.pl
index cd830d4fc..f1bb39258 100755
--- a/test_regress/t/t_dist_tabs.pl
+++ b/test_regress/t/t_dist_tabs.pl
@@ -43,10 +43,10 @@ if (!-r "$root/.git") {
                 $btab = 0;
                 print " File $file\n" if $Self->{verbose};
             }
-            elsif ($line  =~ m!^@@ -?[0-9]+,?[0-9]* \+?([0-9]+)!) {
+            elsif ($line =~ m!^@@ -?[0-9]+,?[0-9]* \+?([0-9]+)!) {
                 $lineno = $1 - 1;
             }
-            elsif ($line  =~ m!^ !) {
+            elsif ($line =~ m!^ !) {
                 ++$lineno;
                 if ($line =~ m!^[- ].*\t!) {
                     print "  Had tabs\n" if $Self->{verbose} && !$atab;
@@ -72,7 +72,7 @@ if (!-r "$root/.git") {
                 if ($len >= 100
                     && $file !~ $Tabs_Exempt_Re
                     && $file !~ $Wide_Exempt_Re) {
-                    print"  Wide $line\n" if $Self->{verbose};
+                    print "  Wide $line\n" if $Self->{verbose};
                     $summary = "File modification adds a new >100 column line:" if !$summary;
                     $warns{$file} = "File modification adds a new >100 column line: $file:$lineno";
                 }
@@ -81,7 +81,7 @@ if (!-r "$root/.git") {
     }
     if (keys %warns) {
         # First warning lists everything as that's shown in the driver summary
-        error($summary." ",join(' ',sort keys %warns));
+        error($summary . " ", join(' ', sort keys %warns));
         foreach my $file (sort keys %warns) {
             error($warns{$file});
         }
diff --git a/test_regress/t/t_dist_untracked.pl b/test_regress/t/t_dist_untracked.pl
index 9f25d1622..254e6aa3d 100755
--- a/test_regress/t/t_dist_untracked.pl
+++ b/test_regress/t/t_dist_untracked.pl
@@ -27,16 +27,16 @@ if (!-r "$root/.git") {
             next if $file =~ /nodist/;
             if (_has_tabs("$root/$file")) {
                 $warns{$file} = "File not in git or .gitignore (with tabs): $file";
-                $summary = "Files untracked in git or .gitignore (with tabs):"
+                $summary = "Files untracked in git or .gitignore (with tabs):";
             } else {
                 $warns{$file} = "File not in git or .gitignore: $file";
-                $summary ||= "Files untracked in git or .gitignore:"
+                $summary ||= "Files untracked in git or .gitignore:";
             }
         }
     }
     if (keys %warns) {
         # First warning lists everything as that's shown in the driver summary
-        error($summary." ",join(' ',sort keys %warns));
+        error($summary . " ", join(' ', sort keys %warns));
         foreach my $file (sort keys %warns) {
             error($warns{$file});
         }
diff --git a/test_regress/t/t_dist_whitespace.pl b/test_regress/t/t_dist_whitespace.pl
index bf7f6ca36..451d46e98 100755
--- a/test_regress/t/t_dist_whitespace.pl
+++ b/test_regress/t/t_dist_whitespace.pl
@@ -66,10 +66,10 @@ foreach my $file (sort keys %files) {
 if (keys %warns) {
     # First warning lists everything as that's shown in the driver summary
     if ($ENV{HARNESS_UPDATE_GOLDEN}) {
-        error("Updated files with whitespace errors: ",join(' ',sort keys %warns));
+        error("Updated files with whitespace errors: ", join(' ', sort keys %warns));
         error("To auto-fix: HARNESS_UPDATE_GOLDEN=1 {command} or --golden");
     } else {
-        error("Files have whitespace errors: ",join(' ',sort keys %warns));
+        error("Files have whitespace errors: ", join(' ', sort keys %warns));
         error("To auto-fix: HARNESS_UPDATE_GOLDEN=1 {command} or --golden");
     }
     foreach my $file (sort keys %warns) {
diff --git a/test_regress/t/t_dos.pl b/test_regress/t/t_dos.pl
index b46d46042..f2ebe7037 100755
--- a/test_regress/t/t_dos.pl
+++ b/test_regress/t/t_dos.pl
@@ -11,6 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(simulator => 1);
 
 compile(
+    verilator_flags2 => ['-Wall -Wno-DECLFILENAME'],  # To check EOFNEWLINE with DOS CRs
     );
 
 execute(
diff --git a/test_regress/t/t_dpi_accessors.pl b/test_regress/t/t_dpi_accessors.pl
index c48a8a2ba..a21fb1c27 100755
--- a/test_regress/t/t_dpi_accessors.pl
+++ b/test_regress/t/t_dpi_accessors.pl
@@ -14,8 +14,8 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(simulator => 1);
 
 compile(
-    make_top_shell   => 0,
-    make_main        => 0,
+    make_top_shell => 0,
+    make_main => 0,
     verilator_flags2 => ["-Wno-BLKANDNBLK -language 1800-2005 --exe $Self->{t_dir}/$Self->{name}.cpp"], );
 
 execute(
diff --git a/test_regress/t/t_dpi_export_scope_bad.cpp b/test_regress/t/t_dpi_export_scope_bad.cpp
new file mode 100644
index 000000000..0a19d5add
--- /dev/null
+++ b/test_regress/t/t_dpi_export_scope_bad.cpp
@@ -0,0 +1,44 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//
+// DESCRIPTION: Verilator: Verilog Test module
+//
+// This file ONLY is placed under the Creative Commons Public Domain, for
+// any use, without warranty, 2010 by Wilson Snyder.
+// SPDX-License-Identifier: CC0-1.0
+
+#include <verilated.h>
+#include VM_PREFIX_INCLUDE
+
+//======================================================================
+
+#include "Vt_dpi_export_scope_bad__Dpi.h"
+
+#ifdef NEED_EXTERNS
+extern "C" {
+extern void dpix_task();
+}
+#endif
+
+//======================================================================
+
+unsigned int main_time = 0;
+
+double sc_time_stamp() { return main_time; }
+
+VM_PREFIX* topp = nullptr;
+
+int main(int argc, char* argv[]) {
+    topp = new VM_PREFIX;
+
+    Verilated::debug(0);
+
+    topp->eval();
+
+    topp->final();
+    VL_DO_DANGLING(delete topp, topp);
+    return 1;
+}
+
+void dpix_run_tests() {
+    dpix_task();  // Wrong scope
+}
diff --git a/test_regress/t/t_dpi_export_scope_bad.out b/test_regress/t/t_dpi_export_scope_bad.out
new file mode 100644
index 000000000..490d26c84
--- /dev/null
+++ b/test_regress/t/t_dpi_export_scope_bad.out
@@ -0,0 +1,2 @@
+%Error: unknown:0: Testbench C called 'dpix_task' but this DPI export function exists only in other scopes, not scope 'TOP.t'
+Aborting...
diff --git a/test_regress/t/t_dpi_export_scope_bad.pl b/test_regress/t/t_dpi_export_scope_bad.pl
new file mode 100755
index 000000000..5b97a7901
--- /dev/null
+++ b/test_regress/t/t_dpi_export_scope_bad.pl
@@ -0,0 +1,24 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(simulator => 1);
+
+compile(
+    v_flags2 => ["--exe $Self->{t_dir}/$Self->{name}.cpp"],
+    make_main => 0,
+    );
+
+execute(
+    fails => 1,
+    expect_filename => $Self->{golden_filename},
+    );
+
+ok(1);
+1;
diff --git a/test_regress/t/t_dpi_export_scope_bad.v b/test_regress/t/t_dpi_export_scope_bad.v
new file mode 100644
index 000000000..ad45d37e6
--- /dev/null
+++ b/test_regress/t/t_dpi_export_scope_bad.v
@@ -0,0 +1,21 @@
+// DESCRIPTION: Verilator: Verilog Test module
+//
+// Copyright 2020 by Wilson Snyder. This program is free software; you can
+// redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+module t;
+   s s();
+
+   import "DPI-C" context function void dpix_run_tests();
+   initial dpix_run_tests();
+endmodule
+
+module s;
+   export "DPI-C" task dpix_task;
+   task dpix_task();
+      $write("Hello in %m\n");
+   endtask
+endmodule   
diff --git a/test_regress/t/t_dpi_import_hdr_only.pl b/test_regress/t/t_dpi_import_hdr_only.pl
index 92c6227ed..b497878f2 100755
--- a/test_regress/t/t_dpi_import_hdr_only.pl
+++ b/test_regress/t/t_dpi_import_hdr_only.pl
@@ -26,10 +26,10 @@ compile(
 my @files = glob($tmp_dir . "/*");
 
 error("Did not produce DPI header") if scalar(@files) == 0;
-error("Too many files created:".join(', ', @files)) if scalar(@files) > 1;
+error("Too many files created:" . join(', ', @files)) if scalar(@files) > 1;
 
 my $tmp_header = $files[0];
-print("============".$tmp_header."\n");
+print("============" . $tmp_header . "\n");
 error("Unexpected file $tmp_header") unless $tmp_header =~ /__Dpi\.h$/;
 
 compile(
diff --git a/test_regress/t/t_dpi_type_bad.out b/test_regress/t/t_dpi_type_bad.out
new file mode 100644
index 000000000..7fe755402
--- /dev/null
+++ b/test_regress/t/t_dpi_type_bad.out
@@ -0,0 +1,4 @@
+%Error: t/t_dpi_type_bad.v:11:4: Unsupported DPI type 'DPI-BAD': Use 'DPI-C'
+   11 |    import "DPI-BAD" task dpix_twice;
+      |    ^~~~~~
+%Error: Exiting due to
diff --git a/test_regress/t/t_dpi_type_bad.pl b/test_regress/t/t_dpi_type_bad.pl
new file mode 100755
index 000000000..59ba0d6c6
--- /dev/null
+++ b/test_regress/t/t_dpi_type_bad.pl
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(linter => 1);
+
+lint(
+    fails => $Self->{vlt_all},
+    expect_filename => $Self->{golden_filename},
+    );
+
+ok(1);
+1;
diff --git a/test_regress/t/t_dpi_type_bad.v b/test_regress/t/t_dpi_type_bad.v
new file mode 100644
index 000000000..a5113f66e
--- /dev/null
+++ b/test_regress/t/t_dpi_type_bad.v
@@ -0,0 +1,15 @@
+// DESCRIPTION: Verilator: Verilog Test module
+//
+// Copyright 2021 by Wilson Snyder. This program is free software; you can
+// redistribute it and/or modify it under the terms of either the GNU
+// Lesser General Public License Version 3 or the Perl Artistic License
+// Version 2.0.
+// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+module t;
+
+   import "DPI-BAD" task dpix_twice;
+   initial begin
+      $stop;
+   end
+endmodule
diff --git a/test_regress/t/t_dpi_vams.pl b/test_regress/t/t_dpi_vams.pl
index 68cbf7858..68f1f0e47 100755
--- a/test_regress/t/t_dpi_vams.pl
+++ b/test_regress/t/t_dpi_vams.pl
@@ -13,7 +13,7 @@ scenarios(simulator => 1);
 compile(
     make_top_shell => 0,
     make_main => 0,
-    verilator_flags2 => ["--exe","$Self->{t_dir}/$Self->{name}.cpp"],
+    verilator_flags2 => ["--exe", "$Self->{t_dir}/$Self->{name}.cpp"],
     );
 
 execute(
diff --git a/test_regress/t/t_driver_random.pl b/test_regress/t/t_driver_random.pl
index 29eeb00a9..6627cfe33 100755
--- a/test_regress/t/t_driver_random.pl
+++ b/test_regress/t/t_driver_random.pl
@@ -19,7 +19,7 @@ if (!$ENV{VERILATOR_TEST_RANDOM_FAILURE}) {
     # Randomly fail to test driver.pl
     my ($ign, $t) = Time::HiRes::gettimeofday();
     if ($t % 2) {
-        error("random failure ".$t);
+        error("random failure " . $t);
     }
     else {
         ok(1);
diff --git a/test_regress/t/t_embed1.pl b/test_regress/t/t_embed1.pl
index 1b298a9be..a28adebe6 100755
--- a/test_regress/t/t_embed1.pl
+++ b/test_regress/t/t_embed1.pl
@@ -29,9 +29,9 @@ mkdir $child_dir;
 
     run(logfile => "${child_dir}/vlt_gcc.log",
         cmd => ["cd ${child_dir} && ",
-                $ENV{MAKE}, "-f".getcwd()."/Makefile_obj",
-                "CPPFLAGS_DRIVER=-D".uc($self->{name}),
-                ($opt_verbose ? "CPPFLAGS_DRIVER2=-DTEST_VERBOSE=1":""),
+                $ENV{MAKE}, "-f" . getcwd() . "/Makefile_obj",
+                "CPPFLAGS_DRIVER=-D" . uc($self->{name}),
+                ($opt_verbose ? "CPPFLAGS_DRIVER2=-DTEST_VERBOSE=1" : ""),
                 "VM_PREFIX=$self->{VM_PREFIX}_child",
                 "V$self->{name}_child__ALL.a",  # bypass default rule, make archive
                 ($param{make_flags}||""),
diff --git a/test_regress/t/t_emit_memb_limit.pl b/test_regress/t/t_emit_memb_limit.pl
index 7920f341a..21d001dd2 100755
--- a/test_regress/t/t_emit_memb_limit.pl
+++ b/test_regress/t/t_emit_memb_limit.pl
@@ -22,17 +22,17 @@ sub gen {
     $fh->print("  input clk;\n");
     $fh->print("  input i;\n");
     $fh->print("  output logic o;\n");
-    for (my $i=0; $i<($n+1); ++$i) {
+    for (my $i = 0; $i < ($n + 1); ++$i) {
         $fh->print("  logic r$i;\n");
     }
     $fh->print("  always @ (posedge clk) begin\n");
     $fh->print("    r0 <= i;\n");
-    for (my $i=1; $i<$n; ++$i) {
-        $fh->print("    r".($i+1)." <= r$i;\n");
+    for (my $i = 1; $i < $n; ++$i) {
+        $fh->print("    r" . ($i+1) . " <= r$i;\n");
     }
     $fh->print("    o <= r$n;\n");
-    $fh->print('    $write("*-* All Finished *-*\n");',"\n");
-    $fh->print('    $finish;',"\n");
+    $fh->print('    $write("*-* All Finished *-*\n");', "\n");
+    $fh->print('    $finish;', "\n");
     $fh->print("  end\n");
     $fh->print("endmodule\n");
 }
diff --git a/test_regress/t/t_enum.v b/test_regress/t/t_enum.v
index 1a5c87b9b..569c2e4b3 100644
--- a/test_regress/t/t_enum.v
+++ b/test_regress/t/t_enum.v
@@ -31,6 +31,7 @@ module t (/*AUTOARG*/);
 	 z5 = e5
 	 } ZN;
 
+   typedef enum three_t;  // Forward
    typedef enum [2:0] { ONES=~0 } three_t;
    three_t three = ONES;
 
diff --git a/test_regress/t/t_flag_build.pl b/test_regress/t/t_flag_build.pl
index 7076e8443..c26b6bcb0 100755
--- a/test_regress/t/t_flag_build.pl
+++ b/test_regress/t/t_flag_build.pl
@@ -8,7 +8,6 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 # Version 2.0.
 # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
 
-
 scenarios(simulator => 1);
 top_filename("t/t_flag_make_cmake.v");
 
diff --git a/test_regress/t/t_flag_csplit.pl b/test_regress/t/t_flag_csplit.pl
index 0f6336f60..bef212f13 100755
--- a/test_regress/t/t_flag_csplit.pl
+++ b/test_regress/t/t_flag_csplit.pl
@@ -29,13 +29,13 @@ while (1) {
     run(logfile => "$Self->{obj_dir}/vlt_gcc.log",
         tee => $self->{verbose},
         cmd=>[$ENV{MAKE},
-              "-C ".$Self->{obj_dir},
+              "-C " . $Self->{obj_dir},
               "-f $Self->{VM_PREFIX}.mk",
               "-j 4",
               "VM_PREFIX=$Self->{VM_PREFIX}",
               "TEST_OBJ_DIR=$Self->{obj_dir}",
               "CPPFLAGS_DRIVER=-D".uc($Self->{name}),
-              ($opt_verbose ? "CPPFLAGS_DRIVER2=-DTEST_VERBOSE=1":""),
+              ($opt_verbose ? "CPPFLAGS_DRIVER2=-DTEST_VERBOSE=1" : ""),
               "OPT_FAST=-O2",
               "OPT_SLOW=-O0",
               "OPT_GLOBAL=-Os",
@@ -86,7 +86,7 @@ sub check_cpp {
     printf "  File %6d  %s\n", $size, $filename if $Self->{verbose};
     my $fh = IO::File->new("<$filename") or error("$! $filenme");
     my @funcs;
-    while (defined (my $line = $fh->getline)) {
+    while (defined(my $line = $fh->getline)) {
         if ($line =~ /^(void|IData)\s+(.*::.*)/) {
             my $func = $2;
             $func =~ s/\(.*$//;
@@ -103,19 +103,19 @@ sub check_cpp {
         }
     }
     if ($#funcs > 0) {
-        error("Split had multiple functions in $filename\n\t".join("\n\t",@funcs));
+        error("Split had multiple functions in $filename\n\t" . join("\n\t", @funcs));
     }
 }
 
 sub check_gcc_flags {
     my $filename = shift;
     my $fh = IO::File->new("<$filename") or error("$! $filenme");
-    while (defined (my $line = $fh->getline)) {
+    while (defined(my $line = $fh->getline)) {
         chomp $line;
         print ":log: $line\n" if $Self->{verbose};
         if ($line =~ /$Self->{VM_PREFIX}\S*\.cpp/) {
-            my $filetype = ($line =~ /Slow|Syms/) ? "slow":"fast";
-            my $opt = ($line !~ /-O2/) ? "slow":"fast";
+            my $filetype = ($line =~ /Slow|Syms/) ? "slow" : "fast";
+            my $opt = ($line !~ /-O2/) ? "slow" : "fast";
             print "$filetype, $opt, $line\n" if $Self->{verbose};
             if ($filetype ne $opt) {
                 error("${filetype} file compiled as if was ${opt}: $line");
diff --git a/test_regress/t/t_flag_csplit_eval.pl b/test_regress/t/t_flag_csplit_eval.pl
index c7d8da543..dfd67259c 100755
--- a/test_regress/t/t_flag_csplit_eval.pl
+++ b/test_regress/t/t_flag_csplit_eval.pl
@@ -22,7 +22,6 @@ sub check_evals {
     $got >= 3 or error("Too few _eval functions found: $got");
 }
 
-
 scenarios(vlt_all => 1);
 
 compile(
diff --git a/test_regress/t/t_flag_csplit_off.pl b/test_regress/t/t_flag_csplit_off.pl
index 8e3014653..8308d2a93 100755
--- a/test_regress/t/t_flag_csplit_off.pl
+++ b/test_regress/t/t_flag_csplit_off.pl
@@ -31,13 +31,13 @@ while (1) {
     run(logfile => "$Self->{obj_dir}/vlt_gcc.log",
         tee => $self->{verbose},
         cmd=>[$ENV{MAKE},
-              "-C ".$Self->{obj_dir},
+              "-C " . $Self->{obj_dir},
               "-f $Self->{VM_PREFIX}.mk",
               "-j 4",
               "VM_PREFIX=$Self->{VM_PREFIX}",
               "TEST_OBJ_DIR=$Self->{obj_dir}",
               "CPPFLAGS_DRIVER=-D".uc($Self->{name}),
-              ($opt_verbose ? "CPPFLAGS_DRIVER2=-DTEST_VERBOSE=1":""),
+              ($opt_verbose ? "CPPFLAGS_DRIVER2=-DTEST_VERBOSE=1" : ""),
               "OPT_FAST=-O2",
               "OPT_SLOW=-O0",
               ($param{make_flags}||""),
@@ -79,7 +79,7 @@ sub check_all_file {
 sub check_gcc_flags {
     my $filename = shift;
     my $fh = IO::File->new("<$filename") or error("$! $filenme");
-    while (defined (my $line = $fh->getline)) {
+    while (defined(my $line = $fh->getline)) {
         chomp $line;
         print ":log: $line\n" if $Self->{verbose};
         if ($line =~ /\.cpp/ && $line =~ qr/-O0/) {
diff --git a/test_regress/t/t_flag_ldflags.pl b/test_regress/t/t_flag_ldflags.pl
index 1701a0b90..d72594ba4 100755
--- a/test_regress/t/t_flag_ldflags.pl
+++ b/test_regress/t/t_flag_ldflags.pl
@@ -13,13 +13,13 @@ scenarios(vlt => 1);
 my $m32 = $Self->cfg_with_m32 ? "-m32" : "";
 
 run(cmd => ["cd $Self->{obj_dir}"
-            ." && $ENV{CXX} $m32 -c ../../t/t_flag_ldflags_a.cpp"
-            ." && ar -cr t_flag_ldflags_a.a t_flag_ldflags_a.o"
-            ." && ranlib t_flag_ldflags_a.a "],
+            . " && $ENV{CXX} $m32 -c ../../t/t_flag_ldflags_a.cpp"
+            . " && ar -cr t_flag_ldflags_a.a t_flag_ldflags_a.o"
+            . " && ranlib t_flag_ldflags_a.a "],
     check_finished => 0);
 run(cmd => ["cd $Self->{obj_dir}"
-            ." && $ENV{CXX} $m32 -fPIC -c ../../t/t_flag_ldflags_so.cpp"
-            ." && $ENV{CXX} $m32 -shared -o t_flag_ldflags_so.so -lc t_flag_ldflags_so.o"],
+            . " && $ENV{CXX} $m32 -fPIC -c ../../t/t_flag_ldflags_so.cpp"
+            . " && $ENV{CXX} $m32 -shared -o t_flag_ldflags_so.so -lc t_flag_ldflags_so.o"],
     check_finished => 0);
 
 compile(
@@ -34,12 +34,12 @@ compile(
 # On OS X, LD_LIBRARY_PATH is ignored, so set rpath of the exe to find the .so
 if ($^O eq "darwin") {
   run(cmd => ["cd $Self->{obj_dir}"
-              ." && install_name_tool -add_rpath \@executable_path/."
-              ." $Self->{VM_PREFIX}"],
+              . " && install_name_tool -add_rpath \@executable_path/."
+              . " $Self->{VM_PREFIX}"],
       check_finished => 0);
   run(cmd => ["cd $Self->{obj_dir}"
-              ." && install_name_tool -change t_flag_ldflags_so.so"
-              ." \@rpath/t_flag_ldflags_so.so $Self->{VM_PREFIX}"],
+              . " && install_name_tool -change t_flag_ldflags_so.so"
+              . " \@rpath/t_flag_ldflags_so.so $Self->{VM_PREFIX}"],
       check_finished => 0);
 }
 
diff --git a/test_regress/t/t_flag_make_cmake.pl b/test_regress/t/t_flag_make_cmake.pl
index ba6828b85..868582ff7 100755
--- a/test_regress/t/t_flag_make_cmake.pl
+++ b/test_regress/t/t_flag_make_cmake.pl
@@ -8,7 +8,6 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 # Version 2.0.
 # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
 
-
 scenarios(simulator => 1);
 
 compile(
@@ -19,7 +18,7 @@ compile(
 if (!$Self->have_cmake) {
     skip("cmake is not installed");
 } else {
-    my $cmakecache = $Self->{obj_dir}."/CMakeCache.txt";
+    my $cmakecache = $Self->{obj_dir} . "/CMakeCache.txt";
     if (! -e $cmakecache) {
         error("$cmakecache does not exist.")
     }
diff --git a/test_regress/t/t_flag_make_gmake.pl b/test_regress/t/t_flag_make_gmake.pl
index e5caca9d8..b25888d6e 100755
--- a/test_regress/t/t_flag_make_gmake.pl
+++ b/test_regress/t/t_flag_make_gmake.pl
@@ -8,7 +8,6 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 # Version 2.0.
 # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
 
-
 scenarios(simulator => 1);
 
 top_filename("t/t_flag_make_cmake.v");
diff --git a/test_regress/t/t_flag_skipidentical.pl b/test_regress/t/t_flag_skipidentical.pl
index f3be1255c..731515d06 100755
--- a/test_regress/t/t_flag_skipidentical.pl
+++ b/test_regress/t/t_flag_skipidentical.pl
@@ -15,9 +15,9 @@ scenarios(vlt => 1);
 
     print "NOTE: use --debugi, as --debug in driver turns off skip-identical\n";
 
-    my $outfile = "$Self->{obj_dir}/V".$Self->{name}.".cpp";
+    my $outfile = "$Self->{obj_dir}/V" . $Self->{name} . ".cpp";
     my @oldstats = stat($outfile);
-    print "Old mtime=",$oldstats[9],"\n";
+    print "Old mtime=", $oldstats[9], "\n";
     $oldstats[9] or error("No output file found: $outfile\n");
 
     sleep(2);  # Or else it might take < 1 second to compile and see no diff.
@@ -26,7 +26,7 @@ scenarios(vlt => 1);
     compile();
 
     my @newstats = stat($outfile);
-    print "New mtime=",$newstats[9],"\n";
+    print "New mtime=", $newstats[9], "\n";
 
     ($oldstats[9] == $newstats[9])
         or error("--skip-identical was ignored -- recompiled\n");
diff --git a/test_regress/t/t_flag_verilate.pl b/test_regress/t/t_flag_verilate.pl
index 59b78738a..649eac752 100755
--- a/test_regress/t/t_flag_verilate.pl
+++ b/test_regress/t/t_flag_verilate.pl
@@ -8,7 +8,6 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 # Version 2.0.
 # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
 
-
 scenarios(simulator => 1);
 top_filename("t/t_flag_make_cmake.v");
 
@@ -52,7 +51,7 @@ compile(  # Don't call cmake nor gmake from driver.pl. Just build here
     );
 
 # The previous run must not verilated, only build is expected.
-if ( -e $Self->{obj_dir} . '/Vt_flag_verilate_990_final.tree' ) {
+if (-e $Self->{obj_dir} . '/Vt_flag_verilate_990_final.tree') {
     $Self->error('Unexpectedly verilated.');
 }
 
@@ -60,6 +59,5 @@ execute(
     check_finished => 1,
     );
 
-
 ok(1);
 1;
diff --git a/test_regress/t/t_foreach.pl b/test_regress/t/t_foreach.pl
index 5c5b93dd0..cb80916f1 100755
--- a/test_regress/t/t_foreach.pl
+++ b/test_regress/t/t_foreach.pl
@@ -21,14 +21,14 @@ execute(
 # We expect all loops should be unrolled by verilator,
 # none of the loop variables should exist in the output:
 for my $file (glob_all("$Self->{obj_dir}/$Self->{VM_PREFIX}*.cpp")) {
-  file_grep_not($file, qr/index_/);
+    file_grep_not($file, qr/index_/);
 }
 
 # Further, we expect that all logic within the loop should
 # have been evaluated inside the compiler. So there should be
 # no references to 'sum' in the .cpp.
 for my $file (glob_all("$Self->{obj_dir}/$Self->{VM_PREFIX}*.cpp")) {
-  file_grep_not($file, qr/sum/);
+    file_grep_not($file, qr/sum/);
 }
 
 ok(1);
diff --git a/test_regress/t/t_gantt.pl b/test_regress/t/t_gantt.pl
index 77b8714a4..66b00e554 100755
--- a/test_regress/t/t_gantt.pl
+++ b/test_regress/t/t_gantt.pl
@@ -8,8 +8,6 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 # Version 2.0.
 # SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
 
-use IO::File;
-
 # Test for bin/verilator_gantt,
 #
 # Only needed in multithreaded regression.
@@ -29,6 +27,7 @@ execute(
     all_run_flags => ["+verilator+prof+threads+start+2",
                       " +verilator+prof+threads+window+2",
                       " +verilator+prof+threads+file+$Self->{obj_dir}/profile_threads.dat",
+                      " +verilator+prof+vlt+file+$Self->{obj_dir}/profile.vlt",
                       ],
     check_finished => 1,
     );
@@ -41,35 +40,11 @@ run(cmd => ["$ENV{VERILATOR_ROOT}/bin/verilator_gantt",
             "$Self->{obj_dir}/profile_threads.dat",
             "--vcd $Self->{obj_dir}/profile_threads.vcd",
             "| tee $Self->{obj_dir}/gantt.log"],
-    verilator_run => 1,
     );
 
-# We should have three lines of gantt chart, each with
-# an even number of mtask-bars (eg "[123--]")
-my $gantt_line_ct = 0;
-my $global_mtask_ct = 0;
-{
-    my $fh = IO::File->new("<$Self->{obj_dir}/gantt.log")
-        or error("$! $Self->{obj_dir}/gantt.log");
-    while (my $line = ($fh && $fh->getline)) {
-        if ($line !~ m/^  t:/) { next; }
-        $gantt_line_ct++;
-        my $this_thread_mtask_ct = 0;
-        my @mtasks = split(/\[/, $line);
-        shift @mtasks;  # throw the '>>  ' away
-        foreach my $mtask (@mtasks) {
-            # Format of each mtask is "[123--]" where the hyphens
-            # number or ] may or may not appear; it depends on exact timing.
-            $this_thread_mtask_ct++;
-            $global_mtask_ct++;
-        }
-        if ($this_thread_mtask_ct % 2 != 0) { error("odd number of mtasks found"); }
-    }
-}
-if ($gantt_line_ct != 2) { error("wrong number of gantt lines"); }
-if ($global_mtask_ct == 0) { error("wrong number of mtasks, should be > 0"); }
-print "Found $gantt_line_ct lines of gantt data with $global_mtask_ct mtasks\n"
-    if $Self->{verbose};
+file_grep("$Self->{obj_dir}/gantt.log", qr/Total threads += 2/i);
+file_grep("$Self->{obj_dir}/gantt.log", qr/Total mtasks += 7/i);
+file_grep("$Self->{obj_dir}/gantt.log", qr/Total evals += 2/i);
 
 # Diff to itself, just to check parsing
 vcd_identical("$Self->{obj_dir}/profile_threads.vcd", "$Self->{obj_dir}/profile_threads.vcd");
diff --git a/test_regress/t/t_gantt_io.dat b/test_regress/t/t_gantt_io.dat
new file mode 100644
index 000000000..c20bcd6c9
--- /dev/null
+++ b/test_regress/t/t_gantt_io.dat
@@ -0,0 +1,920 @@
+VLPROFTHREAD 1.1 # Verilator thread profile dump version 1.1
+VLPROF arg --threads 2
+VLPROF arg +verilator+prof+threads+start+2
+VLPROF arg +verilator+prof+threads+window+2
+VLPROF stat yields 0
+VLPROFPROC processor    : 0
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2134.599
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 0
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 0
+VLPROFPROC initial apicid       : 0
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 1
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 1932.526
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 1
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 2
+VLPROFPROC initial apicid       : 2
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 2
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 1862.405
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 2
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 4
+VLPROFPROC initial apicid       : 4
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 3
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 1862.009
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 3
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 6
+VLPROFPROC initial apicid       : 6
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 4
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2195.832
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 4
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 8
+VLPROFPROC initial apicid       : 8
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 5
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2190.061
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 5
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 10
+VLPROFPROC initial apicid       : 10
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 6
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2203.924
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 6
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 12
+VLPROFPROC initial apicid       : 12
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 7
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2193.174
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 7
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 14
+VLPROFPROC initial apicid       : 14
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 8
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2203.449
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 8
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 16
+VLPROFPROC initial apicid       : 16
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 9
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2197.717
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 9
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 18
+VLPROFPROC initial apicid       : 18
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 10
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2195.928
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 10
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 20
+VLPROFPROC initial apicid       : 20
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 11
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 1964.149
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 11
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 22
+VLPROFPROC initial apicid       : 22
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 12
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2194.738
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 12
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 24
+VLPROFPROC initial apicid       : 24
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 13
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2194.821
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 13
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 26
+VLPROFPROC initial apicid       : 26
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 14
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2196.191
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 14
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 28
+VLPROFPROC initial apicid       : 28
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 15
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2198.063
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 15
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 30
+VLPROFPROC initial apicid       : 30
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 16
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2152.652
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 0
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 1
+VLPROFPROC initial apicid       : 1
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 17
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2257.474
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 1
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 3
+VLPROFPROC initial apicid       : 3
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 18
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 1862.896
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 2
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 5
+VLPROFPROC initial apicid       : 5
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 19
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 1863.193
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 3
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 7
+VLPROFPROC initial apicid       : 7
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 20
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2189.303
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 4
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 9
+VLPROFPROC initial apicid       : 9
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 21
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2194.584
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 5
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 11
+VLPROFPROC initial apicid       : 11
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 22
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2195.060
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 6
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 13
+VLPROFPROC initial apicid       : 13
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 23
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2189.319
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 7
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 15
+VLPROFPROC initial apicid       : 15
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 24
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2195.031
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 8
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 17
+VLPROFPROC initial apicid       : 17
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 25
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2555.092
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 9
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 19
+VLPROFPROC initial apicid       : 19
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 26
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2191.830
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 10
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 21
+VLPROFPROC initial apicid       : 21
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 27
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2194.661
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 11
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 23
+VLPROFPROC initial apicid       : 23
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 28
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2194.445
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 12
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 25
+VLPROFPROC initial apicid       : 25
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 29
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2194.786
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 13
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 27
+VLPROFPROC initial apicid       : 27
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 30
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2189.282
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 14
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 29
+VLPROFPROC initial apicid       : 29
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROFPROC processor    : 31
+VLPROFPROC vendor_id    : AuthenticTest
+VLPROFPROC cpu family   : 23
+VLPROFPROC model                : 113
+VLPROFPROC model name   : Test Ryzen 9 3950X 16-Core Processor
+VLPROFPROC stepping     : 0
+VLPROFPROC microcode    : 0x8701013
+VLPROFPROC cpu MHz              : 2195.563
+VLPROFPROC cache size   : 512 KB
+VLPROFPROC physical id  : 0
+VLPROFPROC siblings     : 32
+VLPROFPROC core id              : 15
+VLPROFPROC cpu cores    : 16
+VLPROFPROC apicid               : 31
+VLPROFPROC initial apicid       : 31
+VLPROFPROC fpu          : yes
+VLPROFPROC fpu_exception        : yes
+VLPROFPROC cpuid level  : 16
+VLPROFPROC wp           : yes
+VLPROFPROC flags                : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate sme ssbd mba sev ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif umip rdpid overflow_recov succor smca
+VLPROFPROC bugs         : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
+VLPROFPROC bogomips     : 6987.10
+VLPROFPROC TLB size     : 3072 4K pages
+VLPROFPROC clflush size : 64
+VLPROFPROC cache_alignment      : 64
+VLPROFPROC address sizes        : 43 bits physical, 48 bits virtual
+VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
+VLPROFPROC 
+VLPROF eval start 595 elapsed 11655 cpu 19 on thread 1
+VLPROF eval_loop start 945 elapsed 11235 cpu 19 on thread 1
+VLPROF mtask 6 start 2695 elapsed 210 predict_start 0 predict_cost 30 cpu 19 on thread 1
+VLPROF mtask 10 start 9695 elapsed 175 predict_start 196 predict_cost 30 cpu 19 on thread 1
+VLPROF eval start 13720 elapsed 8610 cpu 19 on thread 1
+VLPROF eval_loop start 14000 elapsed 8085 cpu 19 on thread 1
+VLPROF mtask 6 start 15610 elapsed 210 predict_start 0 predict_cost 30 cpu 19 on thread 1
+VLPROF mtask 10 start 21700 elapsed 175 predict_start 196 predict_cost 30 cpu 19 on thread 1
+VLPROF mtask 5 start 5495 elapsed 595 predict_start 0 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 7 start 6300 elapsed 595 predict_start 30 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 8 start 7490 elapsed 1050 predict_start 60 predict_cost 107 cpu 10 on thread 2
+VLPROF mtask 9 start 9135 elapsed 595 predict_start 167 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 11 start 10255 elapsed 805 predict_start 197 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 5 start 18375 elapsed 595 predict_start 0 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 7 start 19145 elapsed 175 predict_start 30 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 8 start 19670 elapsed 140 predict_start 60 predict_cost 107 cpu 10 on thread 2
+VLPROF mtask 9 start 20650 elapsed 70 predict_start 167 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 11 start 21140 elapsed 105 predict_start 197 predict_cost 30 cpu 10 on thread 2
+VLPROF stat ticks 23415
diff --git a/test_regress/t/t_gantt_io.out b/test_regress/t/t_gantt_io.out
new file mode 100644
index 000000000..f5c1c2b92
--- /dev/null
+++ b/test_regress/t/t_gantt_io.out
@@ -0,0 +1,37 @@
+Verilator Gantt report
+
+Argument settings:
+  +verilator+prof+threads+start+2
+  +verilator+prof+threads+window+2
+  --threads 2
+
+Analysis:
+  Total threads             = 2
+  Total mtasks              = 7
+  Total cpus used           = 2
+  Total yields              = 0
+  Total evals               = 2
+  Total eval loops          = 2
+  Total eval time           = 21875 rdtsc ticks
+  Longest mtask time        = 1190 rdtsc ticks
+  All-thread mtask time     = 5495 rdtsc ticks
+  Longest-thread efficiency = 5.4%
+  All-thread efficiency     = 12.6%
+  All-thread speedup        = 0.3
+
+Prediction (what Verilator used for scheduling):
+  All-thread efficiency     = 63.2%
+  All-thread speedup        = 1.3
+
+Statistics:
+  min log(p2e) = -3.681  from mtask 5 (predict 30, elapsed 1190)
+  max log(p2e) = -2.409  from mtask 8 (predict 107, elapsed 1190)
+  mean = -2.992
+  stddev = 0.459
+  e ^ stddev = 1.583
+
+CPUs:
+  cpu 10: cpu_time=4725 socket=0 core=10  Test Ryzen 9 3950X 16-Core Processor
+  cpu 19: cpu_time=770 socket=0 core=3  Test Ryzen 9 3950X 16-Core Processor
+
+Writing profile_threads.vcd
diff --git a/test_regress/t/t_gantt_io.pl b/test_regress/t/t_gantt_io.pl
new file mode 100755
index 000000000..ba111c47f
--- /dev/null
+++ b/test_regress/t/t_gantt_io.pl
@@ -0,0 +1,22 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(dist => 1);
+
+run(cmd => ["cd $Self->{obj_dir} && $ENV{VERILATOR_ROOT}/bin/verilator_gantt"
+            . " $Self->{t_dir}/$Self->{name}.dat > gantt.log"],
+    check_finished => 0);
+
+files_identical("$Self->{obj_dir}/gantt.log", $Self->{golden_filename});
+
+vcd_identical("$Self->{obj_dir}/profile_threads.vcd", "$Self->{t_dir}/$Self->{name}.vcd.out");
+
+ok(1);
+1;
diff --git a/test_regress/t/t_gantt_io.vcd.out b/test_regress/t/t_gantt_io.vcd.out
new file mode 100644
index 000000000..1818f54b7
--- /dev/null
+++ b/test_regress/t/t_gantt_io.vcd.out
@@ -0,0 +1,258 @@
+$version Generated by verilator_gantt $end
+$timescale 1ns $end
+
+ $scope module gantt $end
+  $scope module Stats $end
+   $var wire 32 vf measured_parallelism [31:0] $end
+   $var wire 32 v10 predicted_parallelism [31:0] $end
+  $upscope $end
+  $scope module cpus $end
+   $var wire 32 v5 cpu10_thread [31:0] $end
+   $var wire 32 v1 cpu19_thread [31:0] $end
+  $upscope $end
+  $scope module evals $end
+   $var wire 32 vb eval [31:0] $end
+   $var wire 32 vc eval_loop [31:0] $end
+  $upscope $end
+  $scope module measured_threads $end
+   $var wire 32 v0 thread1_mtask [31:0] $end
+   $var wire 32 v4 thread2_mtask [31:0] $end
+  $upscope $end
+  $scope module mtasks $end
+   $var wire 32 v3 mtask10_cpu [31:0] $end
+   $var wire 32 va mtask11_cpu [31:0] $end
+   $var wire 32 v6 mtask5_cpu [31:0] $end
+   $var wire 32 v2 mtask6_cpu [31:0] $end
+   $var wire 32 v7 mtask7_cpu [31:0] $end
+   $var wire 32 v8 mtask8_cpu [31:0] $end
+   $var wire 32 v9 mtask9_cpu [31:0] $end
+  $upscope $end
+  $scope module predicted_threads $end
+   $var wire 32 vd predicted_thread1_mtask [31:0] $end
+   $var wire 32 ve predicted_thread2_mtask [31:0] $end
+  $upscope $end
+ $upscope $end
+$enddefinitions $end
+
+#0
+bz v0
+bz v1
+bz v2
+bz v3
+bz v4
+bz v5
+bz v6
+bz v7
+bz v8
+bz v9
+bz va
+bz vb
+bz vc
+bz vd
+bz ve
+b0 vf
+b0 v10
+#595
+b1 vb
+#945
+b1 vc
+b110 vd
+b101 ve
+b10 v10
+#2429
+bz vd
+b111 ve
+b1 v10
+#2695
+b110 v0
+b1 v1
+b10011 v2
+b1 vf
+#2905
+bz v0
+bz v1
+bz v2
+b0 vf
+#3914
+b1000 ve
+b1 v10
+#5495
+b101 v4
+b10 v5
+b1010 v6
+b1 vf
+#6090
+bz v4
+bz v5
+bz v6
+b0 vf
+#6300
+b111 v4
+b10 v5
+b1010 v7
+b1 vf
+#6895
+bz v4
+bz v5
+bz v7
+b0 vf
+#7490
+b1000 v4
+b10 v5
+b1010 v8
+b1 vf
+#8540
+bz v4
+bz v5
+bz v8
+b0 vf
+#9135
+b1001 v4
+b10 v5
+b1010 v9
+b1 vf
+#9210
+b1001 ve
+b1 v10
+#9695
+b1010 v0
+b1 v1
+b10011 v3
+b10 vf
+#9730
+bz v4
+bz v5
+bz v9
+b1 vf
+#9870
+bz v0
+bz v1
+bz v3
+b0 vf
+#10255
+b1011 v4
+b10 v5
+b1010 va
+b1 vf
+#10645
+b1010 vd
+b10 v10
+#10695
+b1011 ve
+b10 v10
+#11060
+bz v4
+bz v5
+bz va
+b0 vf
+#12130
+bz vd
+b1 v10
+#12180
+bz vc
+bz ve
+b0 v10
+#12250
+bz vb
+#13720
+b10 vb
+#14000
+b10 vc
+b110 vd
+b101 ve
+b10 v10
+#15068
+bz vd
+b111 ve
+b1 v10
+#15610
+b110 v0
+b1 v1
+b10011 v2
+b1 vf
+#15820
+bz v0
+bz v1
+bz v2
+b0 vf
+#16137
+b1000 ve
+b1 v10
+#18375
+b101 v4
+b10 v5
+b1010 v6
+b1 vf
+#18970
+bz v4
+bz v5
+bz v6
+b0 vf
+#19145
+b111 v4
+b10 v5
+b1010 v7
+b1 vf
+#19320
+bz v4
+bz v5
+bz v7
+b0 vf
+#19670
+b1000 v4
+b10 v5
+b1010 v8
+b1 vf
+#19810
+bz v4
+bz v5
+bz v8
+b0 vf
+#19947
+b1001 ve
+b1 v10
+#20650
+b1001 v4
+b10 v5
+b1010 v9
+b1 vf
+#20720
+bz v4
+bz v5
+bz v9
+b0 vf
+#20980
+b1010 vd
+b10 v10
+#21016
+b1011 ve
+b10 v10
+#21140
+b1011 v4
+b10 v5
+b1010 va
+b1 vf
+#21245
+bz v4
+bz v5
+bz va
+b0 vf
+#21700
+b1010 v0
+b1 v1
+b10011 v3
+b1 vf
+#21875
+bz v0
+bz v1
+bz v3
+b0 vf
+#22049
+bz vd
+b1 v10
+#22085
+bz vc
+bz ve
+b0 v10
+#22330
+bz vb
diff --git a/test_regress/t/t_gantt_io_arm.dat b/test_regress/t/t_gantt_io_arm.dat
new file mode 100644
index 000000000..33846b417
--- /dev/null
+++ b/test_regress/t/t_gantt_io_arm.dat
@@ -0,0 +1,53 @@
+VLPROFTHREAD 1.1 # Verilator thread profile dump version 1.1
+VLPROF arg --threads 4
+VLPROF arg +verilator+prof+threads+start+1
+VLPROF arg +verilator+prof+threads+window+2
+VLPROF stat yields 51
+VLPROFPROC processor	: 0
+VLPROFPROC model name	: Phytium,FT-2500/128
+VLPROFPROC BogoMIPS	: 100.00
+VLPROFPROC Features	: fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid
+VLPROFPROC CPU implementer	: 0x70
+VLPROFPROC CPU architecture: 8
+VLPROFPROC CPU variant	: 0x1
+VLPROFPROC CPU part	: 0x663
+VLPROFPROC CPU revision	: 3
+VLPROFPROC 
+VLPROFPROC processor	: 1
+VLPROFPROC model name	: Phytium,FT-2500/128
+VLPROFPROC BogoMIPS	: 100.00
+VLPROFPROC Features	: fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid
+VLPROFPROC CPU implementer	: 0x70
+VLPROFPROC CPU architecture: 8
+VLPROFPROC CPU variant	: 0x1
+VLPROFPROC CPU part	: 0x663
+VLPROFPROC CPU revision	: 3
+VLPROFPROC 
+VLPROFPROC processor	: 2
+VLPROFPROC model name	: Phytium,FT-2500/128
+VLPROFPROC BogoMIPS	: 100.00
+VLPROFPROC Features	: fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid
+VLPROFPROC CPU implementer	: 0x70
+VLPROFPROC CPU architecture: 8
+VLPROFPROC CPU variant	: 0x1
+VLPROFPROC CPU part	: 0x663
+VLPROFPROC CPU revision	: 3
+VLPROFPROC 
+VLPROFPROC processor	: 3
+VLPROFPROC model name	: Phytium,FT-2500/128
+VLPROFPROC BogoMIPS	: 100.00
+VLPROFPROC Features	: fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid
+VLPROFPROC CPU implementer	: 0x70
+VLPROFPROC CPU architecture: 8
+VLPROFPROC CPU variant	: 0x1
+VLPROFPROC CPU part	: 0x663
+VLPROFPROC CPU revision	: 3
+VLPROFPROC 
+VLPROF eval start 57709 elapsed 1745979 cpu 2 on thread 1
+VLPROF eval_loop start 58532 elapsed 1744353 cpu 2 on thread 1
+VLPROF mtask 85 start 90465 elapsed 64569 predict_start 14315 predict_cost 30533 cpu 2 on thread 1
+VLPROF mtask 79 start 156555 elapsed 137754 predict_start 44848 predict_cost 48001 cpu 2 on thread 1
+VLPROF mtask 90 start 77352 elapsed 1159 predict_start 14315 predict_cost 21592 cpu 3 on thread 2
+VLPROF mtask 81 start 79799 elapsed 868 predict_start 35907 predict_cost 29215 cpu 3 on thread 2
+VLPROF mtask 87 start 81746 elapsed 887 predict_start 65147 predict_cost 33809 cpu 3 on thread 2
+VLPROF stat ticks 180832
diff --git a/test_regress/t/t_gantt_io_arm.out b/test_regress/t/t_gantt_io_arm.out
new file mode 100644
index 000000000..d2be5b08e
--- /dev/null
+++ b/test_regress/t/t_gantt_io_arm.out
@@ -0,0 +1,37 @@
+Verilator Gantt report
+
+Argument settings:
+  +verilator+prof+threads+start+1
+  +verilator+prof+threads+window+2
+  --threads 4
+
+Analysis:
+  Total threads             = 2
+  Total mtasks              = 5
+  Total cpus used           = 2
+  Total yields              = 51
+  Total evals               = 1
+  Total eval loops          = 1
+  Total eval time           = 294309 rdtsc ticks
+  Longest mtask time        = 137754 rdtsc ticks
+  All-thread mtask time     = 205237 rdtsc ticks
+  Longest-thread efficiency = 46.8%
+  All-thread efficiency     = 34.9%
+  All-thread speedup        = 0.7
+
+Prediction (what Verilator used for scheduling):
+  All-thread efficiency     = 82.4%
+  All-thread speedup        = 1.6
+
+Statistics:
+  min log(p2e) = -1.054  from mtask 79 (predict 48001, elapsed 137754)
+  max log(p2e) = 3.641  from mtask 87 (predict 33809, elapsed 887)
+  mean = 1.656
+  stddev = 2.104
+  e ^ stddev = 8.200
+
+CPUs:
+  cpu 2: cpu_time=202323  Phytium,FT-2500/128
+  cpu 3: cpu_time=2914  Phytium,FT-2500/128
+
+Writing profile_threads.vcd
diff --git a/test_regress/t/t_gantt_io_arm.pl b/test_regress/t/t_gantt_io_arm.pl
new file mode 100755
index 000000000..7eac146d9
--- /dev/null
+++ b/test_regress/t/t_gantt_io_arm.pl
@@ -0,0 +1,20 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(dist => 1);
+
+run(cmd => ["cd $Self->{obj_dir} && $ENV{VERILATOR_ROOT}/bin/verilator_gantt"
+            . " $Self->{t_dir}/$Self->{name}.dat > gantt.log"],
+    check_finished => 0);
+
+files_identical("$Self->{obj_dir}/gantt.log", $Self->{golden_filename});
+
+ok(1);
+1;
diff --git a/test_regress/t/t_gantt_io_noproc.dat b/test_regress/t/t_gantt_io_noproc.dat
new file mode 100644
index 000000000..423d57a05
--- /dev/null
+++ b/test_regress/t/t_gantt_io_noproc.dat
@@ -0,0 +1,24 @@
+VLPROFTHREAD 1.1 # Verilator thread profile dump version 1.1
+VLPROF arg --threads 2
+VLPROF arg +verilator+prof+threads+start+2
+VLPROF arg +verilator+prof+threads+window+2
+VLPROF stat yields 0
+VLPROF eval start 595 elapsed 11655 cpu 19 on thread 1
+VLPROF eval_loop start 945 elapsed 11235 cpu 19 on thread 1
+VLPROF mtask 6 start 2695 elapsed 210 predict_start 0 predict_cost 30 cpu 19 on thread 1
+VLPROF mtask 10 start 9695 elapsed 175 predict_start 196 predict_cost 30 cpu 19 on thread 1
+VLPROF eval start 13720 elapsed 8610 cpu 19 on thread 1
+VLPROF eval_loop start 14000 elapsed 8085 cpu 19 on thread 1
+VLPROF mtask 6 start 15610 elapsed 210 predict_start 0 predict_cost 30 cpu 19 on thread 1
+VLPROF mtask 10 start 21700 elapsed 175 predict_start 196 predict_cost 30 cpu 19 on thread 1
+VLPROF mtask 5 start 5495 elapsed 595 predict_start 0 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 7 start 6300 elapsed 595 predict_start 30 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 8 start 7490 elapsed 1050 predict_start 60 predict_cost 107 cpu 10 on thread 2
+VLPROF mtask 9 start 9135 elapsed 595 predict_start 167 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 11 start 10255 elapsed 805 predict_start 197 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 5 start 18375 elapsed 595 predict_start 0 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 7 start 19145 elapsed 175 predict_start 30 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 8 start 19670 elapsed 140 predict_start 60 predict_cost 107 cpu 10 on thread 2
+VLPROF mtask 9 start 20650 elapsed 70 predict_start 167 predict_cost 30 cpu 10 on thread 2
+VLPROF mtask 11 start 21140 elapsed 105 predict_start 197 predict_cost 30 cpu 10 on thread 2
+VLPROF stat ticks 23415
diff --git a/test_regress/t/t_gantt_io_noproc.out b/test_regress/t/t_gantt_io_noproc.out
new file mode 100644
index 000000000..87680d43c
--- /dev/null
+++ b/test_regress/t/t_gantt_io_noproc.out
@@ -0,0 +1,36 @@
+Verilator Gantt report
+
+Argument settings:
+  +verilator+prof+threads+start+2
+  +verilator+prof+threads+window+2
+  --threads 2
+
+Analysis:
+  Total threads             = 2
+  Total mtasks              = 7
+  Total cpus used           = 2
+  Total yields              = 0
+  Total evals               = 2
+  Total eval loops          = 2
+  Total eval time           = 21875 rdtsc ticks
+  Longest mtask time        = 1190 rdtsc ticks
+  All-thread mtask time     = 5495 rdtsc ticks
+  Longest-thread efficiency = 5.4%
+  All-thread efficiency     = 12.6%
+  All-thread speedup        = 0.3
+
+Prediction (what Verilator used for scheduling):
+  All-thread efficiency     = 63.2%
+  All-thread speedup        = 1.3
+
+Statistics:
+  min log(p2e) = -3.681  from mtask 5 (predict 30, elapsed 1190)
+  max log(p2e) = -2.409  from mtask 8 (predict 107, elapsed 1190)
+  mean = -2.992
+  stddev = 0.459
+  e ^ stddev = 1.583
+
+CPUs:
+  cpu 10: cpu_time=4725
+  cpu 19: cpu_time=770
+
diff --git a/test_regress/t/t_gantt_io_noproc.pl b/test_regress/t/t_gantt_io_noproc.pl
new file mode 100755
index 000000000..f3011e1ff
--- /dev/null
+++ b/test_regress/t/t_gantt_io_noproc.pl
@@ -0,0 +1,20 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(dist => 1);
+
+run(cmd => ["cd $Self->{obj_dir} && $ENV{VERILATOR_ROOT}/bin/verilator_gantt"
+            . " --no-vcd $Self->{t_dir}/$Self->{name}.dat > gantt.log"],
+    check_finished => 0);
+
+files_identical("$Self->{obj_dir}/gantt.log", $Self->{golden_filename});
+
+ok(1);
+1;
diff --git a/test_regress/t/t_gate_chained.pl b/test_regress/t/t_gate_chained.pl
index 3b151b61f..ac9dd39d1 100755
--- a/test_regress/t/t_gate_chained.pl
+++ b/test_regress/t/t_gate_chained.pl
@@ -24,7 +24,7 @@ sub gen {
     $fh->print("\n");
     my $prev = "i";
     my $n = 9000;
-    for (my $i=1; $i<$n; ++$i) {
+    for (my $i = 1; $i < $n; ++$i) {
         $fh->printf("  wire [63:0] ass%04x = (sel == 16'h%04x) ? 64'h0 : $prev;\n", $i, $i);
         $prev = sprintf("ass%04x", $i);
     }
@@ -34,8 +34,8 @@ sub gen {
 
     $fh->print("\n");
     $fh->print("  always @ (posedge clk) begin\n");
-    $fh->print('    $write("*-* All Finished *-*\n");',"\n");
-    $fh->print('    $finish;',"\n");
+    $fh->print('    $write("*-* All Finished *-*\n");', "\n");
+    $fh->print('    $finish;', "\n");
     $fh->print("  end\n");
     $fh->print("endmodule\n");
 }
@@ -45,8 +45,8 @@ top_filename("$Self->{obj_dir}/t_gate_chained.v");
 gen($Self->{top_filename});
 
 compile(
-    verilator_flags2=>["--stats --x-assign fast --x-initial fast",
-                       "-Wno-UNOPTTHREADS"],
+    verilator_flags2 => ["--stats --x-assign fast --x-initial fast",
+                         "-Wno-UNOPTTHREADS"],
     );
 
 execute(
diff --git a/test_regress/t/t_gate_ormux.pl b/test_regress/t/t_gate_ormux.pl
index e0cf23b95..02476bffd 100755
--- a/test_regress/t/t_gate_ormux.pl
+++ b/test_regress/t/t_gate_ormux.pl
@@ -15,7 +15,7 @@ $Self->{sim_time} = $Self->{cycles} * 10 + 1000;
 
 compile(
     v_flags2 => ["+define+SIM_CYCLES=$Self->{cycles}",],
-    verilator_flags2=>["-Wno-UNOPTTHREADS", "--stats"],
+    verilator_flags2 => ["-Wno-UNOPTTHREADS", "--stats"],
     );
 
 if ($Self->{vlt}) {
diff --git a/test_regress/t/t_gate_tree.pl b/test_regress/t/t_gate_tree.pl
index 568c1868c..67587a1ba 100755
--- a/test_regress/t/t_gate_tree.pl
+++ b/test_regress/t/t_gate_tree.pl
@@ -14,7 +14,7 @@ use vars qw($Self);
 
 scenarios(simulator => 1);
 
-my $width = 64*($ENV{VERILATOR_TEST_WIDTH}||4);
+my $width = 64 * ($ENV{VERILATOR_TEST_WIDTH} || 4);
 my $vars = 64;
 
 $Self->{cycles} = ($Self->{benchmark} ? 1_000_000 : 100);
@@ -34,15 +34,15 @@ sub gen {
 
     my %tree;
     my $fanin = 8;
-    my $stages = int(log($vars)/log($fanin)+0.99999)+1;
+    my $stages = int(log($vars) / log($fanin) + 0.99999) + 1;
     my $result = 0;
-    for (my $n=0; $n<$vars; $n++) {
-        $result += ($n||1);
+    for (my $n = 0; $n < $vars; $n++) {
+        $result += ($n || 1);
         $tree{0}{$n}{$n} = 1;
         my $nl = $n;
-        for (my $stage=1; $stage<$stages; $stage++) {
+        for (my $stage=1; $stage < $stages; $stage++) {
             my $lastn = $nl;
-            $nl = int($nl/$fanin);
+            $nl = int($nl / $fanin);
             $tree{$stage}{$nl}{$lastn} = 1;
         }
     }
@@ -50,9 +50,9 @@ sub gen {
 
     $fh->print("\n");
     my $workingset = 0;
-    foreach my $stage (sort {$a<=>$b} keys %tree) {
-        foreach my $n (sort {$a<=>$b} keys %{$tree{$stage}}) {
-            $fh->print(    "   reg [".($width-1).":0] v${stage}_${n};\n");
+    foreach my $stage (sort { $a <=> $b} keys %tree) {
+        foreach my $n (sort { $a <=> $b} keys %{$tree{$stage}}) {
+            $fh->print(    "   reg [" . ($width - 1) . ":0] v${stage}_${n};\n");
             $workingset += int($width/8 + 7);
         }
     }
@@ -62,7 +62,7 @@ sub gen {
     $fh->print("      cyc <= cyc + 1;\n");
     $fh->print("`ifdef TEST_VERBOSE\n");
     $fh->print("         \$write(\"[%0t] rst=%0x  v0_0=%0x  v1_0=%0x  result=%0x\\n\""
-               .", \$time, reset, v0_0, v1_0, v".($stages-1)."_0);\n");
+               .", \$time, reset, v0_0, v1_0, v" . ($stages - 1) . "_0);\n");
     $fh->print("`endif\n");
     $fh->print("      if (cyc==0) begin\n");
     $fh->print("         reset <= 1;\n");
@@ -74,19 +74,19 @@ sub gen {
     $fh->print(" `define SIM_CYCLES 99\n");
     $fh->print("`endif\n");
     $fh->print("      else if (cyc==`SIM_CYCLES) begin\n");
-    $fh->print("         if (v".($stages-1)."_0 != ${width}'d${result}) \$stop;\n");
+    $fh->print("         if (v" . ($stages - 1) . "_0 != ${width}'d${result}) \$stop;\n");
     $fh->print("         \$write(\"VARS=${vars} WIDTH=${width}"
-               ." WORKINGSET=".(int($workingset/1024))."KB\\n\");\n");
-    $fh->print('         $write("*-* All Finished *-*\n");',"\n");
-    $fh->print('         $finish;',"\n");
+               ." WORKINGSET=" . (int($workingset / 1024)) . "KB\\n\");\n");
+    $fh->print('         $write("*-* All Finished *-*\n");', "\n");
+    $fh->print('         $finish;', "\n");
     $fh->print("      end\n");
     $fh->print("   end\n");
 
     $fh->print("\n");
     for (my $n=0; $n<$vars; $n++) {
         $fh->print("   always @ (posedge clk)"
-                   ." v0_${n} <= reset ? ${width}'d".(${n}||1)." : v0_"
-                   .((int($n/$fanin)*$fanin) + (($n+1) % $fanin)).";\n");
+                   . " v0_${n} <= reset ? ${width}'d" . (${n} || 1) . " : v0_"
+                   . ((int($n / $fanin) * $fanin) + (($n + 1) % $fanin)) . ";\n");
     }
 
     foreach my $stage (sort {$a<=>$b} keys %tree) {
@@ -94,10 +94,10 @@ sub gen {
         $fh->print("\n");
         foreach my $n (sort {$a<=>$b} keys %{$tree{$stage}}) {
             $fh->print("   always @ (posedge clk)"
-                       ." v${stage}_${n} <=");
+                       . " v${stage}_${n} <=");
             my $op = "";
             foreach my $ni (sort {$a<=>$b} keys %{$tree{$stage}{$n}}) {
-                $fh->print($op." v".(${stage}-1)."_${ni}");
+                $fh->print($op . " v" . (${stage} - 1) . "_${ni}");
                 $op = " +";
             }
             $fh->print(";\n");
@@ -121,6 +121,7 @@ execute(
     all_run_flags => ["+verilator+prof+threads+start+100",
                       " +verilator+prof+threads+window+2",
                       " +verilator+prof+threads+file+$Self->{obj_dir}/profile_threads.dat",
+                      " +verilator+prof+vlt+file+$Self->{obj_dir}/profile.vlt",
                       ],
     check_finished => 1,
     );
diff --git a/test_regress/t/t_hier_block_cmake.pl b/test_regress/t/t_hier_block_cmake.pl
index 0517b3a3b..a4d74b89b 100755
--- a/test_regress/t/t_hier_block_cmake.pl
+++ b/test_regress/t/t_hier_block_cmake.pl
@@ -29,7 +29,7 @@ if (!$Self->have_cmake) {
     run(logfile => "$Self->{obj_dir}/run.log",
         cmd => ['cd "' . $Self->{obj_dir} . '" && ./t_hier_block_cmake', '.']
     );
-    my $target_dir = $Self->{obj_dir} .'/CMakeFiles/t_hier_block_cmake.dir/Vt_hier_block.dir/';
+    my $target_dir = $Self->{obj_dir} . '/CMakeFiles/t_hier_block_cmake.dir/Vt_hier_block.dir/';
     file_grep($target_dir . 'Vsub0/sub0.sv', /^module\s+(\S+)\s+/, "sub0");
     file_grep($target_dir . 'Vsub1/sub1.sv', /^module\s+(\S+)\s+/, "sub1");
     file_grep($target_dir . 'Vsub2/sub2.sv', /^module\s+(\S+)\s+/, "sub2");
diff --git a/test_regress/t/t_inst_misarray2_bad.pl b/test_regress/t/t_inst_misarray2_bad.pl
index 7df095a97..a60503a1f 100755
--- a/test_regress/t/t_inst_misarray2_bad.pl
+++ b/test_regress/t/t_inst_misarray2_bad.pl
@@ -15,6 +15,5 @@ lint(
     expect_filename => $Self->{golden_filename},
     );
 
-
 ok(1);
 1;
diff --git a/test_regress/t/t_inst_misarray_bad.pl b/test_regress/t/t_inst_misarray_bad.pl
index 7df095a97..a60503a1f 100755
--- a/test_regress/t/t_inst_misarray_bad.pl
+++ b/test_regress/t/t_inst_misarray_bad.pl
@@ -15,6 +15,5 @@ lint(
     expect_filename => $Self->{golden_filename},
     );
 
-
 ok(1);
 1;
diff --git a/test_regress/t/t_inst_notunsized.pl b/test_regress/t/t_inst_notunsized.pl
index 4f4923342..fa437f2fe 100755
--- a/test_regress/t/t_inst_notunsized.pl
+++ b/test_regress/t/t_inst_notunsized.pl
@@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(simulator => 1);
 
 compile(
-    verilator_flags2=>["-Wno-IMPLICIT"],
+    verilator_flags2 => ["-Wno-IMPLICIT"],
     );
 
 execute(
diff --git a/test_regress/t/t_inst_tree_inl0_pub1.pl b/test_regress/t/t_inst_tree_inl0_pub1.pl
index effe5ebbe..b06051080 100755
--- a/test_regress/t/t_inst_tree_inl0_pub1.pl
+++ b/test_regress/t/t_inst_tree_inl0_pub1.pl
@@ -31,9 +31,9 @@ sub checkRelativeRefs {
         }
 
         if ($found_relative != $expect_relative) {
-            error("$file " .
-                  ($found_relative ? "has" : "does not have") .
-                  " relative variable references.");
+            error("$file "
+                  . ($found_relative ? "has" : "does not have")
+                  . " relative variable references.");
         }
     }
 }
diff --git a/test_regress/t/t_lint_didnotconverge_bad.out b/test_regress/t/t_lint_didnotconverge_bad.out
new file mode 100644
index 000000000..1a0bb785a
--- /dev/null
+++ b/test_regress/t/t_lint_didnotconverge_bad.out
@@ -0,0 +1,6 @@
+-V{t#,#}- Verilated::debug is on. Message prefix indicates {<thread>,<sequence_number>}.
+-V{t#,#}+    Vt_lint_didnotconverge_bad___024root___change_request
+-V{t#,#}+    Vt_lint_didnotconverge_bad___024root___change_request_1
+-V{t#,#}        CHANGE: t/t_lint_didnotconverge_bad.v:14: a
+%Error: t/t_lint_didnotconverge_bad.v:7: Verilated model didn't converge
+Aborting...
diff --git a/test_regress/t/t_lint_didnotconverge_bad.pl b/test_regress/t/t_lint_didnotconverge_bad.pl
new file mode 100755
index 000000000..5281eac77
--- /dev/null
+++ b/test_regress/t/t_lint_didnotconverge_bad.pl
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2008 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(vlt => 1);
+
+compile(
+    verilator_flags2 => ["--prof-cfuncs"],
+    );
+
+execute(
+    fails => 1,
+    expect_filename => $Self->{golden_filename},
+    );
+
+extract(
+    in => $Self->{top_filename},
+    out => "../docs/gen/ex_DIDNOTCONVERGE_faulty.rst",
+    lines => "16-17");
+
+extract(
+    in => $Self->{golden_filename},
+    out => "../docs/gen/ex_DIDNOTCONVERGE_msg.rst",
+    lines => "2-5");
+
+ok(1);
+1;
diff --git a/test_regress/t/t_lint_didnotconverge_bad.v b/test_regress/t/t_lint_didnotconverge_bad.v
new file mode 100644
index 000000000..315e91aa5
--- /dev/null
+++ b/test_regress/t/t_lint_didnotconverge_bad.v
@@ -0,0 +1,19 @@
+// DESCRIPTION: Verilator: Verilog Test module
+//
+// This file ONLY is placed under the Creative Commons Public Domain, for
+// any use, without warranty, 2012 by Wilson Snyder.
+// SPDX-License-Identifier: CC0-1.0
+
+module t (/*AUTOARG*/
+   // Outputs
+   a, b
+   );
+
+   // verilator lint_off UNOPT
+
+   output logic a, b;
+
+   always_comb b = ~a;
+   always_comb a = b;
+
+endmodule
diff --git a/test_regress/t/t_lint_didnotconverge_nodbg_bad.out b/test_regress/t/t_lint_didnotconverge_nodbg_bad.out
new file mode 100644
index 000000000..1b59dad18
--- /dev/null
+++ b/test_regress/t/t_lint_didnotconverge_nodbg_bad.out
@@ -0,0 +1,2 @@
+%Error: t/t_lint_didnotconverge_bad.v:7: Verilated model didn't converge
+Aborting...
diff --git a/test_regress/t/t_lint_didnotconverge_nodbg_bad.pl b/test_regress/t/t_lint_didnotconverge_nodbg_bad.pl
new file mode 100755
index 000000000..3b851338e
--- /dev/null
+++ b/test_regress/t/t_lint_didnotconverge_nodbg_bad.pl
@@ -0,0 +1,30 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2008 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(vlt => 1);
+
+top_filename("t/t_lint_didnotconverge_bad.v");
+
+compile(
+    make_flags => 'CPPFLAGS_ADD=-UVL_DEBUG',
+    );
+
+execute(
+    fails => 1,
+    expect_filename => $Self->{golden_filename},
+    );
+
+extract(
+    in => $Self->{golden_filename},
+    out => "../docs/gen/ex_DIDNOTCONVERGE_nodbg_msg.rst",
+    lines => "1");
+
+ok(1);
+1;
diff --git a/test_regress/t/t_lint_eofline_bad.out b/test_regress/t/t_lint_eofline_bad.out
index 6d20a0dc1..cc52c6298 100644
--- a/test_regress/t/t_lint_eofline_bad.out
+++ b/test_regress/t/t_lint_eofline_bad.out
@@ -1,4 +1,5 @@
-%Warning-EOFNEWLINE: obj_vlt/t_lint_eofline_bad/t_lint_eofline_bad.v:4:10: Missing newline at end of file (POSIX 3.206).                                                                         : ... Suggest add newline.
+%Warning-EOFNEWLINE: obj_vlt/t_lint_eofline_bad/t_lint_eofline_bad.v:4:10: Missing newline at end of file (POSIX 3.206).
+                                                                         : ... Suggest add newline.
     4 | endmodule
       |          ^
                      ... For warning description see https://verilator.org/warn/EOFNEWLINE?v=latest
diff --git a/test_regress/t/t_lint_stmtdly_bad.out b/test_regress/t/t_lint_stmtdly_bad.out
new file mode 100644
index 000000000..f23238765
--- /dev/null
+++ b/test_regress/t/t_lint_stmtdly_bad.out
@@ -0,0 +1,7 @@
+%Warning-STMTDLY: t/t_lint_stmtdly_bad.v:10:8: Unsupported: Ignoring delay on this delayed statement.
+                                             : ... In instance t
+   10 |       #100 $finish;   
+      |        ^~~
+                  ... For warning description see https://verilator.org/warn/STMTDLY?v=latest
+                  ... Use "/* verilator lint_off STMTDLY */" and lint_on around source to disable this message.
+%Error: Exiting due to
diff --git a/test_regress/t/t_lint_stmtdly_bad.pl b/test_regress/t/t_lint_stmtdly_bad.pl
new file mode 100755
index 000000000..548dab0af
--- /dev/null
+++ b/test_regress/t/t_lint_stmtdly_bad.pl
@@ -0,0 +1,29 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2008 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(vlt => 1);
+
+compile(
+    fails => 1,
+    expect_filename => $Self->{golden_filename},
+    );
+
+extract(
+    in => $Self->{top_filename},
+    out => "../docs/gen/ex_STMTDLY_faulty.rst",
+    lines => "10");
+
+extract(
+    in => $Self->{golden_filename},
+    out => "../docs/gen/ex_STMTDLY_msg.rst",
+    lines => "1");
+
+ok(1);
+1;
diff --git a/test_regress/t/t_lint_stmtdly_bad.v b/test_regress/t/t_lint_stmtdly_bad.v
new file mode 100644
index 000000000..3684bbb30
--- /dev/null
+++ b/test_regress/t/t_lint_stmtdly_bad.v
@@ -0,0 +1,13 @@
+// DESCRIPTION: Verilator: Verilog Test module
+//
+// This file ONLY is placed under the Creative Commons Public Domain, for
+// any use, without warranty, 2012 by Wilson Snyder.
+// SPDX-License-Identifier: CC0-1.0
+
+module t (/*AUTOARG*/);
+
+   initial begin
+      #100 $finish;  //<--- Warning
+   end
+
+endmodule
diff --git a/test_regress/t/t_math_const.v b/test_regress/t/t_math_const.v
index 654a7161d..11bd286eb 100644
--- a/test_regress/t/t_math_const.v
+++ b/test_regress/t/t_math_const.v
@@ -106,7 +106,9 @@ module t (/*AUTOARG*/
 	    w32 = 12; w32 |= 15; if (w32 != 15) $stop;
 	    w32 = 12; w32 ^= 15; if (w32 != 3) $stop;
 	    w32 = 12; w32 >>= 1; if (w32 != 6) $stop;
+	    w32 = 12; w32 >>>= 1; if (w32 != 6) $stop;
 	    w32 = 12; w32 <<= 1; if (w32 != 24) $stop;
+	    w32 = 12; w32 %= 5; if (w32 != 2) $stop;
 
 	    // Increments
 	    v32[1] = 12; v32[1]++;  if (v32[1] != 13) $stop;
diff --git a/test_regress/t/t_multiline_waivers.pl b/test_regress/t/t_multiline_waivers.pl
new file mode 100755
index 000000000..6cfad5889
--- /dev/null
+++ b/test_regress/t/t_multiline_waivers.pl
@@ -0,0 +1,29 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2008 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(vlt => 1);
+
+my $out_filename = "$Self->{obj_dir}/$Self->{name}_waiver_gen.vlt";
+my $waiver_filename = "$Self->{obj_dir}/$Self->{name}_waiver.vlt";
+
+compile(
+    v_flags2 => ['--waiver-output', $out_filename],
+    fails => 1,
+    );
+
+file_sed($out_filename, $waiver_filename,
+         sub { s/\/\/ lint_off/lint_off/g; });
+
+compile(
+    v_flags2 => [$waiver_filename],
+    );
+
+ok(1);
+1;
diff --git a/test_regress/t/t_multiline_waivers.v b/test_regress/t/t_multiline_waivers.v
new file mode 100644
index 000000000..d9f68bdf1
--- /dev/null
+++ b/test_regress/t/t_multiline_waivers.v
@@ -0,0 +1,38 @@
+// DESCRIPTION: Verilator: Verilog Test module
+//
+// This file ONLY is placed under the Creative Commons Public Domain, for
+// any use, without warranty, 2012 by Wilson Snyder.
+// SPDX-License-Identifier: CC0-1.0
+
+module t (/*AUTOARG*/
+   // Outputs
+   out, out2,
+   // Inputs
+   clk, a0, d0, d1
+   );
+
+   input clk;
+   input [1:0] a0;
+   input [7:0] d0;
+   input [7:0] d1;
+   output reg [31:0] out;
+   output reg [15:0] out2;
+
+   reg [7:0]         mem [4];
+
+   always @(posedge clk) begin
+      mem[a0] <= d0;  // <--- Warning
+   end
+   always @(negedge clk) begin
+      mem[a0] <= d1;  // <--- Warning
+   end
+   assign out = {mem[3],mem[2],mem[1],mem[0]};
+
+   always @(posedge clk) begin
+      out2[7:0] <= d0;  // <--- Warning
+   end
+   always @(negedge clk) begin
+      out2[15:8] <= d0;  // <--- Warning
+   end
+
+endmodule
diff --git a/test_regress/t/t_optm_if_array.pl b/test_regress/t/t_optm_if_array.pl
index aa9716f06..caf3de14b 100755
--- a/test_regress/t/t_optm_if_array.pl
+++ b/test_regress/t/t_optm_if_array.pl
@@ -18,7 +18,7 @@ execute(
     );
 
 for my $file (glob_all("$Self->{obj_dir}/$Self->{VM_PREFIX}___024root*.cpp")) {
-  file_grep_not($file, qr/rstn_r/);
+    file_grep_not($file, qr/rstn_r/);
 }
 
 ok(1);
diff --git a/test_regress/t/t_optm_redor.pl b/test_regress/t/t_optm_redor.pl
index aa9716f06..caf3de14b 100755
--- a/test_regress/t/t_optm_redor.pl
+++ b/test_regress/t/t_optm_redor.pl
@@ -18,7 +18,7 @@ execute(
     );
 
 for my $file (glob_all("$Self->{obj_dir}/$Self->{VM_PREFIX}___024root*.cpp")) {
-  file_grep_not($file, qr/rstn_r/);
+    file_grep_not($file, qr/rstn_r/);
 }
 
 ok(1);
diff --git a/test_regress/t/t_order_dpi_export_1.pl b/test_regress/t/t_order_dpi_export_1.pl
index 21b3c76b6..e76a9afcd 100755
--- a/test_regress/t/t_order_dpi_export_1.pl
+++ b/test_regress/t/t_order_dpi_export_1.pl
@@ -13,7 +13,7 @@ scenarios(vlt_all => 1);
 compile(
     make_top_shell => 0,
     make_main => 0,
-    verilator_flags2 => ["--exe","$Self->{t_dir}/$Self->{name}.cpp"],
+    verilator_flags2 => ["--exe", "$Self->{t_dir}/$Self->{name}.cpp"],
     );
 
 execute(
diff --git a/test_regress/t/t_order_dpi_export_2.pl b/test_regress/t/t_order_dpi_export_2.pl
index 21b3c76b6..e76a9afcd 100755
--- a/test_regress/t/t_order_dpi_export_2.pl
+++ b/test_regress/t/t_order_dpi_export_2.pl
@@ -13,7 +13,7 @@ scenarios(vlt_all => 1);
 compile(
     make_top_shell => 0,
     make_main => 0,
-    verilator_flags2 => ["--exe","$Self->{t_dir}/$Self->{name}.cpp"],
+    verilator_flags2 => ["--exe", "$Self->{t_dir}/$Self->{name}.cpp"],
     );
 
 execute(
diff --git a/test_regress/t/t_order_dpi_export_3.pl b/test_regress/t/t_order_dpi_export_3.pl
index 21b3c76b6..e76a9afcd 100755
--- a/test_regress/t/t_order_dpi_export_3.pl
+++ b/test_regress/t/t_order_dpi_export_3.pl
@@ -13,7 +13,7 @@ scenarios(vlt_all => 1);
 compile(
     make_top_shell => 0,
     make_main => 0,
-    verilator_flags2 => ["--exe","$Self->{t_dir}/$Self->{name}.cpp"],
+    verilator_flags2 => ["--exe", "$Self->{t_dir}/$Self->{name}.cpp"],
     );
 
 execute(
diff --git a/test_regress/t/t_order_dpi_export_4.pl b/test_regress/t/t_order_dpi_export_4.pl
index 21b3c76b6..e76a9afcd 100755
--- a/test_regress/t/t_order_dpi_export_4.pl
+++ b/test_regress/t/t_order_dpi_export_4.pl
@@ -13,7 +13,7 @@ scenarios(vlt_all => 1);
 compile(
     make_top_shell => 0,
     make_main => 0,
-    verilator_flags2 => ["--exe","$Self->{t_dir}/$Self->{name}.cpp"],
+    verilator_flags2 => ["--exe", "$Self->{t_dir}/$Self->{name}.cpp"],
     );
 
 execute(
diff --git a/test_regress/t/t_order_dpi_export_5.pl b/test_regress/t/t_order_dpi_export_5.pl
index 21b3c76b6..e76a9afcd 100755
--- a/test_regress/t/t_order_dpi_export_5.pl
+++ b/test_regress/t/t_order_dpi_export_5.pl
@@ -13,7 +13,7 @@ scenarios(vlt_all => 1);
 compile(
     make_top_shell => 0,
     make_main => 0,
-    verilator_flags2 => ["--exe","$Self->{t_dir}/$Self->{name}.cpp"],
+    verilator_flags2 => ["--exe", "$Self->{t_dir}/$Self->{name}.cpp"],
     );
 
 execute(
diff --git a/test_regress/t/t_order_quad.pl b/test_regress/t/t_order_quad.pl
index 68cbf7858..68f1f0e47 100755
--- a/test_regress/t/t_order_quad.pl
+++ b/test_regress/t/t_order_quad.pl
@@ -13,7 +13,7 @@ scenarios(simulator => 1);
 compile(
     make_top_shell => 0,
     make_main => 0,
-    verilator_flags2 => ["--exe","$Self->{t_dir}/$Self->{name}.cpp"],
+    verilator_flags2 => ["--exe", "$Self->{t_dir}/$Self->{name}.cpp"],
     );
 
 execute(
diff --git a/test_regress/t/t_pgo_profoutofdate_bad.out b/test_regress/t/t_pgo_profoutofdate_bad.out
new file mode 100644
index 000000000..25f1c8787
--- /dev/null
+++ b/test_regress/t/t_pgo_profoutofdate_bad.out
@@ -0,0 +1,6 @@
+%Warning-PROFOUTOFDATE: t/t_pgo_profoutofdate_bad.v:27:1: Profile data for mtasks may be out of date. 3 of 3 mtasks had no data
+   27 | profile_data -model "x" -mtask "h7baded98__0" -cost 64'd12345678901234567890
+      | ^~~~~~~~~~~~
+                        ... For warning description see https://verilator.org/warn/PROFOUTOFDATE?v=latest
+                        ... Use "/* verilator lint_off PROFOUTOFDATE */" and lint_on around source to disable this message.
+%Error: Exiting due to
diff --git a/test_regress/t/t_pgo_profoutofdate_bad.pl b/test_regress/t/t_pgo_profoutofdate_bad.pl
new file mode 100755
index 000000000..e2cfc96a1
--- /dev/null
+++ b/test_regress/t/t_pgo_profoutofdate_bad.pl
@@ -0,0 +1,20 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(vltmt => 1);
+
+compile(
+    v_flags2 => ["--threads 2"],
+    fails => 1,
+    expect_filename => $Self->{golden_filename},
+    );
+
+ok(1);
+1;
diff --git a/test_regress/t/t_pgo_profoutofdate_bad.v b/test_regress/t/t_pgo_profoutofdate_bad.v
new file mode 100755
index 000000000..cba43da97
--- /dev/null
+++ b/test_regress/t/t_pgo_profoutofdate_bad.v
@@ -0,0 +1,28 @@
+// DESCRIPTION: Verilator: Verilog Test module
+//
+// This file ONLY is placed under the Creative Commons Public Domain, for
+// any use, without warranty, 2021 by Wilson Snyder.
+// SPDX-License-Identifier: CC0-1.0
+
+module t(/*AUTOARG*/
+   // Inputs
+   clk
+   );
+   input clk;
+
+   integer cyc=0;
+
+   // Test loop
+   always @ (posedge clk) begin
+      cyc <= cyc + 1;
+      if (cyc == 99) begin
+         $write("*-* All Finished *-*\n");
+         $finish;
+      end
+   end
+
+endmodule
+
+`verilator_config
+profile_data -model "x" -mtask "h7baded98__0" -cost 64'd12345678901234567890
+profile_data -model "x" -mtask "hb56134bd__0" -cost 945
diff --git a/test_regress/t/t_pgo_threads.pl b/test_regress/t/t_pgo_threads.pl
new file mode 100755
index 000000000..1bcccc905
--- /dev/null
+++ b/test_regress/t/t_pgo_threads.pl
@@ -0,0 +1,42 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(vltmt => 1);
+
+# It doesn't really matter what test
+top_filename("t/t_gen_alw.v");
+
+compile(
+    v_flags2 => ["--prof-threads --threads 2"]
+    );
+
+execute(
+    all_run_flags => ["+verilator+prof+threads+start+0",
+                      " +verilator+prof+threads+file+/dev/null",
+                      " +verilator+prof+vlt+file+$Self->{obj_dir}/profile.vlt",
+                      ],
+    check_finished => 1,
+    );
+
+file_grep("$Self->{obj_dir}/profile.vlt", qr/profile_data/i);
+
+compile(
+    # Intentionally no --prof-threads here, so we make sure profile data
+    # can be read in without it (that is, --prof-threads has no effect on profile_data hash names)
+    v_flags2 => ["--threads 2",
+                 " $Self->{obj_dir}/profile.vlt"],
+    );
+
+execute(
+    check_finished => 1,
+    );
+
+ok(1);
+1;
diff --git a/test_regress/t/t_preproc.pl b/test_regress/t/t_preproc.pl
index 930deb5d7..1a9daee70 100755
--- a/test_regress/t/t_preproc.pl
+++ b/test_regress/t/t_preproc.pl
@@ -51,7 +51,7 @@ sub preproc_check {
                 my $check = shift @Line_Checks;
                 if (!$check) { error("$filename2:$.: Extra Line_Preproc_Check\n"); }
                 if ($linecmt != $check) { error("$filename2:$.: __LINE__ inserted $linecmt, exp=$check\n"); }
-                if ($lineno != $check)  { error("$filename2:$.: __LINE__ on `line $lineno, exp=$check\n"); }
+                if ($lineno != $check) { error("$filename2:$.: __LINE__ on `line $lineno, exp=$check\n"); }
             }
         }
         $fh->close;
diff --git a/test_regress/t/t_preproc_debugi.v b/test_regress/t/t_preproc_debugi.v
index d044a69d7..9e9202741 100644
--- a/test_regress/t/t_preproc_debugi.v
+++ b/test_regress/t/t_preproc_debugi.v
@@ -8,3 +8,23 @@
 `define BAR(aa,bb) aa bb
 `FOO
 `BAR(aa,bb)
+
+`ifdef FOO
+`else
+`endif
+`ifndef FOO
+`elsif FOO
+`endif
+
+`define STRINGIFY(x) `"x`"
+`define CONCAT(a, b) a``b
+`STRINGIFY(x)
+`CONCAT(x,y)
+
+`undef FOO
+
+`undefineall
+
+`ifdef NEVER
+`error "should not get"
+`endif
diff --git a/test_regress/t/t_preproc_inc_fn_bad.out b/test_regress/t/t_preproc_inc_fn_bad.out
new file mode 100644
index 000000000..7e41874fa
--- /dev/null
+++ b/test_regress/t/t_preproc_inc_fn_bad.out
@@ -0,0 +1,4 @@
+%Error: t/t_preproc_inc_fn_bad.v:7:10: Expecting include filename. Found: ELSE
+    7 | `include `else
+      |          ^~~~~
+%Error: Exiting due to
diff --git a/test_regress/t/t_preproc_inc_fn_bad.pl b/test_regress/t/t_preproc_inc_fn_bad.pl
new file mode 100755
index 000000000..cdeb5a8e1
--- /dev/null
+++ b/test_regress/t/t_preproc_inc_fn_bad.pl
@@ -0,0 +1,20 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(vlt => 1);
+
+lint(
+    fails => 1,
+    # The .v file itself has the error (a bad `include filename)
+    expect_filename => $Self->{golden_filename},
+    );
+
+ok(1);
+1;
diff --git a/test_regress/t/t_preproc_inc_fn_bad.v b/test_regress/t/t_preproc_inc_fn_bad.v
new file mode 100644
index 000000000..49fd4a1a5
--- /dev/null
+++ b/test_regress/t/t_preproc_inc_fn_bad.v
@@ -0,0 +1,10 @@
+// DESCRIPTION: Verilator: Verilog Test module
+//
+// This file ONLY is placed under the Creative Commons Public Domain, for
+// any use, without warranty, 2010 by Wilson Snyder.
+// SPDX-License-Identifier: CC0-1.0
+
+`include `else
+
+module t;
+endmodule
diff --git a/test_regress/t/t_profcfunc.gprof b/test_regress/t/t_profcfunc.gprof
new file mode 100644
index 000000000..91be91407
--- /dev/null
+++ b/test_regress/t/t_profcfunc.gprof
@@ -0,0 +1,44 @@
+Flat profile:
+
+ Note all numbers below were faked for this test, so might not be consistent.
+
+  %   cumulative   self              self     total           
+ time   seconds   seconds    calls  Ts/call  Ts/call  name    
+  1.99      1.99     0.99   200578     0.00     0.00  VL_EXTENDS_QQ(int, int, unsigned long)
+  1.98      0.00     0.98   100000     0.00     0.00  VL_POWSS_QQQ(int, int, int, unsigned long, unsigned long, bool, bool)
+  1.89      0.00     0.89     1407     0.00     0.00  Verilated::debug()
+  1.88      0.00     0.88      202     0.00     0.00  VerilatedContext::gotFinish() const
+  1.87      0.00     0.87        6     0.00     0.00  VerilatedContext::randReset()
+  1.86      0.00     0.86        9     0.00     0.00  VlWide<2ul>::operator unsigned int*()
+  1.79      0.00     0.79      600     0.00     0.00  Vt_prof* const& std::__get_helper<0ul, Vt_prof*, std::default_delete<Vt_prof> >(std::_Tuple_impl<0ul, Vt_prof*, std::default_delete<Vt_prof> > const&)
+  1.78      0.00     0.78        3     0.00     0.00  Vt_prof*& std::__get_helper<0ul, Vt_prof*, std::default_delete<Vt_prof> >(std::_Tuple_impl<0ul, Vt_prof*, std::default_delete<Vt_prof> >&)
+  1.77      0.00     0.77        1     0.00     0.00  Vt_prof::Vt_prof(VerilatedContext*, char const*)
+  1.76      0.00     0.76        1     0.00     0.00  Vt_prof::Vt_prof(char const*)
+  1.75      0.00     0.75      200     0.00     0.00  Vt_prof::eval()
+  1.74      0.00     0.74      200     0.00     0.00  Vt_prof::eval_step()
+  1.73      0.00     0.73        1     0.00     0.00  Vt_prof::final()
+  1.72      0.00     0.72        1     0.00     0.00  Vt_prof::~Vt_prof()
+  1.71      0.00     0.71        1     0.00     0.00  Vt_prof__Syms::Vt_prof__Syms(VerilatedContext*, char const*, Vt_prof*)
+  1.70      0.00     0.70        1     0.00     0.00  Vt_prof__Syms::~Vt_prof__Syms()
+  1.69      0.00     0.69        1     0.00     0.00  Vt_prof___024root::__Vconfigure(Vt_prof__Syms*, bool)
+  1.68      0.00     0.68        1     0.00     0.00  Vt_prof___024root::Vt_prof___024root(char const*)
+  1.67      0.00     0.67        1     0.00     0.00  Vt_prof___024root::~Vt_prof___024root()
+  1.66      0.00     0.66      201     0.00     0.00  Vt_prof___024root___eval(Vt_prof___024root*)
+  1.65      0.00     0.65      200     0.00     0.00  Vt_prof___024root___eval_debug_assertions(Vt_prof___024root*)
+  1.64      0.00     0.64      100     0.00     0.00  Vt_prof___024root___sequent__TOP__5__PROF__t_prof__l31(Vt_prof___024root*)
+  1.63      0.00     0.63      100     0.00     0.00  Vt_prof___024root___sequent__TOP__50__PROF__t_prof__l31(Vt_prof___024root*)
+  1.62      0.00     0.62      100     0.00     0.00  Vt_prof___024root___sequent__TOP__6__PROF__t_prof__l30(Vt_prof___024root*)
+  1.61      0.00     0.61        1     0.00     0.00  Vt_prof___024root___final(Vt_prof___024root*)
+  1.60      0.00     0.60        1     0.00     0.00  Vt_prof___024root___eval_settle(Vt_prof___024root*)
+  1.59      0.00     0.59        1     0.00     0.00  Vt_prof___024root___eval_initial(Vt_prof___024root*)
+  1.58      0.00     0.58        1     0.00     0.00  Vt_prof___024root___ctor_var_reset(Vt_prof___024root*)
+  1.57      0.00     0.57        1     0.00     0.00  Vt_prof___024root___initial__TOP__13__PROF__t_prof__l13(Vt_prof___024root*)
+  1.30      0.00     0.30        1     0.00     0.00  _eval_initial_loop(Vt_prof__Syms*)
+  1.29      0.00     0.29        1     0.00     0.00  _vl_cmp_w(int, unsigned int const*, unsigned int const*)
+  1.28      0.00     0.28        2     0.00     0.00  _vl_moddiv_w(int, unsigned int*, unsigned int const*, unsigned int const*, bool)
+  1.27      0.00     0.27        2     0.00     0.00  _vl_vsformat(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, char const*, __va_list_tag*)
+  1.26      0.00     0.26     1399     0.00     0.00  std::unique_ptr<VerilatedContext, std::default_delete<VerilatedContext> >::get() const
+  1.25      0.00     0.25        3     0.00     0.00  unsigned long const& std::max<unsigned long>(unsigned long const&, unsigned long const&)
+  1.19      0.00     0.19        1     0.00     0.00  vl_finish(char const*, int, char const*)
+  1.18      0.00     0.18        2     0.00     0.00  vl_time_pow10(int)
+
diff --git a/test_regress/t/t_profcfunc.out b/test_regress/t/t_profcfunc.out
new file mode 100644
index 000000000..3dd28dfef
--- /dev/null
+++ b/test_regress/t/t_profcfunc.out
@@ -0,0 +1,69 @@
+Overall summary by type:
+  % time  type
+    4.37  C++
+   33.48  Common code under Vt_prof
+   15.82  VLib
+    6.46  Verilog Blocks under Vt_prof
+   39.87  Unaccounted for/rounding error
+
+Overall summary by design:
+  % time  design
+    4.37  C++
+   15.82  VLib
+   39.94  Vt_prof
+   39.87  Unaccounted for/rounding error
+
+Overall summary by module:
+  % time  module
+    4.37  C++
+   15.82  VLib
+   33.48  Vt_prof common code
+    6.46  t_prof
+   39.87  Unaccounted for/rounding error
+
+Verilog code profile:
+   These are split into three categories:
+      C++:     Time in non-Verilated C++ code
+      Prof:    Time in profile overhead
+      VBlock:  Time attributable to a block in a Verilog file and line
+      VCommon: Time in a Verilated module, due to all parts of the design
+      VLib:    Time in Verilated common libraries, called by the Verilated code
+
+  %   cumulative   self              
+ time   seconds   seconds      calls   design   type      filename and line number
+  3.27      1.27     1.27        200   Vt_prof  VBlock    t_prof:31
+  1.99      2.26     0.99     200578   -        VLib      VL_EXTENDS_QQ(int, int, unsigned long)
+  1.98      3.24     0.98     100000   -        VLib      VL_POWSS_QQQ(int, int, int, unsigned long, unsigned long, bool, bool)
+  1.89      4.13     0.89       1407   -        VLib      Verilated::debug()
+  1.88      5.01     0.88        202   -        VLib      VerilatedContext::gotFinish() const
+  1.87      5.88     0.87          6   -        VLib      VerilatedContext::randReset()
+  1.86      6.74     0.86          9   -        C++       VlWide<2ul>::operator unsigned int*()
+  1.79      7.53     0.79        600   Vt_prof  VCommon   Vt_prof* const& std::__get_helper<0ul, Vt_prof*, std::default_delete<Vt_prof> >(std::_Tuple_impl<0ul, Vt_prof*, std::default_delete<Vt_prof> > const&)
+  1.78      8.31     0.78          3   Vt_prof  VCommon   Vt_prof*& std::__get_helper<0ul, Vt_prof*, std::default_delete<Vt_prof> >(std::_Tuple_impl<0ul, Vt_prof*, std::default_delete<Vt_prof> >&)
+  1.77      9.08     0.77          1   Vt_prof  VCommon   Vt_prof::Vt_prof(VerilatedContext*, char const*)
+  1.76      9.84     0.76          1   Vt_prof  VCommon   Vt_prof::Vt_prof(char const*)
+  1.75     10.59     0.75        200   Vt_prof  VCommon   Vt_prof::eval()
+  1.74     11.33     0.74        200   Vt_prof  VCommon   Vt_prof::eval_step()
+  1.73     12.06     0.73          1   Vt_prof  VCommon   Vt_prof::final()
+  1.72     12.78     0.72          1   Vt_prof  VCommon   Vt_prof::~Vt_prof()
+  1.71     13.49     0.71          1   Vt_prof  VCommon   Vt_prof__Syms::Vt_prof__Syms(VerilatedContext*, char const*, Vt_prof*)
+  1.70     14.19     0.70          1   Vt_prof  VCommon   Vt_prof__Syms::~Vt_prof__Syms()
+  1.69     14.88     0.69          1   Vt_prof  VCommon   Vt_prof___024root::__Vconfigure(Vt_prof__Syms*, bool)
+  1.68     15.56     0.68          1   Vt_prof  VCommon   Vt_prof___024root::Vt_prof___024root(char const*)
+  1.67     16.23     0.67          1   Vt_prof  VCommon   Vt_prof___024root::~Vt_prof___024root()
+  1.66     16.89     0.66        201   Vt_prof  VCommon   Vt_prof___024root___eval(Vt_prof___024root*)
+  1.65     17.54     0.65        200   Vt_prof  VCommon   Vt_prof___024root___eval_debug_assertions(Vt_prof___024root*)
+  1.62     18.16     0.62        100   Vt_prof  VBlock    t_prof:30
+  1.61     18.77     0.61          1   Vt_prof  VCommon   Vt_prof___024root___final(Vt_prof___024root*)
+  1.60     19.37     0.60          1   Vt_prof  VCommon   Vt_prof___024root___eval_settle(Vt_prof___024root*)
+  1.59     19.96     0.59          1   Vt_prof  VCommon   Vt_prof___024root___eval_initial(Vt_prof___024root*)
+  1.58     20.54     0.58          1   Vt_prof  VCommon   Vt_prof___024root___ctor_var_reset(Vt_prof___024root*)
+  1.57     21.11     0.57          1   Vt_prof  VBlock    t_prof:13
+  1.30     21.41     0.30          1   Vt_prof  VCommon   _eval_initial_loop(Vt_prof__Syms*)
+  1.29     21.70     0.29          1   -        VLib      _vl_cmp_w(int, unsigned int const*, unsigned int const*)
+  1.28     21.98     0.28          2   -        VLib      _vl_moddiv_w(int, unsigned int*, unsigned int const*, unsigned int const*, bool)
+  1.27     22.25     0.27          2   -        VLib      _vl_vsformat(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, char const*, __va_list_tag*)
+  1.26     22.51     0.26       1399   -        C++       std::unique_ptr<VerilatedContext, std::default_delete<VerilatedContext> >::get() const
+  1.25     22.76     0.25          3   -        C++       unsigned long const& std::max<unsigned long>(unsigned long const&, unsigned long const&)
+  1.19     22.95     0.19          1   -        VLib      vl_finish(char const*, int, char const*)
+  1.18     23.13     0.18          2   -        VLib      vl_time_pow10(int)
diff --git a/test_regress/t/t_profcfunc.pl b/test_regress/t/t_profcfunc.pl
new file mode 100755
index 000000000..8e0470809
--- /dev/null
+++ b/test_regress/t/t_profcfunc.pl
@@ -0,0 +1,20 @@
+#!/usr/bin/env perl
+if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
+# DESCRIPTION: Verilator: Verilog Test driver/expect definition
+#
+# Copyright 2003 by Wilson Snyder. This program is free software; you
+# can redistribute it and/or modify it under the terms of either the GNU
+# Lesser General Public License Version 3 or the Perl Artistic License
+# Version 2.0.
+# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+
+scenarios(dist => 1);
+
+run(cmd => ["cd $Self->{obj_dir} && $ENV{VERILATOR_ROOT}/bin/verilator_profcfunc $Self->{t_dir}/t_profcfunc.gprof > cfuncs.out"],
+    check_finished => 0);
+
+files_identical("$Self->{obj_dir}/cfuncs.out", $Self->{golden_filename});
+
+ok(1);
+
+1;
diff --git a/test_regress/t/t_protect_ids.pl b/test_regress/t/t_protect_ids.pl
index 371f5e0a7..c1b4a0455 100755
--- a/test_regress/t/t_protect_ids.pl
+++ b/test_regress/t/t_protect_ids.pl
@@ -14,8 +14,8 @@ scenarios(vlt_all => 1);
 
 # This test makes randomly named .cpp/.h files, which tend to collect, so remove them first
 foreach my $filename (glob ("$Self->{obj_dir}/*_PS*.cpp"
-                            ." $Self->{obj_dir}/*_PS*.h"
-                            ." $Self->{obj_dir}/*.d" )) {
+                            . " $Self->{obj_dir}/*_PS*.h"
+                            . " $Self->{obj_dir}/*.d")) {
     print "rm $filename\n" if $Self->{verbose};
     unlink $filename;
 }
@@ -40,9 +40,9 @@ file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__idmap.xml", qr/to="PS/);
 if ($Self->{vlt_all}) {
     # Check for secret in any outputs
     my $any;
-    foreach my $filename (glob $Self->{obj_dir}."/*.[ch]*") {
+    foreach my $filename (glob $Self->{obj_dir} . "/*.[ch]*") {
         if ($filename =~ /secret/i) {
-            $Self->error("Secret found in a filename: ".$filename);
+            $Self->error("Secret found in a filename: " . $filename);
         }
         file_grep_not($filename, qr/secret/i);
         $any = 1;
diff --git a/test_regress/t/t_struct_array.v b/test_regress/t/t_struct_array.v
index 9ea97e318..80320e831 100644
--- a/test_regress/t/t_struct_array.v
+++ b/test_regress/t/t_struct_array.v
@@ -5,6 +5,7 @@
 // SPDX-License-Identifier: CC0-1.0
 
 package TEST_TYPES;
+   typedef struct a_struct_t;  // Forward
    typedef struct packed {
       logic 	  stuff;
    } a_struct_t;
diff --git a/test_regress/t/t_struct_init.v b/test_regress/t/t_struct_init.v
index 81f0b2263..4f154d190 100644
--- a/test_regress/t/t_struct_init.v
+++ b/test_regress/t/t_struct_init.v
@@ -21,6 +21,7 @@ module t;
       b4_t      x0;
    } b4x2_t;
 
+   typedef union  q4_t;  // Forward
    typedef union packed { // [3:0]
       bit [3:0] quad0;
       b4_t      quad1;
diff --git a/test_regress/t/t_struct_init_bad.out b/test_regress/t/t_struct_init_bad.out
index 739b169af..2213562e0 100644
--- a/test_regress/t/t_struct_init_bad.out
+++ b/test_regress/t/t_struct_init_bad.out
@@ -1,5 +1,5 @@
-%Error: t/t_struct_init.v:54:40: Assignment pattern contains duplicate entry: b1
+%Error: t/t_struct_init.v:55:40: Assignment pattern contains duplicate entry: b1
                                : ... In instance t
-   54 |    const b4_t b4_const_c = '{b1: 1'b1, b1: 1'b0, b0:1'b0, b2: 1'b1, b3: 1'b1};
+   55 |    const b4_t b4_const_c = '{b1: 1'b1, b1: 1'b0, b0:1'b0, b2: 1'b1, b3: 1'b1};
       |                                        ^~
 %Error: Exiting due to
diff --git a/test_regress/t/t_sys_fread.pl b/test_regress/t/t_sys_fread.pl
index d4594dc84..73bebd44f 100755
--- a/test_regress/t/t_sys_fread.pl
+++ b/test_regress/t/t_sys_fread.pl
@@ -18,8 +18,8 @@ sub gen {
     my $filename = shift;
 
     my $fh = IO::File->new(">$filename");
-    for (my $copy=0; $copy<32; ++$copy) {
-        for (my $i=0; $i<=255; ++$i) {
+    for (my $copy = 0; $copy < 32; ++$copy) {
+        for (my $i = 0; $i <= 255; ++$i) {
             $fh->print(chr($i));
         }
     }
diff --git a/test_regress/t/t_sys_plusargs.v b/test_regress/t/t_sys_plusargs.v
index 015ccfb2c..73600e6cd 100644
--- a/test_regress/t/t_sys_plusargs.v
+++ b/test_regress/t/t_sys_plusargs.v
@@ -103,6 +103,11 @@ module t;
       $display("i='%d'",p_i);
       if (p_i !== 32'd1234) $stop;
 
+      // bug3131 - an "if" with an empty body must still evaluate a condition that has side effects
+      p_i = 0;
+      if ($value$plusargs("INT=%d", p_i)) ;
+      if (p_i !== 32'd1234) $stop;
+
       $write("*-* All Finished *-*\n");
       $finish;
    end
diff --git a/test_regress/t/t_sys_sformat_noopt.pl b/test_regress/t/t_sys_sformat_noopt.pl
index c27062f54..569209da9 100755
--- a/test_regress/t/t_sys_sformat_noopt.pl
+++ b/test_regress/t/t_sys_sformat_noopt.pl
@@ -20,7 +20,7 @@ compile(
 if ($Self->cxx_version =~ /clang/) {
     skip("Known clang bug");
     #Here:   if (VL_UNLIKELY(VL_NEQ_W(12, __Vtemp1, vlSymsp->TOP__t.__PVT__str)))
-} else{
+} else {
     execute(
         check_finished => 1,
         );
diff --git a/test_regress/t/t_threads_crazy.pl b/test_regress/t/t_threads_crazy.pl
index 54641105d..ee8031054 100755
--- a/test_regress/t/t_threads_crazy.pl
+++ b/test_regress/t/t_threads_crazy.pl
@@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(vltmt => 1);
 
 if ($Self->cfg_with_m32) {
-  skip("Does not work with -m32 (resource unavailable)");
+    skip("Does not work with -m32 (resource unavailable)");
 }
 
 compile(
diff --git a/test_regress/t/t_timing_long.pl b/test_regress/t/t_timing_long.pl
index 031575331..eb279f82e 100755
--- a/test_regress/t/t_timing_long.pl
+++ b/test_regress/t/t_timing_long.pl
@@ -34,18 +34,18 @@ sub gen {
     $fh->print("  initial begin\n");
 
     my $n = 100;
-    for (my $i=1; $i<$n; ++$i) {
+    for (my $i = 1; $i < $n; ++$i) {
         # If statement around the timing is important to make the code scheduling
         # mostly unpredictable
-        $fh->printf("    if (cnt == %d) begin\n", $i-1);
+        $fh->printf("    if (cnt == %d) begin\n", $i - 1);
         $fh->printf("      #1; ++cnt; `MSG((\"[%0t] cnt?=${i}\", \$time));"
-                    ." if (cnt != %d) \$stop;\n", $i);
+                    . " if (cnt != %d) \$stop;\n", $i);
         $fh->printf("    end\n");
     }
 
     $fh->print("\n");
-    $fh->print('    $write("*-* All Finished *-*\n");',"\n");
-    $fh->print('    $finish;',"\n");
+    $fh->print('    $write("*-* All Finished *-*\n");', "\n");
+    $fh->print('    $finish;', "\n");
     $fh->print("  end\n");
     $fh->print("endmodule\n");
 }
@@ -55,8 +55,8 @@ top_filename("$Self->{obj_dir}/t_timing_long.v");
 gen($Self->{top_filename});
 
 compile(
-    #verilator_flags2=>["--exe --build --main --timing"],  # Unsupported
-    verilator_flags2=>["--exe --build --main -Wno-STMTDLY"],
+    # verilator_flags2 => ["--exe --build --main --timing"],  # Unsupported
+    verilator_flags2 => ["--exe --build --main -Wno-STMTDLY"],
     verilator_make_cmake => 0,
     verilator_make_gmake => 0,
     make_main => 0,
diff --git a/test_regress/t/t_trace_cat.pl b/test_regress/t/t_trace_cat.pl
index 78832a416..a34973212 100755
--- a/test_regress/t/t_trace_cat.pl
+++ b/test_regress/t/t_trace_cat.pl
@@ -21,7 +21,7 @@ execute(
     );
 
 system("cat $Self->{obj_dir}/simpart_0000.vcd "
-       ." $Self->{obj_dir}/simpart_0000_cat*.vcd > $Self->{obj_dir}/simall.vcd");
+       . " $Self->{obj_dir}/simpart_0000_cat*.vcd > $Self->{obj_dir}/simall.vcd");
 
 vcd_identical("$Self->{obj_dir}/simall.vcd",
               $Self->{golden_filename});
diff --git a/test_regress/t/t_trace_complex.pl b/test_regress/t/t_trace_complex.pl
index 8400cbf09..f02e405e8 100755
--- a/test_regress/t/t_trace_complex.pl
+++ b/test_regress/t/t_trace_complex.pl
@@ -18,17 +18,17 @@ execute(
     check_finished => 1,
     );
 
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_strp_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_arrp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arru\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arrp\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_strp\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_strp_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_arrp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arru\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arrp\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_strp\[/);
 
-vcd_identical ("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
+vcd_identical("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
 
 ok(1);
 1;
diff --git a/test_regress/t/t_trace_complex_old_api.pl b/test_regress/t/t_trace_complex_old_api.pl
index 4150e31af..8136d3f79 100755
--- a/test_regress/t/t_trace_complex_old_api.pl
+++ b/test_regress/t/t_trace_complex_old_api.pl
@@ -23,15 +23,15 @@ execute(
     check_finished => 1,
     );
 
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_strp_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_arrp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arru\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arrp\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_strp\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_strp_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_arrp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arru\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arrp\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_strp\[/);
 
 vcd_identical("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
 
diff --git a/test_regress/t/t_trace_complex_params.pl b/test_regress/t/t_trace_complex_params.pl
index 04c792441..8f72740a0 100755
--- a/test_regress/t/t_trace_complex_params.pl
+++ b/test_regress/t/t_trace_complex_params.pl
@@ -20,7 +20,7 @@ execute(
     check_finished => 1,
     );
 
-file_grep    ("$Self->{obj_dir}/simx.vcd", qr/ PARAM /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ PARAM /);
 
 vcd_identical("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
 
diff --git a/test_regress/t/t_trace_complex_portable.pl b/test_regress/t/t_trace_complex_portable.pl
index 56c794042..8095d31aa 100755
--- a/test_regress/t/t_trace_complex_portable.pl
+++ b/test_regress/t/t_trace_complex_portable.pl
@@ -23,15 +23,15 @@ execute(
     check_finished => 1,
     );
 
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_strp_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_arrp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arru\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arrp\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_strp\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_strp_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_arrp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arru\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arrp\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_strp\[/);
 
 vcd_identical("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
 
diff --git a/test_regress/t/t_trace_complex_structs.pl b/test_regress/t/t_trace_complex_structs.pl
index dbbf55c04..8bf726485 100755
--- a/test_regress/t/t_trace_complex_structs.pl
+++ b/test_regress/t/t_trace_complex_structs.pl
@@ -20,17 +20,17 @@ execute(
     check_finished => 1,
     );
 
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_strp_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp /);
-file_grep_not ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_arrp /);
-file_grep_not ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arru\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arrp\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_strp\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_strp_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp /);
+file_grep_not("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_arrp /);
+file_grep_not("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arru\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arrp\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_strp\[/);
 
-vcd_identical ("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
+vcd_identical("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
 
 ok(1);
 1;
diff --git a/test_regress/t/t_trace_complex_threads_1.pl b/test_regress/t/t_trace_complex_threads_1.pl
index 9a6e6d621..ae50a144d 100755
--- a/test_regress/t/t_trace_complex_threads_1.pl
+++ b/test_regress/t/t_trace_complex_threads_1.pl
@@ -21,17 +21,17 @@ execute(
     check_finished => 1,
     );
 
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_strp_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_arrp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_strp /);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arru\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arrp\[/);
-file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ v_arru_strp\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_strp_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_arrp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arrp_strp /);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arru\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_arrp\[/);
+file_grep("$Self->{obj_dir}/simx.vcd", qr/ v_arru_strp\[/);
 
-vcd_identical ("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
+vcd_identical("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
 
 ok(1);
 1;
diff --git a/test_regress/t/t_trace_ena_cc.pl b/test_regress/t/t_trace_ena_cc.pl
index ea30e0880..94c44cb0d 100755
--- a/test_regress/t/t_trace_ena_cc.pl
+++ b/test_regress/t/t_trace_ena_cc.pl
@@ -21,10 +21,10 @@ execute(
     );
 
 if ($Self->{vlt_all}) {
-    file_grep     ("$Self->{obj_dir}/V$Self->{name}__Trace__0__Slow.cpp", qr/c_trace_on\"/x);
-    file_grep_not ("$Self->{obj_dir}/V$Self->{name}__Trace__0__Slow.cpp", qr/_trace_off\"/x);
-    file_grep     ("$Self->{obj_dir}/simx.vcd", qr/\$enddefinitions/x);
-    file_grep_not ("$Self->{obj_dir}/simx.vcd", qr/inside_sub/x);
+    file_grep("$Self->{obj_dir}/V$Self->{name}__Trace__0__Slow.cpp", qr/c_trace_on\"/x);
+    file_grep_not("$Self->{obj_dir}/V$Self->{name}__Trace__0__Slow.cpp", qr/_trace_off\"/x);
+    file_grep("$Self->{obj_dir}/simx.vcd", qr/\$enddefinitions/x);
+    file_grep_not("$Self->{obj_dir}/simx.vcd", qr/inside_sub/x);
 
     vcd_identical("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
 }
diff --git a/test_regress/t/t_trace_ena_sc.pl b/test_regress/t/t_trace_ena_sc.pl
index 6a493841e..73e694095 100755
--- a/test_regress/t/t_trace_ena_sc.pl
+++ b/test_regress/t/t_trace_ena_sc.pl
@@ -26,7 +26,7 @@ else {
 
     if ($Self->{vlt_all}) {
         # Note more checks in _cc.pl
-        file_grep    ("$Self->{obj_dir}/simx.vcd", qr/\$enddefinitions/x);
+        file_grep("$Self->{obj_dir}/simx.vcd", qr/\$enddefinitions/x);
 
         vcd_identical("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
     }
diff --git a/test_regress/t/t_unicode.pl b/test_regress/t/t_unicode.pl
index 5d143bbdc..836641e83 100755
--- a/test_regress/t/t_unicode.pl
+++ b/test_regress/t/t_unicode.pl
@@ -23,12 +23,12 @@ sub gen {
     $fh->print("// Generated by t_unicode.pl\n");
     $fh->print("module t;\n");
     $fh->print("   // Chinese "
-               .chr(0xe8).chr(0xaf).chr(0x84).chr(0xe8).chr(0xae).chr(0xba)  # Comment
-               ."\n");
+               . chr(0xe8) . chr(0xaf) . chr(0x84) . chr(0xe8) . chr(0xae) . chr(0xba)  # Comment
+               . "\n");
     $fh->print("   initial begin\n");
     $fh->print("      \$write(\"Hello "
-               .chr(0xe4).chr(0xb8).chr(0x96).chr(0xe7).chr(0x95).chr(0x8c)  # World
-               ."\\n\");\n");
+               . chr(0xe4) . chr(0xb8) . chr(0x96) . chr(0xe7) . chr(0x95) . chr(0x8c)  # World
+               . "\\n\");\n");
     $fh->print("      \$write(\"*-* All Finished *-*\\n\");\n");
     $fh->print("      \$finish;\n");
     $fh->print("   end\n");
diff --git a/test_regress/t/t_unopt_array.pl b/test_regress/t/t_unopt_array.pl
index 6361d5df0..6bef1be6c 100755
--- a/test_regress/t/t_unopt_array.pl
+++ b/test_regress/t/t_unopt_array.pl
@@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(simulator => 1);
 
 compile(
-    verilator_flags2=>["-Wno-UNOPTFLAT"],
+    verilator_flags2 => ["-Wno-UNOPTFLAT"],
     );
 
 execute(
diff --git a/test_regress/t/t_unroll_genf.v b/test_regress/t/t_unroll_genf.v
index 70d4852c7..3d89bdeb9 100644
--- a/test_regress/t/t_unroll_genf.v
+++ b/test_regress/t/t_unroll_genf.v
@@ -19,7 +19,7 @@ module t (/*AUTOARG*/);
    genvar j;
    generate
       for (j = 0; j < cdiv(10); j=j+1)
-        sub sub();
+        sub #() sub ();  // #() for code coverage in verilog.y
    endgenerate
 
    initial begin
diff --git a/test_regress/t/t_urandom.v b/test_regress/t/t_urandom.v
index fcb6b7b9d..bd865b990 100644
--- a/test_regress/t/t_urandom.v
+++ b/test_regress/t/t_urandom.v
@@ -37,6 +37,9 @@ module t(/*AUTOARG*/);
       v2 = $urandom_range(v1, v1);
       if (v1 != v2) $stop;
 
+      v2 = $urandom_range(0, 32'hffffffff);
+      if (v2 == v1) $stop;
+
       for (int test = 0; test < 20; ++test) begin
          v1 = 2;
          v1 = $urandom_range(0, v1);
diff --git a/test_regress/t/t_var_escape.pl b/test_regress/t/t_var_escape.pl
index e8c85b61e..cb56ec58b 100755
--- a/test_regress/t/t_var_escape.pl
+++ b/test_regress/t/t_var_escape.pl
@@ -20,13 +20,12 @@ execute(
     );
 
 if ($Self->{vlt_all}) {
-    file_grep     ("$Self->{obj_dir}/simx.vcd", qr/\$enddefinitions/x);
+    file_grep("$Self->{obj_dir}/simx.vcd", qr/\$enddefinitions/x);
     my $sig = quotemeta("bra[ket]slash/dash-colon:9");
-    file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ $sig/);
-    file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ other\.cyc /);
-    file_grep     ("$Self->{obj_dir}/simx.vcd", qr/ module mod\.with_dot /);
-    vcd_identical ("$Self->{obj_dir}/simx.vcd",
-                   $Self->{golden_filename});
+    file_grep("$Self->{obj_dir}/simx.vcd", qr/ $sig/);
+    file_grep("$Self->{obj_dir}/simx.vcd", qr/ other\.cyc /);
+    file_grep("$Self->{obj_dir}/simx.vcd", qr/ module mod\.with_dot /);
+    vcd_identical("$Self->{obj_dir}/simx.vcd", $Self->{golden_filename});
 }
 
 ok(1);
diff --git a/test_regress/t/t_var_rsvd_port.pl b/test_regress/t/t_var_rsvd_port.pl
index de2083cc5..76a08e9aa 100755
--- a/test_regress/t/t_var_rsvd_port.pl
+++ b/test_regress/t/t_var_rsvd_port.pl
@@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(simulator => 1);
 
 compile(
-    verilator_flags2=>["-Wno-SYMRSVDWORD"],
+    verilator_flags2 => ["-Wno-SYMRSVDWORD"],
     );
 
 execute();
diff --git a/test_regress/t/t_verilated_all.pl b/test_regress/t/t_verilated_all.pl
index 44b2d3a72..14ef03202 100755
--- a/test_regress/t/t_verilated_all.pl
+++ b/test_regress/t/t_verilated_all.pl
@@ -21,10 +21,15 @@ compile(
                           ? "--threads 2 $root/include/verilated_threads.cpp" : ""),
                          ($Self->cfg_with_threaded
                           ? "--trace-threads 1" : ""),
+                         ($Self->cfg_with_threaded
+                          ? "--prof-threads" : ""),
                          "$root/include/verilated_save.cpp"],
     );
 
 execute(
+    all_run_flags => [" +verilator+prof+threads+file+/dev/null",
+                      " +verilator+prof+vlt+file+/dev/null",
+                      ],
     check_finished => 1,
     );
 
@@ -55,7 +60,7 @@ foreach my $file (sort keys %hit) {
         && $file !~ /_fst/
         && $file !~ /_heavy/
         && ($file !~ /_thread/ || $Self->cfg_with_threaded)) {
-        error("Include file not covered by t_verilated_all test: ",$file);
+        error("Include file not covered by t_verilated_all test: ", $file);
     }
 }
 
diff --git a/test_regress/t/t_verilated_all_newest.pl b/test_regress/t/t_verilated_all_newest.pl
index 1df755ae6..08b1f3838 100755
--- a/test_regress/t/t_verilated_all_newest.pl
+++ b/test_regress/t/t_verilated_all_newest.pl
@@ -21,6 +21,9 @@ compile(
     );
 
 execute(
+    all_run_flags => [" +verilator+prof+threads+file+/dev/null",
+                      " +verilator+prof+vlt+file+/dev/null",
+                      ],
     check_finished => 1,
     );
 
diff --git a/test_regress/t/t_verilated_all_oldest.pl b/test_regress/t/t_verilated_all_oldest.pl
index db800f7b0..103e2e197 100755
--- a/test_regress/t/t_verilated_all_oldest.pl
+++ b/test_regress/t/t_verilated_all_oldest.pl
@@ -11,6 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
 scenarios(dist => 1);
 
 # This test now does nothing, because using DRIVER_STD=oldest tends to blow up glibc.
+# Support for DRIVER_STD=oldest was removed from makefiles. This file remains for commentary.
 
 ok(1);
 1;
diff --git a/test_regress/t/t_vpi_get_public_rw_switch.pl b/test_regress/t/t_vpi_get_public_rw_switch.pl
index 2142c3afc..3a0123177 100755
--- a/test_regress/t/t_vpi_get_public_rw_switch.pl
+++ b/test_regress/t/t_vpi_get_public_rw_switch.pl
@@ -21,8 +21,8 @@ compile(
     make_top_shell => 0,
     make_main => 0,
     verilator_flags2 => ["--exe --vpi"
-                         ." --public-flat-rw --prefix Vt_vpi_get --no-l2name"
-                         ." $Self->{t_dir}/t_vpi_get.cpp"],
+                         . " --public-flat-rw --prefix Vt_vpi_get --no-l2name"
+                         . " $Self->{t_dir}/t_vpi_get.cpp"],
     make_pli => 1,
     iv_flags2 => ["-g2005-sv -D USE_VPI_NOT_DPI"],
     v_flags2 => ["+define+USE_VPI_NOT_DPI"],
diff --git a/test_regress/t/t_vpi_zero_time_cb.cpp b/test_regress/t/t_vpi_zero_time_cb.cpp
index 865472719..9336d60d3 100644
--- a/test_regress/t/t_vpi_zero_time_cb.cpp
+++ b/test_regress/t/t_vpi_zero_time_cb.cpp
@@ -136,7 +136,7 @@ int main(int argc, char** argv, char** env) {
         void* lib = dlopen(filenamep, RTLD_LAZY);
         void* bootstrap = dlsym(lib, "vpi_compat_bootstrap");
         if (!bootstrap) {
-            std::string msg = std::string("%Error: Could not dlopen ") + filenamep;
+            const std::string msg = std::string("%Error: Could not dlopen ") + filenamep;
             vl_fatal(__FILE__, __LINE__, "main", msg.c_str());
         }
         ((void (*)(void))bootstrap)();
diff --git a/test_regress/t/t_wire_beh1364_bad.pl b/test_regress/t/t_wire_beh1364_bad.pl
index ee98f2262..6a33667e2 100755
--- a/test_regress/t/t_wire_beh1364_bad.pl
+++ b/test_regress/t/t_wire_beh1364_bad.pl
@@ -19,7 +19,7 @@ lint(
     );
 
 if (-e $waiver_filename) {
-    error("Waiver file generated, not expected..");
+    error("Waiver file generated, not expected");
 }
 
 ok(1);
diff --git a/test_regress/t/t_wrapper_context.cpp b/test_regress/t/t_wrapper_context.cpp
index d898c212d..9e203673c 100644
--- a/test_regress/t/t_wrapper_context.cpp
+++ b/test_regress/t/t_wrapper_context.cpp
@@ -11,11 +11,14 @@
 
 #include <verilated.h>
 #include <verilated_cov.h>
+#include "TestCheck.h"
 
 #include VM_PREFIX_INCLUDE
 
 double sc_time_stamp() { return 0; }
 
+int errors = 0;
+
 VerilatedMutex outputMutex;
 
 #ifdef T_WRAPPER_CONTEXT
@@ -89,6 +92,14 @@ int main(int argc, char** argv, char** env) {
     context0p->traceEverOn(true);
     context1p->traceEverOn(true);
 
+    // Check errorCount(): zero on both contexts, then set/read-back on context0p
+    TEST_CHECK_EQ(context0p->errorCount(), 0);
+    TEST_CHECK_EQ(context1p->errorCount(), 0);
+    context0p->errorCount(1);
+    TEST_CHECK_EQ(context0p->errorCount(), 1);
+    context0p->errorCount(0);
+    TEST_CHECK_EQ(context0p->errorCount(), 0);
+
     // instantiate verilated design
     std::unique_ptr<VM_PREFIX> top0p{new VM_PREFIX{context0p.get(), "top0"}};
     std::unique_ptr<VM_PREFIX> top1p{new VM_PREFIX{context1p.get(), "top1"}};
@@ -108,7 +119,10 @@ int main(int argc, char** argv, char** env) {
 
     // check if both finished
     bool pass = true;
-    if (top0p->done_o && top1p->done_o) {
+    if (errors) {
+        std::cout << "Error: comparison errors" << std::endl;
+        pass = false;
+    } else if (top0p->done_o && top1p->done_o) {
         std::cout << "*-* All Finished *-*" << std::endl;
     } else {
         std::cout << "Error: Early termination!" << std::endl;
diff --git a/test_regress/t/t_x_assign_0.pl b/test_regress/t/t_x_assign_0.pl
index 6f9e1347f..aa0614025 100755
--- a/test_regress/t/t_x_assign_0.pl
+++ b/test_regress/t/t_x_assign_0.pl
@@ -13,8 +13,8 @@ scenarios(vlt_all => 1);
 top_filename("t/t_x_assign.v");
 
 compile(
-    make_top_shell   => 0,
-    make_main        => 0,
+    make_top_shell => 0,
+    make_main => 0,
     verilator_flags2 => ["--x-assign 0 --exe $Self->{t_dir}/t_x_assign.cpp"],
     );
 
diff --git a/test_regress/t/t_x_assign_1.pl b/test_regress/t/t_x_assign_1.pl
index 2a6ffe0a8..d55fbc7a0 100755
--- a/test_regress/t/t_x_assign_1.pl
+++ b/test_regress/t/t_x_assign_1.pl
@@ -13,8 +13,8 @@ scenarios(vlt_all => 1);
 top_filename("t/t_x_assign.v");
 
 compile(
-    make_top_shell   => 0,
-    make_main        => 0,
+    make_top_shell => 0,
+    make_main => 0,
     verilator_flags2 => ["--x-assign 1 --exe $Self->{t_dir}/t_x_assign.cpp"],
     );
 
diff --git a/test_regress/t/t_x_assign_unique_0.pl b/test_regress/t/t_x_assign_unique_0.pl
index bef9e2495..675fbbd33 100755
--- a/test_regress/t/t_x_assign_unique_0.pl
+++ b/test_regress/t/t_x_assign_unique_0.pl
@@ -13,8 +13,8 @@ scenarios(vlt_all => 1);
 top_filename("t/t_x_assign.v");
 
 compile(
-    make_top_shell   => 0,
-    make_main        => 0,
+    make_top_shell => 0,
+    make_main => 0,
     verilator_flags2 => ["--x-assign unique --exe $Self->{t_dir}/t_x_assign.cpp"],
     );
 
diff --git a/test_regress/t/t_x_assign_unique_1.pl b/test_regress/t/t_x_assign_unique_1.pl
index bef9e2495..675fbbd33 100755
--- a/test_regress/t/t_x_assign_unique_1.pl
+++ b/test_regress/t/t_x_assign_unique_1.pl
@@ -13,8 +13,8 @@ scenarios(vlt_all => 1);
 top_filename("t/t_x_assign.v");
 
 compile(
-    make_top_shell   => 0,
-    make_main        => 0,
+    make_top_shell => 0,
+    make_main => 0,
     verilator_flags2 => ["--x-assign unique --exe $Self->{t_dir}/t_x_assign.cpp"],
     );
 
diff --git a/test_regress/t/t_xml_debugcheck.pl b/test_regress/t/t_xml_debugcheck.pl
index 392d9a60c..4d1491a1b 100755
--- a/test_regress/t/t_xml_debugcheck.pl
+++ b/test_regress/t/t_xml_debugcheck.pl
@@ -29,9 +29,9 @@ file_grep("$out_filename", qr/<constpool /x);
 file_grep("$out_filename", qr/<inititem /x);
 file_grep("$out_filename", qr/<if /x);
 file_grep("$out_filename", qr/<while /x);
-file_grep("$out_filename", qr/<begin>/x); # for <if> and <while>
-file_grep("$out_filename", qr/ signed=/x); # for <basicdtype>
-file_grep("$out_filename", qr/ func=/x); # for <ccall>
+file_grep("$out_filename", qr/<begin>/x);  # for <if> and <while>
+file_grep("$out_filename", qr/ signed=/x);  # for <basicdtype>
+file_grep("$out_filename", qr/ func=/x);  # for <ccall>
 
 ok(1);
 1;