CI: Improve RTLMeter PR report (#7740)

- If any simulated cycle counts mismatch, report them and fail the job
- Keep "Elapsed" only in cppbuild
- Ignore "Elapsed" and "CPU" samples of less than 30 seconds, as they are too noisy
- Widen acceptable ranges on "Elapsed" and "Memory"
- Add badge legend
2026-06-08 22:22:52 +01:00 · 2026-06-08 22:22:52 +01:00 · c5945021c1
parent a3827182c0
commit c5945021c1
3 changed files with 72 additions and 31 deletions
--- a/.github/workflows/rtlmeter.yml
+++ b/.github/workflows/rtlmeter.yml
@ -343,14 +343,17 @@ jobs:
          path: verilator

      - name: Create report
+        id: report
        working-directory: verilator
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          ln -s ../rtlmeter rtlmeter
          gh repo set-default ${{ github.repository }}
-          # Compare to last successful scheduled run
-          ci/ci-rtlmeter-report.bash ${{ github.run_id }} ${{ github.sha }} ${{ needs.combine-results.outputs.run-tags }}
+          # Create the run report - save its exit status so a later step can fail the job if the script failed
+          STATUS=0
+          ci/ci-rtlmeter-report.bash ${{ github.run_id }} ${{ github.sha }} ${{ needs.combine-results.outputs.run-tags }} || STATUS=$?
+          echo "status=$STATUS" >> "$GITHUB_OUTPUT"
          # Create the report artifact
          mkdir ../report-artifact
          mv rtlmeter-report/report ../report-artifact/
@ -372,6 +375,9 @@ jobs:
          path: notification-artifact
          name: pr-notification

+      - name: Report status
+        run: exit ${{ steps.report.outputs.status }}
+
  # Create GitHub issue for failed scheduled jobs
  # This should always be the last job (we want an issue if anything breaks)
  create-issue:
--- a/ci/ci-rtlmeter-report.bash
+++ b/ci/ci-rtlmeter-report.bash
@ -82,8 +82,9 @@ for r in $RUNS; do
  awk -v RS= -v prefix=$TMP_DIR/$r-frag '{print > sprintf("%s-execute-%02d.txt" ,prefix,NR)}' $TMP_DIR/execute-$r.txt
 done

-# Create summary
-venv/bin/python3 $SCRIPT_DIR/ci-rtlmeter-report.py ${SUMMARY_ARGS[@]} > $TMP_DIR/summary.txt
+# Create summary, suppress failure, but save the status reported to pass back.
+STATUS=0
+venv/bin/python3 $SCRIPT_DIR/ci-rtlmeter-report.py ${SUMMARY_ARGS[@]} > $TMP_DIR/summary.txt || STATUS=$?
 # Print it
 cat $TMP_DIR/summary.txt

@ -158,3 +159,5 @@ $(cat ${TMP_DIR}/body.html)

 </html>
 INDEX_TEMPLATE
+
+exit $STATUS
--- a/ci/ci-rtlmeter-report.py
+++ b/ci/ci-rtlmeter-report.py
@ -33,21 +33,29 @@ def printTable(table: List[List[str]], **kwargs) -> None:

 # fmt: off
 stepMetric = (
-    ("verilate", "elapsed"),
-    ("verilate", "memory"),
-    ("verilate", "cpu"),
-    ("cppbuild", "elapsed"),
-    ("cppbuild", "memory"),
-    ("cppbuild", "cpu"),
-    ("cppbuild", "codeSize"),
-    ("execute", "speed"),
-    ("execute", "clocks"),
-    ("execute", "memory"),
-    ("execute", "cpu"),
+    # Step,      Metric,    Status Brackets
+    ("execute",  "speed",   ("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")),
+    ("execute",  "memory",  ("❌", 0.90, "⚠️", 0.95, "✅", 1.05, "💡", 1.10, "⭐")),
+    ("verilate", "cpu",     ("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")),
+    ("verilate", "memory",  ("❌", 0.90, "⚠️", 0.95, "✅", 1.05, "💡", 1.10, "⭐")),
+    ("cppbuild", "cpu",     ("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")),
+    ("cppbuild", "memory",  ("❌", 0.90, "⚠️", 0.95, "✅", 1.05, "💡", 1.10, "⭐")),
+    ("cppbuild", "codeSize",("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")),
+    ("cppbuild", "elapsed", ("❌", 0.70, "⚠️", 0.85, "✅", 1.15, "💡", 1.30, "⭐")),
 )
+
+badgeLegend = [
+    ("❌", "Likely regression"),
+    ("⚠️", "Possible regression"),
+    ("✅", "Within acceptable range"),
+    ("💡", "Possible improvement"),
+    ("⭐", "Likely improvement"),
+]
 # fmt: on

+changedCycles = []
 table = []
+badgesToExplain = set()

 for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
    with open(ref, "r", encoding="utf-8") as f:
@ -57,7 +65,17 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
    if table:
        table.append(tabulate.SEPARATING_LINE)
    runName = ref_json["runName"]
-    for step, metric in stepMetric:
+
+    # Check simulated cycles match - it's ok to crash if this entry does not exist
+    for entry in cmp_json["execute"]["clocks"]["table"]:
+        case, _, _, (refCycles, _), (newCycles, _), _, _ = entry
+        refCycles = int(refCycles)
+        newCycles = int(newCycles)
+        if refCycles != newCycles:
+            changedCycles.append([runName, case, refCycles, newCycles])
+
+    # Check metrics
+    for step, metric, brackets in stepMetric:
        if step not in cmp_json:
            continue
        data = cmp_json[step]
@ -68,7 +86,11 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
        maxGain = float("-inf")
        meanGain = 1
        count = 0
-        for _, _, _, _, _, g, _ in data["table"]:
+        for _, _, _, (a, _), (b, _), g, _ in data["table"]:
+            # For wall clock and CPU time, ignore samples under 30 seconds - they just add noise
+            if metric == "elapsed" or metric == "cpu":
+                if a < 30 or b < 30:
+                    continue
            minGain = min(minGain, g)
            maxGain = max(maxGain, g)
            meanGain *= g
@ -77,29 +99,39 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
            continue
        meanGain = meanGain**(1 / count)

-        if metric == "clocks":
-            # Clock cycles should match exactly
-            status = "❌" if minGain != 1 or maxGain != 1 else "✅"
-        else:
-            # Otherwise use some arbitrary brackets
-            status = "❌"
-            if (meanGain > 0.95):
-                status = "⚠️"
-            if (meanGain > 0.98):
-                status = "✅"
-            if (meanGain > 1.02):
-                status = "💡"
-            if (meanGain > 1.05):
-                status = "⭐"
+        status = brackets[0]
+        for limit, badge in zip(brackets[1::2], brackets[2::2]):
+            if meanGain >= limit:
+                status = badge
+        badgesToExplain.add(status)

        table.append([
            runName, step, ref_json["metrics"][metric]["header"], f"{meanGain:.2f}x  {status} ",
            f"{minGain:.2f}x", f"{maxGain:.2f}x", f"{count}"
        ])

+# Print changed cycles if any
+if changedCycles:
+    print("❌ simulated cycles changed (must be fixed):")
+    printTable(changedCycles,
+               headers=("Run", "Case", "Old Cycles", "New Cycles"),
+               colalign=("left", "left", "right", "right"),
+               disable_numparse=True)
+    print()
+
+# Print results
 printTable(
    table,
    headers=("Run", "Step", "Metric", "Improvement", "Min", "Max", "Samples"),
    colalign=("left", "left", "left", "right", "right", "right", "right"),
    disable_numparse=True,
 )
+
+# Explain badges
+print()
+for badge, legend in badgeLegend:
+    if badge in badgesToExplain:
+        print(f"  {badge} : {legend}")
+
+# Fail job if any simulated cycle counts changed
+sys.exit(0 if not changedCycles else 1)