CI: Improve RTLMeter PR report (#7740)

- If any simulated cycle counts mismatch, report them and fail the job
- Keep "Elapsed" only in cppbuild
- Ignore "Elapsed" and "CPU" samples less than 30 seconds - too noisy
- Widen acceptable ranges on "Elapsed" and "Memory"
- Add badge legend
This commit is contained in:
Geza Lore 2026-06-08 22:22:52 +01:00 committed by GitHub
parent a3827182c0
commit c5945021c1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 72 additions and 31 deletions

View File

@ -343,14 +343,17 @@ jobs:
path: verilator
- name: Create report
id: report
working-directory: verilator
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
ln -s ../rtlmeter rtlmeter
gh repo set-default ${{ github.repository }}
# Compare to last successful scheduled run
ci/ci-rtlmeter-report.bash ${{ github.run_id }} ${{ github.sha }} ${{ needs.combine-results.outputs.run-tags }}
# Create run report - save the script's exit status so a later step can fail the job
STATUS=0
ci/ci-rtlmeter-report.bash ${{ github.run_id }} ${{ github.sha }} ${{ needs.combine-results.outputs.run-tags }} || STATUS=$?
echo "status=$STATUS" >> "$GITHUB_OUTPUT"
# Create the report artifact
mkdir ../report-artifact
mv rtlmeter-report/report ../report-artifact/
@ -372,6 +375,9 @@ jobs:
path: notification-artifact
name: pr-notification
- name: Report status
run: exit ${{ steps.report.outputs.status }}
# Create GitHub issue for failed scheduled jobs
# This should always be the last job (we want an issue if anything breaks)
create-issue:

View File

@ -82,8 +82,9 @@ for r in $RUNS; do
awk -v RS= -v prefix=$TMP_DIR/$r-frag '{print > sprintf("%s-execute-%02d.txt" ,prefix,NR)}' $TMP_DIR/execute-$r.txt
done
# Create summary
venv/bin/python3 $SCRIPT_DIR/ci-rtlmeter-report.py ${SUMMARY_ARGS[@]} > $TMP_DIR/summary.txt
# Create summary, suppress failure, but save the status reported to pass back.
STATUS=0
venv/bin/python3 $SCRIPT_DIR/ci-rtlmeter-report.py ${SUMMARY_ARGS[@]} > $TMP_DIR/summary.txt || STATUS=$?
# Print it
cat $TMP_DIR/summary.txt
@ -158,3 +159,5 @@ $(cat ${TMP_DIR}/body.html)
</html>
INDEX_TEMPLATE
exit $STATUS

View File

@ -33,21 +33,29 @@ def printTable(table: List[List[str]], **kwargs) -> None:
# fmt: off
stepMetric = (
("verilate", "elapsed"),
("verilate", "memory"),
("verilate", "cpu"),
("cppbuild", "elapsed"),
("cppbuild", "memory"),
("cppbuild", "cpu"),
("cppbuild", "codeSize"),
("execute", "speed"),
("execute", "clocks"),
("execute", "memory"),
("execute", "cpu"),
# Step, Metric, Status Brackets
("execute", "speed", ("", 0.96, "⚠️", 0.98, "", 1.02, "💡", 1.04, "")),
("execute", "memory", ("", 0.90, "⚠️", 0.95, "", 1.05, "💡", 1.10, "")),
("verilate", "cpu", ("", 0.96, "⚠️", 0.98, "", 1.02, "💡", 1.04, "")),
("verilate", "memory", ("", 0.90, "⚠️", 0.95, "", 1.05, "💡", 1.10, "")),
("cppbuild", "cpu", ("", 0.96, "⚠️", 0.98, "", 1.02, "💡", 1.04, "")),
("cppbuild", "memory", ("", 0.90, "⚠️", 0.95, "", 1.05, "💡", 1.10, "")),
("cppbuild", "codeSize",("", 0.96, "⚠️", 0.98, "", 1.02, "💡", 1.04, "")),
("cppbuild", "elapsed", ("", 0.70, "⚠️", 0.85, "", 1.15, "💡", 1.30, "")),
)
badgeLegend = [
("", "Likely regression"),
("⚠️", "Possible regression"),
("", "Within acceptable range"),
("💡", "Possible improvement"),
("", "Likely improvement"),
]
# fmt: on
changedCycles = []
table = []
badgesToExplain = set()
for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
with open(ref, "r", encoding="utf-8") as f:
@ -57,7 +65,17 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
if table:
table.append(tabulate.SEPARATING_LINE)
runName = ref_json["runName"]
for step, metric in stepMetric:
# Check simulated cycles match - it's ok to crash if this entry does not exist
for entry in cmp_json["execute"]["clocks"]["table"]:
case, _, _, (refCycles, _), (newCycles, _), _, _ = entry
refCycles = int(refCycles)
newCycles = int(newCycles)
if refCycles != newCycles:
changedCycles.append([runName, case, refCycles, newCycles])
# Check metrics
for step, metric, brackets in stepMetric:
if step not in cmp_json:
continue
data = cmp_json[step]
@ -68,7 +86,11 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
maxGain = float("-inf")
meanGain = 1
count = 0
for _, _, _, _, _, g, _ in data["table"]:
for _, _, _, (a, _), (b, _), g, _ in data["table"]:
# For wall clock and CPU time, skip samples where either measurement is under 30 seconds - too noisy
if metric == "elapsed" or metric == "cpu":
if a < 30 or b < 30:
continue
minGain = min(minGain, g)
maxGain = max(maxGain, g)
meanGain *= g
@ -77,29 +99,39 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
continue
meanGain = meanGain**(1 / count)
if metric == "clocks":
# Clock cycles should match exactly
status = "" if minGain != 1 or maxGain != 1 else ""
else:
# Otherwise use some arbitrary brackets
status = ""
if (meanGain > 0.95):
status = "⚠️"
if (meanGain > 0.98):
status = ""
if (meanGain > 1.02):
status = "💡"
if (meanGain > 1.05):
status = ""
status = brackets[0]
for limit, badge in zip(brackets[1::2], brackets[2::2]):
if meanGain >= limit:
status = badge
badgesToExplain.add(status)
table.append([
runName, step, ref_json["metrics"][metric]["header"], f"{meanGain:.2f}x {status} ",
f"{minGain:.2f}x", f"{maxGain:.2f}x", f"{count}"
])
# Print changed cycles if any
if changedCycles:
print("❌ simulated cycles changed (must be fixed):")
printTable(changedCycles,
headers=("Run", "Case", "Old Cycles", "New Cycles"),
colalign=("left", "left", "right", "right"),
disable_numparse=True)
print()
# Print results
printTable(
table,
headers=("Run", "Step", "Metric", "Improvement", "Min", "Max", "Samples"),
colalign=("left", "left", "left", "right", "right", "right", "right"),
disable_numparse=True,
)
# Explain badges
print()
for badge, legend in badgeLegend:
if badge in badgesToExplain:
print(f" {badge} : {legend}")
# Fail the job (non-zero exit) if any simulated cycle counts changed
sys.exit(0 if not changedCycles else 1)