CI: Improve RTLMeter PR report (#7740)
- If any simulated cycle counts mismatch, report them and fail the job
- Keep "Elapsed" only in cppbuild
- Ignore "Elapsed" and "CPU" samples less than 30 seconds - too noisy
- Widen acceptable ranges on "Elapsed" and "Memory"
- Add badge legend
This commit is contained in:
parent
a3827182c0
commit
c5945021c1
|
|
@ -343,14 +343,17 @@ jobs:
|
|||
path: verilator
|
||||
|
||||
- name: Create report
|
||||
id: report
|
||||
working-directory: verilator
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
ln -s ../rtlmeter rtlmeter
|
||||
gh repo set-default ${{ github.repository }}
|
||||
# Compare to last successful scheduled run
|
||||
ci/ci-rtlmeter-report.bash ${{ github.run_id }} ${{ github.sha }} ${{ needs.combine-results.outputs.run-tags }}
|
||||
# Create run report - save the script's exit status so a later step can fail the job if the script failed
|
||||
STATUS=0
|
||||
ci/ci-rtlmeter-report.bash ${{ github.run_id }} ${{ github.sha }} ${{ needs.combine-results.outputs.run-tags }} || STATUS=$?
|
||||
echo "status=$STATUS" >> "$GITHUB_OUTPUT"
|
||||
# Create the report artifact
|
||||
mkdir ../report-artifact
|
||||
mv rtlmeter-report/report ../report-artifact/
|
||||
|
|
@ -372,6 +375,9 @@ jobs:
|
|||
path: notification-artifact
|
||||
name: pr-notification
|
||||
|
||||
- name: Report status
|
||||
run: exit ${{ steps.report.outputs.status }}
|
||||
|
||||
# Create GitHub issue for failed scheduled jobs
|
||||
# This should always be the last job (we want an issue if anything breaks)
|
||||
create-issue:
|
||||
|
|
|
|||
|
|
@ -82,8 +82,9 @@ for r in $RUNS; do
|
|||
awk -v RS= -v prefix=$TMP_DIR/$r-frag '{print > sprintf("%s-execute-%02d.txt" ,prefix,NR)}' $TMP_DIR/execute-$r.txt
|
||||
done
|
||||
|
||||
# Create summary
|
||||
venv/bin/python3 $SCRIPT_DIR/ci-rtlmeter-report.py ${SUMMARY_ARGS[@]} > $TMP_DIR/summary.txt
|
||||
# Create summary; do not abort on failure, but save the exit status so it can be passed back on exit.
|
||||
STATUS=0
|
||||
venv/bin/python3 $SCRIPT_DIR/ci-rtlmeter-report.py ${SUMMARY_ARGS[@]} > $TMP_DIR/summary.txt || STATUS=$?
|
||||
# Print it
|
||||
cat $TMP_DIR/summary.txt
|
||||
|
||||
|
|
@ -158,3 +159,5 @@ $(cat ${TMP_DIR}/body.html)
|
|||
|
||||
</html>
|
||||
INDEX_TEMPLATE
|
||||
|
||||
exit $STATUS
|
||||
|
|
|
|||
|
|
@ -33,21 +33,29 @@ def printTable(table: List[List[str]], **kwargs) -> None:
|
|||
|
||||
# fmt: off
|
||||
stepMetric = (
|
||||
("verilate", "elapsed"),
|
||||
("verilate", "memory"),
|
||||
("verilate", "cpu"),
|
||||
("cppbuild", "elapsed"),
|
||||
("cppbuild", "memory"),
|
||||
("cppbuild", "cpu"),
|
||||
("cppbuild", "codeSize"),
|
||||
("execute", "speed"),
|
||||
("execute", "clocks"),
|
||||
("execute", "memory"),
|
||||
("execute", "cpu"),
|
||||
# Step, Metric, Status Brackets
|
||||
("execute", "speed", ("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")),
|
||||
("execute", "memory", ("❌", 0.90, "⚠️", 0.95, "✅", 1.05, "💡", 1.10, "⭐")),
|
||||
("verilate", "cpu", ("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")),
|
||||
("verilate", "memory", ("❌", 0.90, "⚠️", 0.95, "✅", 1.05, "💡", 1.10, "⭐")),
|
||||
("cppbuild", "cpu", ("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")),
|
||||
("cppbuild", "memory", ("❌", 0.90, "⚠️", 0.95, "✅", 1.05, "💡", 1.10, "⭐")),
|
||||
("cppbuild", "codeSize",("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")),
|
||||
("cppbuild", "elapsed", ("❌", 0.70, "⚠️", 0.85, "✅", 1.15, "💡", 1.30, "⭐")),
|
||||
)
|
||||
|
||||
badgeLegend = [
|
||||
("❌", "Likely regression"),
|
||||
("⚠️", "Possible regression"),
|
||||
("✅", "Within acceptable range"),
|
||||
("💡", "Possible improvement"),
|
||||
("⭐", "Likely improvement"),
|
||||
]
|
||||
# fmt: on
|
||||
|
||||
changedCycles = []
|
||||
table = []
|
||||
badgesToExplain = set()
|
||||
|
||||
for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
|
||||
with open(ref, "r", encoding="utf-8") as f:
|
||||
|
|
@ -57,7 +65,17 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
|
|||
if table:
|
||||
table.append(tabulate.SEPARATING_LINE)
|
||||
runName = ref_json["runName"]
|
||||
for step, metric in stepMetric:
|
||||
|
||||
# Check simulated cycles match - it's ok to crash if this entry does not exist
|
||||
for entry in cmp_json["execute"]["clocks"]["table"]:
|
||||
case, _, _, (refCycles, _), (newCycles, _), _, _ = entry
|
||||
refCycles = int(refCycles)
|
||||
newCycles = int(newCycles)
|
||||
if refCycles != newCycles:
|
||||
changedCycles.append([runName, case, refCycles, newCycles])
|
||||
|
||||
# Check metrics
|
||||
for step, metric, brackets in stepMetric:
|
||||
if step not in cmp_json:
|
||||
continue
|
||||
data = cmp_json[step]
|
||||
|
|
@ -68,7 +86,11 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
|
|||
maxGain = float("-inf")
|
||||
meanGain = 1
|
||||
count = 0
|
||||
for _, _, _, _, _, g, _ in data["table"]:
|
||||
for _, _, _, (a, _), (b, _), g, _ in data["table"]:
|
||||
# For wall clock and CPU time, ignore samples where either value is below 30 - they just add noise
|
||||
if metric == "elapsed" or metric == "cpu":
|
||||
if a < 30 or b < 30:
|
||||
continue
|
||||
minGain = min(minGain, g)
|
||||
maxGain = max(maxGain, g)
|
||||
meanGain *= g
|
||||
|
|
@ -77,29 +99,39 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]):
|
|||
continue
|
||||
meanGain = meanGain**(1 / count)
|
||||
|
||||
if metric == "clocks":
|
||||
# Clock cycles should match exactly
|
||||
status = "❌" if minGain != 1 or maxGain != 1 else "✅"
|
||||
else:
|
||||
# Otherwise use some arbitrary brackets
|
||||
status = "❌"
|
||||
if (meanGain > 0.95):
|
||||
status = "⚠️"
|
||||
if (meanGain > 0.98):
|
||||
status = "✅"
|
||||
if (meanGain > 1.02):
|
||||
status = "💡"
|
||||
if (meanGain > 1.05):
|
||||
status = "⭐"
|
||||
status = brackets[0]
|
||||
for limit, badge in zip(brackets[1::2], brackets[2::2]):
|
||||
if meanGain >= limit:
|
||||
status = badge
|
||||
badgesToExplain.add(status)
|
||||
|
||||
table.append([
|
||||
runName, step, ref_json["metrics"][metric]["header"], f"{meanGain:.2f}x {status} ",
|
||||
f"{minGain:.2f}x", f"{maxGain:.2f}x", f"{count}"
|
||||
])
|
||||
|
||||
# Print changed cycles if any
|
||||
if changedCycles:
|
||||
print("❌ simulated cycles changed (must be fixed):")
|
||||
printTable(changedCycles,
|
||||
headers=("Run", "Case", "Old Cycles", "New Cycles"),
|
||||
colalign=("left", "left", "right", "right"),
|
||||
disable_numparse=True)
|
||||
print()
|
||||
|
||||
# Print results
|
||||
printTable(
|
||||
table,
|
||||
headers=("Run", "Step", "Metric", "Improvement", "Min", "Max", "Samples"),
|
||||
colalign=("left", "left", "left", "right", "right", "right", "right"),
|
||||
disable_numparse=True,
|
||||
)
|
||||
|
||||
# Explain badges
|
||||
print()
|
||||
for badge, legend in badgeLegend:
|
||||
if badge in badgesToExplain:
|
||||
print(f" {badge} : {legend}")
|
||||
|
||||
# Fail job if any simulated cycle counts changed
|
||||
sys.exit(0 if not changedCycles else 1)
|
||||
|
|
|
|||
Loading…
Reference in New Issue