diff --git a/.github/workflows/rtlmeter.yml b/.github/workflows/rtlmeter.yml index 650e8fb25..6cf357f67 100644 --- a/.github/workflows/rtlmeter.yml +++ b/.github/workflows/rtlmeter.yml @@ -343,14 +343,17 @@ jobs: path: verilator - name: Create report + id: report working-directory: verilator env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | ln -s ../rtlmeter rtlmeter gh repo set-default ${{ github.repository }} - # Compare to last successful scheduled run - ci/ci-rtlmeter-report.bash ${{ github.run_id }} ${{ github.sha }} ${{ needs.combine-results.outputs.run-tags }} + # Create run report - save status to fail job if the script did + STATUS=0 + ci/ci-rtlmeter-report.bash ${{ github.run_id }} ${{ github.sha }} ${{ needs.combine-results.outputs.run-tags }} || STATUS=$? + echo "status=$STATUS" >> "$GITHUB_OUTPUT" # Create the report artifact mkdir ../report-artifact mv rtlmeter-report/report ../report-artifact/ @@ -372,6 +375,9 @@ jobs: path: notification-artifact name: pr-notification + - name: Report status + run: exit ${{ steps.report.outputs.status }} + # Create GitHub issue for failed scheduled jobs # This should always be the last job (we want an issue if anything breaks) create-issue: diff --git a/ci/ci-rtlmeter-report.bash b/ci/ci-rtlmeter-report.bash index 8e0b8b47c..03c6acc5e 100755 --- a/ci/ci-rtlmeter-report.bash +++ b/ci/ci-rtlmeter-report.bash @@ -82,8 +82,9 @@ for r in $RUNS; do awk -v RS= -v prefix=$TMP_DIR/$r-frag '{print > sprintf("%s-execute-%02d.txt" ,prefix,NR)}' $TMP_DIR/execute-$r.txt done -# Create summary -venv/bin/python3 $SCRIPT_DIR/ci-rtlmeter-report.py ${SUMMARY_ARGS[@]} > $TMP_DIR/summary.txt +# Create summary, suppress failure, but save the status reported to pass back. +STATUS=0 +venv/bin/python3 $SCRIPT_DIR/ci-rtlmeter-report.py ${SUMMARY_ARGS[@]} > $TMP_DIR/summary.txt || STATUS=$? 
# Print it cat $TMP_DIR/summary.txt @@ -158,3 +159,5 @@ $(cat ${TMP_DIR}/body.html) INDEX_TEMPLATE + +exit $STATUS diff --git a/ci/ci-rtlmeter-report.py b/ci/ci-rtlmeter-report.py index 73810e715..27883526d 100644 --- a/ci/ci-rtlmeter-report.py +++ b/ci/ci-rtlmeter-report.py @@ -33,21 +33,29 @@ def printTable(table: List[List[str]], **kwargs) -> None: # fmt: off stepMetric = ( - ("verilate", "elapsed"), - ("verilate", "memory"), - ("verilate", "cpu"), - ("cppbuild", "elapsed"), - ("cppbuild", "memory"), - ("cppbuild", "cpu"), - ("cppbuild", "codeSize"), - ("execute", "speed"), - ("execute", "clocks"), - ("execute", "memory"), - ("execute", "cpu"), + # Step, Metric, Status Brackets + ("execute", "speed", ("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")), + ("execute", "memory", ("❌", 0.90, "⚠️", 0.95, "✅", 1.05, "💡", 1.10, "⭐")), + ("verilate", "cpu", ("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")), + ("verilate", "memory", ("❌", 0.90, "⚠️", 0.95, "✅", 1.05, "💡", 1.10, "⭐")), + ("cppbuild", "cpu", ("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")), + ("cppbuild", "memory", ("❌", 0.90, "⚠️", 0.95, "✅", 1.05, "💡", 1.10, "⭐")), + ("cppbuild", "codeSize",("❌", 0.96, "⚠️", 0.98, "✅", 1.02, "💡", 1.04, "⭐")), + ("cppbuild", "elapsed", ("❌", 0.70, "⚠️", 0.85, "✅", 1.15, "💡", 1.30, "⭐")), ) + +badgeLegend = [ + ("❌", "Likely regression"), + ("⚠️", "Possible regression"), + ("✅", "Within acceptable range"), + ("💡", "Possible improvement"), + ("⭐", "Likely improvement"), +] # fmt: on +changedCycles = [] table = [] +badgesToExplain = set() for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]): with open(ref, "r", encoding="utf-8") as f: @@ -57,7 +65,17 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]): if table: table.append(tabulate.SEPARATING_LINE) runName = ref_json["runName"] - for step, metric in stepMetric: + + # Check simulated cycles match - it's ok to crash if this entry does not exist + for entry in cmp_json["execute"]["clocks"]["table"]: + case, _, _, 
(refCycles, _), (newCycles, _), _, _ = entry + refCycles = int(refCycles) + newCycles = int(newCycles) + if refCycles != newCycles: + changedCycles.append([runName, case, refCycles, newCycles]) + + # Check metrics + for step, metric, brackets in stepMetric: if step not in cmp_json: continue data = cmp_json[step] @@ -68,7 +86,11 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]): maxGain = float("-inf") meanGain = 1 count = 0 - for _, _, _, _, _, g, _ in data["table"]: + for _, _, _, (a, _), (b, _), g, _ in data["table"]: + # for wall clock and CPU time, ignore small values that just add noise + if metric == "elapsed" or metric == "cpu": + if a < 30 or b < 30: + continue minGain = min(minGain, g) maxGain = max(maxGain, g) meanGain *= g @@ -77,29 +99,39 @@ for ref, cmp in zip(sys.argv[1::2], sys.argv[2::2]): continue meanGain = meanGain**(1 / count) - if metric == "clocks": - # Clock cycles should match exactly - status = "❌" if minGain != 1 or maxGain != 1 else "✅" - else: - # Otherwise use some arbitrary brackets - status = "❌" - if (meanGain > 0.95): - status = "⚠️" - if (meanGain > 0.98): - status = "✅" - if (meanGain > 1.02): - status = "💡" - if (meanGain > 1.05): - status = "⭐" + status = brackets[0] + for limit, badge in zip(brackets[1::2], brackets[2::2]): + if meanGain >= limit: + status = badge + badgesToExplain.add(status) table.append([ runName, step, ref_json["metrics"][metric]["header"], f"{meanGain:.2f}x {status} ", f"{minGain:.2f}x", f"{maxGain:.2f}x", f"{count}" ]) +# Print changed cycles if any +if changedCycles: + print("❌ simulated cycles changed (must be fixed):") + printTable(changedCycles, + headers=("Run", "Case", "Old Cycles", "New Cycles"), + colalign=("left", "left", "right", "right"), + disable_numparse=True) + print() + +# Print results printTable( table, headers=("Run", "Step", "Metric", "Improvement", "Min", "Max", "Samples"), colalign=("left", "left", "left", "right", "right", "right", "right"), disable_numparse=True, ) + +# 
Explain badges +print() +for badge, legend in badgeLegend: + if badge in badgesToExplain: + print(f" {badge} : {legend}") + +# Fail job if simulated cycles changed +sys.exit(0 if not changedCycles else 1)