From 04d6a8b44c6b1385ebe84236af332b8de59d644e Mon Sep 17 00:00:00 2001 From: Eugene Yokota Date: Thu, 5 Apr 2018 02:14:00 -0400 Subject: [PATCH 1/2] perf: optimize hash for build This hot path was discovered by retronym using FlameGraph. This removes intermediate creation of Array. `filesModifiedBytes` in particular was bad as it was going through all `*.class` files, each generating an Array. This replaces that with `fileModifiedHash` that accepts `MessageDigest`. According to the flamegraph, evalCommon footprint reduced from 4.5% to 3.6%. Using `time sbt reload reload reload exit`, the median reduced from 41.450s to 39.467s. --- build.sbt | 4 ++ .../src/main/scala/sbt/compiler/Eval.scala | 51 ++++++++++++++----- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/build.sbt b/build.sbt index d2b19aef3..349c89e62 100644 --- a/build.sbt +++ b/build.sbt @@ -310,6 +310,10 @@ lazy val actionsProj = (project in file("main-actions")) name := "Actions", libraryDependencies += sjsonNewScalaJson.value, mimaSettings, + mimaBinaryIssueFilters ++= Seq( + exclude[DirectMissingMethodProblem]("sbt.compiler.Eval.filesModifiedBytes"), + exclude[DirectMissingMethodProblem]("sbt.compiler.Eval.fileModifiedBytes"), + ) ) .configure( addSbtIO, diff --git a/main-actions/src/main/scala/sbt/compiler/Eval.scala b/main-actions/src/main/scala/sbt/compiler/Eval.scala index a9d97246a..d2afb03b9 100644 --- a/main-actions/src/main/scala/sbt/compiler/Eval.scala +++ b/main-actions/src/main/scala/sbt/compiler/Eval.scala @@ -18,6 +18,7 @@ import Tokens.{ EOF, NEWLINE, NEWLINES, SEMI } import java.io.File import java.nio.ByteBuffer import java.net.URLClassLoader +import java.security.MessageDigest import Eval.{ getModule, getValue, WrapValName } import sbt.io.{ DirectoryFilter, FileFilter, GlobFilter, Hash, IO, Path } @@ -159,12 +160,31 @@ final class Eval(optionsNoncp: Seq[String], // TODO - We also encode the source of the setting into the hash to avoid conflicts where the exact SAME setting // is defined in multiple evaluated instances with a backing. This leads to issues with finding a previous // value on the classpath when compiling. - val hash = Hash.toHex( - Hash(bytes( - stringSeqBytes(content) :: optBytes(backing)(fileExistsBytes) :: stringSeqBytes(options) :: - seqBytes(classpath)(fileModifiedBytes) :: stringSeqBytes(imports.strings.map(_._1)) :: optBytes( - tpeName)(bytes) :: - bytes(ev.extraHash) :: Nil))) + + // This is a hot path. + val digester = MessageDigest.getInstance("SHA") + content foreach { c => + digester.update(bytes(c)) + } + backing foreach { x => + digester.update(fileExistsBytes(x)) + } + options foreach { o => + digester.update(bytes(o)) + } + classpath foreach { f => + fileModifiedHash(f, digester) + } + imports.strings.map(_._1) foreach { x => + digester.update(bytes(x)) + } + tpeName foreach { x => + digester.update(bytes(x)) + } + digester.update(bytes(ev.extraHash)) + val d = digester.digest() + + val hash = Hash.toHex(d) val moduleName = makeModuleName(hash) lazy val unit = { @@ -482,13 +502,18 @@ private[sbt] object Eval { def seqBytes[T](s: Seq[T])(f: T => Array[Byte]): Array[Byte] = bytes(s map f) def bytes(b: Seq[Array[Byte]]): Array[Byte] = bytes(b.length) ++ b.flatten.toArray[Byte] def bytes(b: Boolean): Array[Byte] = Array[Byte](if (b) 1 else 0) - def filesModifiedBytes(fs: Array[File]): Array[Byte] = - if (fs eq null) filesModifiedBytes(Array[File]()) else seqBytes(fs)(fileModifiedBytes) - def fileModifiedBytes(f: File): Array[Byte] = - (if (f.isDirectory) - filesModifiedBytes(f listFiles classDirFilter) - else - bytes(IO.getModifiedTimeOrZero(f))) ++ bytes(f.getAbsolutePath) + + // fileModifiedBytes is a hot method, taking up 0.85% of reload time + // This is a procedural version + def fileModifiedHash(f: File, digester: MessageDigest): Unit = { + if (f.isDirectory) + (f listFiles classDirFilter) foreach { x => + fileModifiedHash(x, digester) + } else digester.update(bytes(IO.getModifiedTimeOrZero(f))) + + digester.update(bytes(f.getAbsolutePath)) + } + def fileExistsBytes(f: File): Array[Byte] = bytes(f.exists) ++ bytes(f.getAbsolutePath) From ad3692b2dfc8a7e2164f4a9b7d578a4723daac8c Mon Sep 17 00:00:00 2001 From: Eugene Yokota Date: Thu, 5 Apr 2018 04:22:16 -0400 Subject: [PATCH 2/2] Use NIO Files.getLastModifiedTime for hashing --- main-actions/src/main/scala/sbt/compiler/Eval.scala | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/main-actions/src/main/scala/sbt/compiler/Eval.scala b/main-actions/src/main/scala/sbt/compiler/Eval.scala index d2afb03b9..f9b0ffeee 100644 --- a/main-actions/src/main/scala/sbt/compiler/Eval.scala +++ b/main-actions/src/main/scala/sbt/compiler/Eval.scala @@ -509,11 +509,22 @@ private[sbt] object Eval { if (f.isDirectory) (f listFiles classDirFilter) foreach { x => fileModifiedHash(x, digester) - } else digester.update(bytes(IO.getModifiedTimeOrZero(f))) + } else digester.update(bytes(JavaMilli.getModifiedTimeOrZero(f))) digester.update(bytes(f.getAbsolutePath)) } + // This uses NIO instead of the JNA-based IO.getModifiedTimeOrZero for speed + object JavaMilli { + import java.nio.file.{ Files, NoSuchFileException } + def getModifiedTimeOrZero(f: File): Long = + try { + Files.getLastModifiedTime(f.toPath).toMillis + } catch { + case e: NoSuchFileException => 0L + } + } + def fileExistsBytes(f: File): Array[Byte] = bytes(f.exists) ++ bytes(f.getAbsolutePath)