From a368bf7026d28d8a2ee866c67d8d8699974b7df8 Mon Sep 17 00:00:00 2001 From: Ethan Atkins Date: Fri, 5 Jul 2019 19:12:23 -0700 Subject: [PATCH] Use last modified instead of hash I noticed that for a simple spark project that evaluating the test task was faster than running run when both tasks evaluated the same code block. I tracked this down to the BackgroundJobService.copyClasspath method. This method was hashing the jar contents of all of the files in the build. On my computer, this took 600ms (for context, the total run time of the `run` task was around 1.2 seconds, which included about 150ms of scala compiling and 350ms of time in the main method). If instead we use the last modified time it drops down to 5-10ms. As predicted, the total runtime of `run` in this project dropped down to 600ms which was on par with `test`. I am not sure why a hash was used rather than last modified in the first place, so I reworked things in such a way that, by default, sbt will use a hash but if turbo mode is on, it will use the last modified instead. We can revisit the default later. --- .../main/scala/sbt/BackgroundJobService.scala | 7 ++ main/src/main/scala/sbt/Defaults.scala | 7 +- main/src/main/scala/sbt/Keys.scala | 1 + .../DefaultBackgroundJobService.scala | 75 +++++++++++++++---- 4 files changed, 73 insertions(+), 17 deletions(-) diff --git a/main/src/main/scala/sbt/BackgroundJobService.scala b/main/src/main/scala/sbt/BackgroundJobService.scala index b99f51a74..3b061a200 100644 --- a/main/src/main/scala/sbt/BackgroundJobService.scala +++ b/main/src/main/scala/sbt/BackgroundJobService.scala @@ -43,6 +43,13 @@ abstract class BackgroundJobService extends Closeable { /** Copies classpath to temporary directories. */ def copyClasspath(products: Classpath, full: Classpath, workingDirectory: File): Classpath + + private[sbt] def copyClasspath( + products: Classpath, + full: Classpath, + workingDirectory: File, + hashContents: Boolean + ): Classpath = copyClasspath(products, full, workingDirectory) } object BackgroundJobService { diff --git a/main/src/main/scala/sbt/Defaults.scala b/main/src/main/scala/sbt/Defaults.scala index 277068dd3..97ca96b39 100755 --- a/main/src/main/scala/sbt/Defaults.scala +++ b/main/src/main/scala/sbt/Defaults.scala @@ -1379,10 +1379,11 @@ object Defaults extends BuildCommon { Def.inputTask { val service = bgJobService.value val (mainClass, args) = parser.parsed + val hashClasspath = (bgHashClasspath in bgRunMain).value service.runInBackground(resolvedScoped.value, state.value) { (logger, workingDir) => val cp = if (copyClasspath.value) - service.copyClasspath(products.value, classpath.value, workingDir) + service.copyClasspath(products.value, classpath.value, workingDir, hashClasspath) else classpath.value scalaRun.value.run(mainClass, data(cp), args, logger).get } @@ -1401,10 +1402,11 @@ object Defaults extends BuildCommon { Def.inputTask { val service = bgJobService.value val mainClass = mainClassTask.value getOrElse sys.error("No main class detected.") + val hashClasspath = (bgHashClasspath in bgRun).value service.runInBackground(resolvedScoped.value, state.value) { (logger, workingDir) => val cp = if (copyClasspath.value) - service.copyClasspath(products.value, classpath.value, workingDir) + service.copyClasspath(products.value, classpath.value, workingDir, hashClasspath) else classpath.value scalaRun.value.run(mainClass, data(cp), parser.parsed, logger).get } @@ -1890,6 +1892,7 @@ object Defaults extends BuildCommon { val base = ModuleID(id.groupID, id.name, sv).withCrossVersion(cross) CrossVersion(scalaV, binVersion)(base).withCrossVersion(Disabled()) }, + bgHashClasspath := !turbo.value, classLoaderLayeringStrategy := { if (turbo.value) ClassLoaderLayeringStrategy.AllLibraryJars else ClassLoaderLayeringStrategy.ScalaLibrary diff --git a/main/src/main/scala/sbt/Keys.scala b/main/src/main/scala/sbt/Keys.scala index b50013d8c..051a480d9 100644 --- a/main/src/main/scala/sbt/Keys.scala +++ b/main/src/main/scala/sbt/Keys.scala @@ -258,6 +258,7 @@ object Keys { val bgRunMain = inputKey[JobHandle]("Start a provided main class as a background job") val fgRunMain = inputKey[Unit]("Start a provided main class as a foreground job") val bgCopyClasspath = settingKey[Boolean]("Copies classpath on bgRun to prevent conflict.") + val bgHashClasspath = settingKey[Boolean]("Toggles whether to use a hash or the last modified time to stamp the classpath jars") val classLoaderLayeringStrategy = settingKey[ClassLoaderLayeringStrategy]("Creates the classloader layering strategy for the particular configuration.") // Test Keys diff --git a/main/src/main/scala/sbt/internal/DefaultBackgroundJobService.scala b/main/src/main/scala/sbt/internal/DefaultBackgroundJobService.scala index 823cad5fe..b464ff67a 100644 --- a/main/src/main/scala/sbt/internal/DefaultBackgroundJobService.scala +++ b/main/src/main/scala/sbt/internal/DefaultBackgroundJobService.scala @@ -8,19 +8,21 @@ package sbt package internal -import java.util.concurrent.atomic.AtomicLong import java.io.{ Closeable, File, FileInputStream, IOException } import java.nio.file.attribute.BasicFileAttributes import java.nio.file.{ FileVisitResult, Files, Path, SimpleFileVisitor } import java.security.{ DigestInputStream, MessageDigest } -import Def.{ Classpath, ScopedKey, Setting } +import java.util.concurrent.atomic.AtomicLong + +import sbt.Def.{ Classpath, ScopedKey, Setting } +import sbt.Scope.GlobalScope +import sbt.internal.util.{ Attributed, ManagedLogger } +import sbt.io.syntax._ +import sbt.io.{ Hash, IO } +import sbt.util.{ LogExchange, Logger } + import scala.concurrent.ExecutionContext import scala.util.Try -import Scope.GlobalScope -import sbt.io.{ Hash, IO } -import sbt.io.syntax._ -import sbt.util.{ LogExchange, Logger } -import sbt.internal.util.{ Attributed, ManagedLogger } /** * Interface between sbt and a thing running in the background. @@ -193,21 +195,33 @@ private[sbt] abstract class AbstractBackgroundJobService extends BackgroundJobSe override def toString(): String = s"BackgroundJobService(jobs=${jobs.map(_.id).mkString})" /** + * * Copies products to the working directory, and the rest to the serviceTempDir of this service, - * both wrapped in SHA-1 hash of the file contents. + * both wrapped in a stamp of the file contents. * This is intended to minimize the file copying and accumulation of the unused JAR file. * Since working directory is wiped out when the background job ends, the product JAR is deleted too. * Meanwhile, the rest of the dependencies are cached for the duration of this service. + * + * @param products the portion of the classpath that is generated by the task + * @param full the entire classpath of the task + * @param workingDirectory the directory into which jars and class files are copied + * @param hashFileContents toggles whether or not the contents of each files should be hashed + * to determine whether it has changed. When false, the last modified + * time is used instead. + * + * @return a classpath pointing to jar and class files in the working directory */ - override def copyClasspath( + override private[sbt] def copyClasspath( products: Classpath, full: Classpath, - workingDirectory: File + workingDirectory: File, + hashFileContents: Boolean ): Classpath = { def syncTo(dir: File)(source0: Attributed[File]): Attributed[File] = { val source = source0.data - val hash8 = Hash.toHex(hash(source)).take(8) - val dest = dir / hash8 / source.getName + val hash8 = Hash.toHex(Hash(source.toString)).take(8) + val id: File => String = if (hashFileContents) hash else lastModified + val dest = dir / hash8 / id(source) / source.getName if (!dest.exists) { if (source.isDirectory) IO.copyDirectory(source, dest) else IO.copyFile(source, dest) @@ -220,13 +234,13 @@ private[sbt] abstract class AbstractBackgroundJobService extends BackgroundJobSe } /** An alternative to sbt.io.Hash that handles java.io.File being a directory. */ - private def hash(f: File) = { + private def hash(f: File): String = { val digest = MessageDigest.getInstance("SHA") val buffer = new Array[Byte](8192) Files.walkFileTree( f.toPath, new SimpleFileVisitor[Path]() { - override def visitFile(file: Path, attrs: BasicFileAttributes) = { + override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { val dis = new DigestInputStream(new FileInputStream(file.toFile), digest) try { while (dis.read(buffer) >= 0) () @@ -237,8 +251,39 @@ private[sbt] abstract class AbstractBackgroundJobService extends BackgroundJobSe } } ) - digest.digest + Hash.toHex(Hash(digest.digest)).take(8) } + + /** + * Computes the last modified time of a file or the maximum last file of the contents of a + * directory. + * + * @param f the file or directory for which we calculate the last modified time + * @return the last modified time of the file or the maximum last modified time of the contents + * of the directory. + */ + private def lastModified(f: File): String = { + var lastModified = 0L + Files.walkFileTree( + f.toPath, + new SimpleFileVisitor[Path]() { + override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { + val lm = attrs.lastModifiedTime.toMillis + if (lm > lastModified) lastModified = lm + FileVisitResult.CONTINUE + } + } + ) + lastModified.toString + } + + /** Copies classpath to temporary directories. */ + override def copyClasspath( + products: Classpath, + full: Classpath, + workingDirectory: File + ): Classpath = + copyClasspath(products, full, workingDirectory, hashFileContents = true) } private[sbt] object BackgroundThreadPool {