Use last modified instead of hash

I noticed that for a simple spark project that evaluating the test task
was faster than running run when both tasks evaluated the same code
block. I tracked this down to the BackgroundJobService.copyClasspath
method. This method was hashing the jar contents of all of the files in
the build. On my computer, this took 600ms (for context, the total run
time of the `run` task was around 1.2 seconds, which included about
150ms of scala compiling and 350ms of time in the main method). If
instead we use the last modified time it drops down to 5-10ms. As
predicted, the total runtime of `run` in this project dropped down to
600ms which was on par with `test`.

I am not sure why a hash was used rather than last modified in the first place,
so I reworked things in such a way that, by default, sbt will use a hash
but if turbo mode is on, it will use the last modified instead. We can
revisit the default later.
This commit is contained in:
Ethan Atkins 2019-07-05 19:12:23 -07:00
parent 8413e259e1
commit a368bf7026
4 changed files with 73 additions and 17 deletions

View File

@ -43,6 +43,13 @@ abstract class BackgroundJobService extends Closeable {
/** Copies classpath to temporary directories. */
def copyClasspath(products: Classpath, full: Classpath, workingDirectory: File): Classpath
private[sbt] def copyClasspath(
products: Classpath,
full: Classpath,
workingDirectory: File,
hashContents: Boolean
): Classpath = copyClasspath(products, full, workingDirectory)
}
object BackgroundJobService {

View File

@ -1379,10 +1379,11 @@ object Defaults extends BuildCommon {
Def.inputTask {
val service = bgJobService.value
val (mainClass, args) = parser.parsed
val hashClasspath = (bgHashClasspath in bgRunMain).value
service.runInBackground(resolvedScoped.value, state.value) { (logger, workingDir) =>
val cp =
if (copyClasspath.value)
service.copyClasspath(products.value, classpath.value, workingDir)
service.copyClasspath(products.value, classpath.value, workingDir, hashClasspath)
else classpath.value
scalaRun.value.run(mainClass, data(cp), args, logger).get
}
@ -1401,10 +1402,11 @@ object Defaults extends BuildCommon {
Def.inputTask {
val service = bgJobService.value
val mainClass = mainClassTask.value getOrElse sys.error("No main class detected.")
val hashClasspath = (bgHashClasspath in bgRun).value
service.runInBackground(resolvedScoped.value, state.value) { (logger, workingDir) =>
val cp =
if (copyClasspath.value)
service.copyClasspath(products.value, classpath.value, workingDir)
service.copyClasspath(products.value, classpath.value, workingDir, hashClasspath)
else classpath.value
scalaRun.value.run(mainClass, data(cp), parser.parsed, logger).get
}
@ -1890,6 +1892,7 @@ object Defaults extends BuildCommon {
val base = ModuleID(id.groupID, id.name, sv).withCrossVersion(cross)
CrossVersion(scalaV, binVersion)(base).withCrossVersion(Disabled())
},
bgHashClasspath := !turbo.value,
classLoaderLayeringStrategy := {
if (turbo.value) ClassLoaderLayeringStrategy.AllLibraryJars
else ClassLoaderLayeringStrategy.ScalaLibrary

View File

@ -258,6 +258,7 @@ object Keys {
val bgRunMain = inputKey[JobHandle]("Start a provided main class as a background job")
val fgRunMain = inputKey[Unit]("Start a provided main class as a foreground job")
val bgCopyClasspath = settingKey[Boolean]("Copies classpath on bgRun to prevent conflict.")
val bgHashClasspath = settingKey[Boolean]("Toggles whether to use a hash or the last modified time to stamp the classpath jars")
val classLoaderLayeringStrategy = settingKey[ClassLoaderLayeringStrategy]("Creates the classloader layering strategy for the particular configuration.")
// Test Keys

View File

@ -8,19 +8,21 @@
package sbt
package internal
import java.util.concurrent.atomic.AtomicLong
import java.io.{ Closeable, File, FileInputStream, IOException }
import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.{ FileVisitResult, Files, Path, SimpleFileVisitor }
import java.security.{ DigestInputStream, MessageDigest }
import Def.{ Classpath, ScopedKey, Setting }
import java.util.concurrent.atomic.AtomicLong
import sbt.Def.{ Classpath, ScopedKey, Setting }
import sbt.Scope.GlobalScope
import sbt.internal.util.{ Attributed, ManagedLogger }
import sbt.io.syntax._
import sbt.io.{ Hash, IO }
import sbt.util.{ LogExchange, Logger }
import scala.concurrent.ExecutionContext
import scala.util.Try
import Scope.GlobalScope
import sbt.io.{ Hash, IO }
import sbt.io.syntax._
import sbt.util.{ LogExchange, Logger }
import sbt.internal.util.{ Attributed, ManagedLogger }
/**
* Interface between sbt and a thing running in the background.
@ -193,21 +195,33 @@ private[sbt] abstract class AbstractBackgroundJobService extends BackgroundJobSe
override def toString(): String = s"BackgroundJobService(jobs=${jobs.map(_.id).mkString})"
/**
*
* Copies products to the working directory, and the rest to the serviceTempDir of this service,
* both wrapped in SHA-1 hash of the file contents.
* both wrapped in a stamp of the file contents.
* This is intended to minimize the file copying and accumulation of the unused JAR file.
* Since working directory is wiped out when the background job ends, the product JAR is deleted too.
* Meanwhile, the rest of the dependencies are cached for the duration of this service.
*
* @param products the portion of the classpath that is generated by the task
* @param full the entire classpath of the task
* @param workingDirectory the directory into which jars and class files are copied
* @param hashFileContents toggles whether or not the contents of each files should be hashed
* to determine whether it has changed. When false, the last modified
* time is used instead.
*
* @return a classpath pointing to jar and class files in the working directory
*/
override def copyClasspath(
override private[sbt] def copyClasspath(
products: Classpath,
full: Classpath,
workingDirectory: File
workingDirectory: File,
hashFileContents: Boolean
): Classpath = {
def syncTo(dir: File)(source0: Attributed[File]): Attributed[File] = {
val source = source0.data
val hash8 = Hash.toHex(hash(source)).take(8)
val dest = dir / hash8 / source.getName
val hash8 = Hash.toHex(Hash(source.toString)).take(8)
val id: File => String = if (hashFileContents) hash else lastModified
val dest = dir / hash8 / id(source) / source.getName
if (!dest.exists) {
if (source.isDirectory) IO.copyDirectory(source, dest)
else IO.copyFile(source, dest)
@ -220,13 +234,13 @@ private[sbt] abstract class AbstractBackgroundJobService extends BackgroundJobSe
}
/** An alternative to sbt.io.Hash that handles java.io.File being a directory. */
private def hash(f: File) = {
private def hash(f: File): String = {
val digest = MessageDigest.getInstance("SHA")
val buffer = new Array[Byte](8192)
Files.walkFileTree(
f.toPath,
new SimpleFileVisitor[Path]() {
override def visitFile(file: Path, attrs: BasicFileAttributes) = {
override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
val dis = new DigestInputStream(new FileInputStream(file.toFile), digest)
try {
while (dis.read(buffer) >= 0) ()
@ -237,8 +251,39 @@ private[sbt] abstract class AbstractBackgroundJobService extends BackgroundJobSe
}
}
)
digest.digest
Hash.toHex(Hash(digest.digest)).take(8)
}
/**
* Computes the last modified time of a file or the maximum last file of the contents of a
* directory.
*
* @param f the file or directory for which we calculate the last modified time
* @return the last modified time of the file or the maximum last modified time of the contents
* of the directory.
*/
private def lastModified(f: File): String = {
var lastModified = 0L
Files.walkFileTree(
f.toPath,
new SimpleFileVisitor[Path]() {
override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
val lm = attrs.lastModifiedTime.toMillis
if (lm > lastModified) lastModified = lm
FileVisitResult.CONTINUE
}
}
)
lastModified.toString
}
/** Copies classpath to temporary directories. */
override def copyClasspath(
products: Classpath,
full: Classpath,
workingDirectory: File
): Classpath =
copyClasspath(products, full, workingDirectory, hashFileContents = true)
}
private[sbt] object BackgroundThreadPool {