mirror of https://github.com/sbt/sbt.git
Use last modified instead of hash
I noticed that for a simple spark project that evaluating the test task was faster than running run when both tasks evaluated the same code block. I tracked this down to the BackgroundJobService.copyClasspath method. This method was hashing the jar contents of all of the files in the build. On my computer, this took 600ms (for context, the total run time of the `run` task was around 1.2 seconds, which included about 150ms of scala compiling and 350ms of time in the main method). If instead we use the last modified time it drops down to 5-10ms. As predicted, the total runtime of `run` in this project dropped down to 600ms which was on par with `test`. I am not sure why a hash was used rather than last modified in the first place, so I reworked things in such a way that, by default, sbt will use a hash but if turbo mode is on, it will use the last modified instead. We can revisit the default later.
This commit is contained in:
parent
8413e259e1
commit
a368bf7026
|
|
@ -43,6 +43,13 @@ abstract class BackgroundJobService extends Closeable {
|
|||
|
||||
/** Copies classpath to temporary directories. */
|
||||
def copyClasspath(products: Classpath, full: Classpath, workingDirectory: File): Classpath
|
||||
|
||||
private[sbt] def copyClasspath(
|
||||
products: Classpath,
|
||||
full: Classpath,
|
||||
workingDirectory: File,
|
||||
hashContents: Boolean
|
||||
): Classpath = copyClasspath(products, full, workingDirectory)
|
||||
}
|
||||
|
||||
object BackgroundJobService {
|
||||
|
|
|
|||
|
|
@ -1379,10 +1379,11 @@ object Defaults extends BuildCommon {
|
|||
Def.inputTask {
|
||||
val service = bgJobService.value
|
||||
val (mainClass, args) = parser.parsed
|
||||
val hashClasspath = (bgHashClasspath in bgRunMain).value
|
||||
service.runInBackground(resolvedScoped.value, state.value) { (logger, workingDir) =>
|
||||
val cp =
|
||||
if (copyClasspath.value)
|
||||
service.copyClasspath(products.value, classpath.value, workingDir)
|
||||
service.copyClasspath(products.value, classpath.value, workingDir, hashClasspath)
|
||||
else classpath.value
|
||||
scalaRun.value.run(mainClass, data(cp), args, logger).get
|
||||
}
|
||||
|
|
@ -1401,10 +1402,11 @@ object Defaults extends BuildCommon {
|
|||
Def.inputTask {
|
||||
val service = bgJobService.value
|
||||
val mainClass = mainClassTask.value getOrElse sys.error("No main class detected.")
|
||||
val hashClasspath = (bgHashClasspath in bgRun).value
|
||||
service.runInBackground(resolvedScoped.value, state.value) { (logger, workingDir) =>
|
||||
val cp =
|
||||
if (copyClasspath.value)
|
||||
service.copyClasspath(products.value, classpath.value, workingDir)
|
||||
service.copyClasspath(products.value, classpath.value, workingDir, hashClasspath)
|
||||
else classpath.value
|
||||
scalaRun.value.run(mainClass, data(cp), parser.parsed, logger).get
|
||||
}
|
||||
|
|
@ -1890,6 +1892,7 @@ object Defaults extends BuildCommon {
|
|||
val base = ModuleID(id.groupID, id.name, sv).withCrossVersion(cross)
|
||||
CrossVersion(scalaV, binVersion)(base).withCrossVersion(Disabled())
|
||||
},
|
||||
bgHashClasspath := !turbo.value,
|
||||
classLoaderLayeringStrategy := {
|
||||
if (turbo.value) ClassLoaderLayeringStrategy.AllLibraryJars
|
||||
else ClassLoaderLayeringStrategy.ScalaLibrary
|
||||
|
|
|
|||
|
|
@ -258,6 +258,7 @@ object Keys {
|
|||
val bgRunMain = inputKey[JobHandle]("Start a provided main class as a background job")
|
||||
val fgRunMain = inputKey[Unit]("Start a provided main class as a foreground job")
|
||||
val bgCopyClasspath = settingKey[Boolean]("Copies classpath on bgRun to prevent conflict.")
|
||||
val bgHashClasspath = settingKey[Boolean]("Toggles whether to use a hash or the last modified time to stamp the classpath jars")
|
||||
val classLoaderLayeringStrategy = settingKey[ClassLoaderLayeringStrategy]("Creates the classloader layering strategy for the particular configuration.")
|
||||
|
||||
// Test Keys
|
||||
|
|
|
|||
|
|
@ -8,19 +8,21 @@
|
|||
package sbt
|
||||
package internal
|
||||
|
||||
import java.util.concurrent.atomic.AtomicLong
|
||||
import java.io.{ Closeable, File, FileInputStream, IOException }
|
||||
import java.nio.file.attribute.BasicFileAttributes
|
||||
import java.nio.file.{ FileVisitResult, Files, Path, SimpleFileVisitor }
|
||||
import java.security.{ DigestInputStream, MessageDigest }
|
||||
import Def.{ Classpath, ScopedKey, Setting }
|
||||
import java.util.concurrent.atomic.AtomicLong
|
||||
|
||||
import sbt.Def.{ Classpath, ScopedKey, Setting }
|
||||
import sbt.Scope.GlobalScope
|
||||
import sbt.internal.util.{ Attributed, ManagedLogger }
|
||||
import sbt.io.syntax._
|
||||
import sbt.io.{ Hash, IO }
|
||||
import sbt.util.{ LogExchange, Logger }
|
||||
|
||||
import scala.concurrent.ExecutionContext
|
||||
import scala.util.Try
|
||||
import Scope.GlobalScope
|
||||
import sbt.io.{ Hash, IO }
|
||||
import sbt.io.syntax._
|
||||
import sbt.util.{ LogExchange, Logger }
|
||||
import sbt.internal.util.{ Attributed, ManagedLogger }
|
||||
|
||||
/**
|
||||
* Interface between sbt and a thing running in the background.
|
||||
|
|
@ -193,21 +195,33 @@ private[sbt] abstract class AbstractBackgroundJobService extends BackgroundJobSe
|
|||
override def toString(): String = s"BackgroundJobService(jobs=${jobs.map(_.id).mkString})"
|
||||
|
||||
/**
|
||||
*
|
||||
* Copies products to the working directory, and the rest to the serviceTempDir of this service,
|
||||
* both wrapped in SHA-1 hash of the file contents.
|
||||
* both wrapped in a stamp of the file contents.
|
||||
* This is intended to minimize the file copying and accumulation of the unused JAR file.
|
||||
* Since working directory is wiped out when the background job ends, the product JAR is deleted too.
|
||||
* Meanwhile, the rest of the dependencies are cached for the duration of this service.
|
||||
*
|
||||
* @param products the portion of the classpath that is generated by the task
|
||||
* @param full the entire classpath of the task
|
||||
* @param workingDirectory the directory into which jars and class files are copied
|
||||
* @param hashFileContents toggles whether or not the contents of each files should be hashed
|
||||
* to determine whether it has changed. When false, the last modified
|
||||
* time is used instead.
|
||||
*
|
||||
* @return a classpath pointing to jar and class files in the working directory
|
||||
*/
|
||||
override def copyClasspath(
|
||||
override private[sbt] def copyClasspath(
|
||||
products: Classpath,
|
||||
full: Classpath,
|
||||
workingDirectory: File
|
||||
workingDirectory: File,
|
||||
hashFileContents: Boolean
|
||||
): Classpath = {
|
||||
def syncTo(dir: File)(source0: Attributed[File]): Attributed[File] = {
|
||||
val source = source0.data
|
||||
val hash8 = Hash.toHex(hash(source)).take(8)
|
||||
val dest = dir / hash8 / source.getName
|
||||
val hash8 = Hash.toHex(Hash(source.toString)).take(8)
|
||||
val id: File => String = if (hashFileContents) hash else lastModified
|
||||
val dest = dir / hash8 / id(source) / source.getName
|
||||
if (!dest.exists) {
|
||||
if (source.isDirectory) IO.copyDirectory(source, dest)
|
||||
else IO.copyFile(source, dest)
|
||||
|
|
@ -220,13 +234,13 @@ private[sbt] abstract class AbstractBackgroundJobService extends BackgroundJobSe
|
|||
}
|
||||
|
||||
/** An alternative to sbt.io.Hash that handles java.io.File being a directory. */
|
||||
private def hash(f: File) = {
|
||||
private def hash(f: File): String = {
|
||||
val digest = MessageDigest.getInstance("SHA")
|
||||
val buffer = new Array[Byte](8192)
|
||||
Files.walkFileTree(
|
||||
f.toPath,
|
||||
new SimpleFileVisitor[Path]() {
|
||||
override def visitFile(file: Path, attrs: BasicFileAttributes) = {
|
||||
override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
|
||||
val dis = new DigestInputStream(new FileInputStream(file.toFile), digest)
|
||||
try {
|
||||
while (dis.read(buffer) >= 0) ()
|
||||
|
|
@ -237,8 +251,39 @@ private[sbt] abstract class AbstractBackgroundJobService extends BackgroundJobSe
|
|||
}
|
||||
}
|
||||
)
|
||||
digest.digest
|
||||
Hash.toHex(Hash(digest.digest)).take(8)
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the last modified time of a file or the maximum last file of the contents of a
|
||||
* directory.
|
||||
*
|
||||
* @param f the file or directory for which we calculate the last modified time
|
||||
* @return the last modified time of the file or the maximum last modified time of the contents
|
||||
* of the directory.
|
||||
*/
|
||||
private def lastModified(f: File): String = {
|
||||
var lastModified = 0L
|
||||
Files.walkFileTree(
|
||||
f.toPath,
|
||||
new SimpleFileVisitor[Path]() {
|
||||
override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
|
||||
val lm = attrs.lastModifiedTime.toMillis
|
||||
if (lm > lastModified) lastModified = lm
|
||||
FileVisitResult.CONTINUE
|
||||
}
|
||||
}
|
||||
)
|
||||
lastModified.toString
|
||||
}
|
||||
|
||||
/** Copies classpath to temporary directories. */
|
||||
override def copyClasspath(
|
||||
products: Classpath,
|
||||
full: Classpath,
|
||||
workingDirectory: File
|
||||
): Classpath =
|
||||
copyClasspath(products, full, workingDirectory, hashFileContents = true)
|
||||
}
|
||||
|
||||
private[sbt] object BackgroundThreadPool {
|
||||
|
|
|
|||
Loading…
Reference in New Issue