perf: Cache content hash of binary files

**Problem**
sha256 is currently a bottleneck for no-op compilation.

**Solution**
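This adds a local, in-memory cache of the sha256 hashes of binary files
(any file other than `.scala` and `.java` sources). A cached hash is reused
only while the file's last-modified timestamp and file size are unchanged.
The size of the digest cache can be configured using localDigestCacheByteSize,
which is set to 1MB (1024 * 1024 bytes) by default.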
This commit is contained in:
Eugene Yokota 2025-11-09 22:33:44 -05:00
parent 9a00d7cf3c
commit 4265b92aa4
8 changed files with 103 additions and 4 deletions

View File

@ -358,6 +358,7 @@ lazy val utilCache = project
name := "Util Cache",
libraryDependencies ++=
Seq(
caffeine,
sjsonNewCore.value,
sjsonNewScalaJson.value,
sjsonNewMurmurhash.value
@ -365,6 +366,7 @@ lazy val utilCache = project
contrabandSettings,
mimaSettings,
mimaBinaryIssueFilters ++= Seq(
exclude[ReversedMissingMethodProblem]("sbt.util.CacheImplicits.*"),
),
Test / fork := true,
)

View File

@ -123,6 +123,12 @@ object BasicKeys {
10000
)
// Attribute key under which the size limit of the in-memory digest cache is stored.
// NOTE(review): the trailing 10000 is presumably the key's rank, not a default
// size in bytes -- confirm against AttributeKey.apply.
val localDigestCacheByteSize: AttributeKey[Long] =
  AttributeKey[Long](
    "localDigestCacheByteSize",
    "The maximum total size in the in-memory digest cache in bytes.",
    10000
  )
// Unlike other BasicKeys, this is not used directly as a setting key,
// and serverLog / logLevel is used instead.
private[sbt] val serverLogLevel =

View File

@ -274,6 +274,8 @@ object Def extends BuildSyntax with Init with InitializeImplicits:
// These are here, as opposed to RemoteCache, since we need them from TaskMacro etc
private[sbt] val cacheEventLog: CacheEventLog = CacheEventLog()
// Setting-key wrapper around BasicKeys.localDigestCacheByteSize, so the value
// can be read with `.value` when building the cache configuration.
private[sbt] val localDigestCacheByteSizeKey: SettingKey[Long] =
  SettingKey[Long](BasicKeys.localDigestCacheByteSize)
@cacheLevel(include = Array.empty)
val cacheConfiguration: Initialize[Task[BuildWideCacheConfiguration]] = Def.task {
val state = stateKey.value
@ -288,12 +290,14 @@ object Def extends BuildSyntax with Init with InitializeImplicits:
.getOrElse(
DiskActionCacheStore(state.baseDir.toPath.resolve("target/bootcache"), fileConverter)
)
val cacheByteSize = localDigestCacheByteSizeKey.value
BuildWideCacheConfiguration(
cacheStore,
outputDirectory,
fileConverter,
state.log,
cacheEventLog
cacheEventLog,
cacheByteSize
)
}

View File

@ -440,6 +440,7 @@ object Keys {
val remoteCacheResolvers = settingKey[Seq[Resolver]]("Resolvers for remote cache.")
val remoteCachePom = taskKey[HashedVirtualFileRef]("Generates a pom for publishing when publishing Maven-style.")
val localCacheDirectory = settingKey[File]("Operating system specific cache directory.")
// Name and description are taken from the underlying BasicKeys attribute key.
val localDigestCacheByteSize =
  SettingKey[Long](BasicKeys.localDigestCacheByteSize)
    .withRank(DSetting)
val usePipelining = settingKey[Boolean]("Use subproject pipelining for compilation.").withRank(BSetting)
val exportPipelining = settingKey[Boolean]("Produce early output so downstream subprojects can do pipelining.").withRank(BSetting)

View File

@ -36,7 +36,7 @@ import sbt.nio.FileStamp
import sbt.nio.Keys.{ inputFileStamps, outputFileStamps }
import sbt.std.TaskExtra.*
import sbt.util.InterfaceUtil.toOption
import sbt.util.{ DiskActionCacheStore, Logger }
import sbt.util.{ CacheImplicits, DiskActionCacheStore, Logger }
import sbt.util.CacheImplicits.given
import sjsonnew.JsonFormat
import xsbti.{ FileConverter, HashedVirtualFileRef, VirtualFileRef }
@ -78,6 +78,7 @@ object RemoteCache {
remoteCacheIdCandidates :== Nil,
pushRemoteCacheTo :== None,
localCacheDirectory :== defaultCacheLocation,
localDigestCacheByteSize :== CacheImplicits.defaultLocalDigestCacheByteSize,
pushRemoteCache / ivyPaths := {
val app = appConfiguration.value
val base = app.baseDirectory.getCanonicalFile

View File

@ -38,6 +38,7 @@ object LintUnused {
evictionWarningOptions,
initialize,
lintUnusedKeysOnLoad,
localDigestCacheByteSize,
onLoad,
onLoadMessage,
onUnload,

View File

@ -8,6 +8,7 @@ import sbt.io.syntax.*
import sbt.io.IO
import sbt.nio.file.{ **, FileTreeView }
import sbt.nio.file.syntax.*
import sbt.util.CacheImplicits
import scala.reflect.ClassTag
import scala.annotation.{ meta, StaticAnnotation }
import sjsonnew.{ HashWriter, JsonFormat }
@ -127,6 +128,7 @@ object ActionCache:
config: BuildWideCacheConfiguration,
): Either[Throwable, ActionResult] =
// val logger = config.logger
CacheImplicits.setCacheSize(config.localDigestCacheByteSize)
val (input, valuePath) = mkInput(key, codeContentHash, extraHash)
val getRequest =
GetActionResultRequest(input, inlineStdout = false, inlineStderr = false, Vector(valuePath))
@ -210,7 +212,24 @@ class BuildWideCacheConfiguration(
val fileConverter: FileConverter,
val logger: Logger,
val cacheEventLog: CacheEventLog,
val localDigestCacheByteSize: Long,
):
/**
 * Builds a configuration without an explicit digest cache size.
 *
 * The size is filled in with `CacheImplicits.defaultLocalDigestCacheByteSize`;
 * all other arguments are forwarded unchanged to the primary constructor.
 */
def this(
    store: ActionCacheStore,
    outputDirectory: Path,
    fileConverter: FileConverter,
    logger: Logger,
    cacheEventLog: CacheEventLog
) =
  this(
    store,
    outputDirectory,
    fileConverter,
    logger,
    cacheEventLog,
    localDigestCacheByteSize = CacheImplicits.defaultLocalDigestCacheByteSize
  )
/** Short description showing only the store and the output directory. */
override def toString(): String =
  val fields = s"store = $store, outputDirectory = $outputDirectory"
  s"BuildWideCacheConfiguration($fields)"
end BuildWideCacheConfiguration

View File

@ -8,7 +8,72 @@
package sbt.util
import com.github.benmanes.caffeine.cache.{ Cache as CCache, Caffeine, Weigher }
import java.nio.file.{ Files, NoSuchFileException }
import java.nio.file.attribute.BasicFileAttributes
import java.util.concurrent.atomic.{ AtomicLong, AtomicReference }
import sjsonnew.BasicJsonProtocol
import xsbti.{ HashedVirtualFileRef, PathBasedFile }
object CacheImplicits extends CacheImplicits
trait CacheImplicits extends BasicCacheImplicits with BasicJsonProtocol
/** Default instance of the [[CacheImplicits]] trait; also holds the default digest cache size. */
object CacheImplicits extends CacheImplicits:
  // Default size limit of the in-memory digest cache: 1024 * 1024 (1 MiB).
  // NOTE(review): the CacheImplicits trait reads this value from a val initializer,
  // and this object extends that trait -- confirm the initialization order is safe
  // before changing how this value is defined.
  private[sbt] val defaultLocalDigestCacheByteSize = 1024L * 1024L
end CacheImplicits
/**
 * Cache-related implicits, extended with an in-memory memo for the string form of
 * [[xsbti.HashedVirtualFileRef]]s.
 *
 * For path-based files other than `.scala` / `.java` sources, the string produced by
 * the parent implementation is remembered per file id together with the file's
 * last-modified time and size, and reused as long as both are unchanged.
 */
trait CacheImplicits extends BasicCacheImplicits with BasicJsonProtocol:
  // Maximum weight the digest cache is currently configured with.
  private val localDigestCacheByteSize = AtomicLong(CacheImplicits.defaultLocalDigestCacheByteSize)

  // Rough per-entry weight: key length + cached string length + 16
  // (presumably accounting for the two Long fields of the entry).
  private val weigher: Weigher[String, (String, Long, Long)] = { case (k, (v1, _, _)) =>
    k.size + v1.size + 16
  }

  // file id -> (cached string, last-modified millis, size in bytes)
  private val stampCache: AtomicReference[CCache[String, (String, Long, Long)]] =
    AtomicReference(newStampCache(localDigestCacheByteSize.get()))

  // Single place where the Caffeine cache is configured.
  private def newStampCache(maxWeight: Long): CCache[String, (String, Long, Long)] =
    Caffeine
      .newBuilder()
      .maximumWeight(maxWeight)
      .weigher(weigher)
      .build()

  /**
   * Reconfigures the maximum weight of the digest cache.
   *
   * Does nothing when the effective size is unchanged. Otherwise a fresh, empty cache
   * replaces the current one and the old entries are dropped.
   *
   * @param size the new maximum weight; negative values are treated as 0, because
   *             Caffeine rejects a negative maximum weight
   */
  private[sbt] def setCacheSize(size: Long): Unit =
    val requested = math.max(0L, size)
    if localDigestCacheByteSize.get() != requested then
      // Build the replacement before recording the new size, so a failure while
      // building cannot leave the recorded size out of sync with the live cache.
      val replacement = newStampCache(requested)
      localDigestCacheByteSize.set(requested)
      stampCache.getAndSet(replacement).invalidateAll()

  /**
   * Returns the cached string for `ref` when the stored timestamp and size match the
   * given ones; otherwise evaluates `value`, stores it with the new stamp, and returns it.
   */
  private def getOrElseUpdate(ref: HashedVirtualFileRef, lastModified: Long, sizeBytes: Long)(
      value: => String
  ): String =
    // Use one snapshot for both lookup and insert, so a concurrent setCacheSize
    // cannot make them hit different cache instances.
    val cache = stampCache.get()
    val key = ref.id()
    Option(cache.getIfPresent(key)) match
      case Some((v, mod, i)) if lastModified == mod && sizeBytes == i => v
      case _ =>
        val v = value
        cache.put(key, (v, lastModified, sizeBytes))
        v

  /**
   * A string representation of HashedVirtualFileRef, delimited by `>`.
   *
   * Files whose id ends in `.scala` or `.java`, directories, files that no longer
   * exist, and refs that are not path-based always go through the parent
   * implementation. Everything else is served from the in-memory cache when the
   * file's last-modified time and size are unchanged.
   */
  override def hashedVirtualFileRefToStr(ref: HashedVirtualFileRef): String =
    def fallback: String = super.hashedVirtualFileRefToStr(ref)
    val id = ref.id()
    if id.endsWith(".scala") || id.endsWith(".java") then fallback
    else
      ref match
        case pbf: PathBasedFile =>
          try
            val attrs = Files.readAttributes(pbf.toPath, classOf[BasicFileAttributes])
            if attrs.isDirectory then fallback
            else
              val lastModified = attrs.lastModifiedTime().toMillis()
              val sizeBytes = attrs.size()
              getOrElseUpdate(ref, lastModified, sizeBytes)(fallback)
          catch case _: NoSuchFileException => fallback
        case _ => fallback
end CacheImplicits