From 4265b92aa4d417baa9b27ac31869148d863614ea Mon Sep 17 00:00:00 2001
From: Eugene Yokota
Date: Sun, 9 Nov 2025 22:33:44 -0500
Subject: [PATCH] perf: Cache content hash of binary files

**Problem**
sha256 is currently a bottleneck for no-op compilation.

**Solution**
This adds a local, in-memory cache of sha256 hashes of binary files
using their timestamp and file size.
The size of the digest cache can be configured using localDigestCacheByteSize,
which is set to 1MB by default.
---
 build.sbt                                     |  2 +
 .../src/main/scala/sbt/BasicKeys.scala        |  6 ++
 main-settings/src/main/scala/sbt/Def.scala    |  6 +-
 main/src/main/scala/sbt/Keys.scala            |  1 +
 main/src/main/scala/sbt/RemoteCache.scala     |  3 +-
 .../main/scala/sbt/internal/LintUnused.scala  |  1 +
 .../src/main/scala/sbt/util/ActionCache.scala | 19 +++++
 .../main/scala/sbt/util/CacheImplicits.scala  | 88 ++++++++++++++++++-
 8 files changed, 122 insertions(+), 4 deletions(-)

diff --git a/build.sbt b/build.sbt
index 5c025a813..5636a47c0 100644
--- a/build.sbt
+++ b/build.sbt
@@ -358,6 +358,7 @@ lazy val utilCache = project
     name := "Util Cache",
     libraryDependencies ++=
       Seq(
+        caffeine,
         sjsonNewCore.value,
         sjsonNewScalaJson.value,
         sjsonNewMurmurhash.value
@@ -365,6 +366,7 @@ lazy val utilCache = project
     contrabandSettings,
     mimaSettings,
     mimaBinaryIssueFilters ++= Seq(
+      exclude[ReversedMissingMethodProblem]("sbt.util.CacheImplicits.*"),
     ),
     Test / fork := true,
   )
diff --git a/main-command/src/main/scala/sbt/BasicKeys.scala b/main-command/src/main/scala/sbt/BasicKeys.scala
index b5f82e69b..5b4decfff 100644
--- a/main-command/src/main/scala/sbt/BasicKeys.scala
+++ b/main-command/src/main/scala/sbt/BasicKeys.scala
@@ -123,6 +123,12 @@ object BasicKeys {
     10000
   )
 
+  val localDigestCacheByteSize = AttributeKey[Long](
+    "localDigestCacheByteSize",
+    "The maximum total size in the in-memory digest cache in bytes.",
+    10000
+  )
+
   // Unlike other BasicKeys, this is not used directly as a setting key,
   // and severLog / logLevel is used instead.
   private[sbt] val serverLogLevel =
diff --git a/main-settings/src/main/scala/sbt/Def.scala b/main-settings/src/main/scala/sbt/Def.scala
index e77923a9c..f3526b1a6 100644
--- a/main-settings/src/main/scala/sbt/Def.scala
+++ b/main-settings/src/main/scala/sbt/Def.scala
@@ -274,6 +274,8 @@ object Def extends BuildSyntax with Init with InitializeImplicits:
 
   // These are here, as opposed to RemoteCache, since we need them from TaskMacro etc
   private[sbt] val cacheEventLog: CacheEventLog = CacheEventLog()
+  private[sbt] val localDigestCacheByteSizeKey =
+    SettingKey[Long](BasicKeys.localDigestCacheByteSize)
   @cacheLevel(include = Array.empty)
   val cacheConfiguration: Initialize[Task[BuildWideCacheConfiguration]] = Def.task {
     val state = stateKey.value
@@ -288,12 +290,14 @@ object Def extends BuildSyntax with Init with InitializeImplicits:
       .getOrElse(
         DiskActionCacheStore(state.baseDir.toPath.resolve("target/bootcache"), fileConverter)
       )
+    val cacheByteSize = localDigestCacheByteSizeKey.value
     BuildWideCacheConfiguration(
       cacheStore,
       outputDirectory,
       fileConverter,
       state.log,
-      cacheEventLog
+      cacheEventLog,
+      cacheByteSize
     )
   }
 
diff --git a/main/src/main/scala/sbt/Keys.scala b/main/src/main/scala/sbt/Keys.scala
index 4684f50e1..ac421f4ee 100644
--- a/main/src/main/scala/sbt/Keys.scala
+++ b/main/src/main/scala/sbt/Keys.scala
@@ -440,6 +440,7 @@ object Keys {
   val remoteCacheResolvers = settingKey[Seq[Resolver]]("Resolvers for remote cache.")
   val remoteCachePom = taskKey[HashedVirtualFileRef]("Generates a pom for publishing when publishing Maven-style.")
   val localCacheDirectory = settingKey[File]("Operating system specific cache directory.")
+  val localDigestCacheByteSize = SettingKey[Long](BasicKeys.localDigestCacheByteSize).withRank(DSetting)
   val usePipelining = settingKey[Boolean]("Use subproject pipelining for compilation.").withRank(BSetting)
   val exportPipelining = settingKey[Boolean]("Produce early output so downstream subprojects can do pipelining.").withRank(BSetting)
 
diff --git a/main/src/main/scala/sbt/RemoteCache.scala b/main/src/main/scala/sbt/RemoteCache.scala
index 9107a5b9e..2844a48e7 100644
--- a/main/src/main/scala/sbt/RemoteCache.scala
+++ b/main/src/main/scala/sbt/RemoteCache.scala
@@ -36,7 +36,7 @@ import sbt.nio.FileStamp
 import sbt.nio.Keys.{ inputFileStamps, outputFileStamps }
 import sbt.std.TaskExtra.*
 import sbt.util.InterfaceUtil.toOption
-import sbt.util.{ DiskActionCacheStore, Logger }
+import sbt.util.{ CacheImplicits, DiskActionCacheStore, Logger }
 import sbt.util.CacheImplicits.given
 import sjsonnew.JsonFormat
 import xsbti.{ FileConverter, HashedVirtualFileRef, VirtualFileRef }
@@ -78,6 +78,7 @@ object RemoteCache {
     remoteCacheIdCandidates :== Nil,
     pushRemoteCacheTo :== None,
     localCacheDirectory :== defaultCacheLocation,
+    localDigestCacheByteSize :== CacheImplicits.defaultLocalDigestCacheByteSize,
     pushRemoteCache / ivyPaths := {
       val app = appConfiguration.value
       val base = app.baseDirectory.getCanonicalFile
diff --git a/main/src/main/scala/sbt/internal/LintUnused.scala b/main/src/main/scala/sbt/internal/LintUnused.scala
index 69a797082..d11b04785 100644
--- a/main/src/main/scala/sbt/internal/LintUnused.scala
+++ b/main/src/main/scala/sbt/internal/LintUnused.scala
@@ -38,6 +38,7 @@ object LintUnused {
       evictionWarningOptions,
       initialize,
       lintUnusedKeysOnLoad,
+      localDigestCacheByteSize,
       onLoad,
       onLoadMessage,
       onUnload,
diff --git a/util-cache/src/main/scala/sbt/util/ActionCache.scala b/util-cache/src/main/scala/sbt/util/ActionCache.scala
index 59436ab31..b85acc57d 100644
--- a/util-cache/src/main/scala/sbt/util/ActionCache.scala
+++ b/util-cache/src/main/scala/sbt/util/ActionCache.scala
@@ -8,6 +8,7 @@ import sbt.io.syntax.*
 import sbt.io.IO
 import sbt.nio.file.{ **, FileTreeView }
 import sbt.nio.file.syntax.*
+import sbt.util.CacheImplicits
 import scala.reflect.ClassTag
 import scala.annotation.{ meta, StaticAnnotation }
 import sjsonnew.{ HashWriter, JsonFormat }
@@ -127,6 +128,7 @@ object ActionCache:
       config: BuildWideCacheConfiguration,
   ): Either[Throwable, ActionResult] =
     // val logger = config.logger
+    CacheImplicits.setCacheSize(config.localDigestCacheByteSize)
     val (input, valuePath) = mkInput(key, codeContentHash, extraHash)
     val getRequest =
       GetActionResultRequest(input, inlineStdout = false, inlineStderr = false, Vector(valuePath))
@@ -210,7 +212,24 @@ class BuildWideCacheConfiguration(
     val fileConverter: FileConverter,
     val logger: Logger,
     val cacheEventLog: CacheEventLog,
+    val localDigestCacheByteSize: Long,
 ):
+  def this(
+      store: ActionCacheStore,
+      outputDirectory: Path,
+      fileConverter: FileConverter,
+      logger: Logger,
+      cacheEventLog: CacheEventLog
+  ) =
+    this(
+      store,
+      outputDirectory,
+      fileConverter,
+      logger,
+      cacheEventLog,
+      CacheImplicits.defaultLocalDigestCacheByteSize
+    )
+
   override def toString(): String =
     s"BuildWideCacheConfiguration(store = $store, outputDirectory = $outputDirectory)"
 end BuildWideCacheConfiguration
diff --git a/util-cache/src/main/scala/sbt/util/CacheImplicits.scala b/util-cache/src/main/scala/sbt/util/CacheImplicits.scala
index 766afb608..3a7648906 100644
--- a/util-cache/src/main/scala/sbt/util/CacheImplicits.scala
+++ b/util-cache/src/main/scala/sbt/util/CacheImplicits.scala
@@ -8,7 +8,91 @@
 
 package sbt.util
 
+import com.github.benmanes.caffeine.cache.{ Cache as CCache, Caffeine, Weigher }
+import java.nio.file.{ Files, NoSuchFileException }
+import java.nio.file.attribute.BasicFileAttributes
+import java.util.concurrent.atomic.{ AtomicLong, AtomicReference }
 import sjsonnew.BasicJsonProtocol
+import xsbti.{ HashedVirtualFileRef, PathBasedFile }
 
-object CacheImplicits extends CacheImplicits
-trait CacheImplicits extends BasicCacheImplicits with BasicJsonProtocol
+object CacheImplicits extends CacheImplicits:
+  // `final` gives this val a constant type, so uses can be folded to the literal at compile
+  // time. The trait initializer below reads it while this object is still being constructed.
+  private[sbt] final val defaultLocalDigestCacheByteSize = 1024L * 1024L
+end CacheImplicits
+
+trait CacheImplicits extends BasicCacheImplicits with BasicJsonProtocol:
+  // Maximum total weight, in approximate bytes, that stampCache is currently configured with.
+  private val localDigestCacheByteSize = AtomicLong(CacheImplicits.defaultLocalDigestCacheByteSize)
+
+  // Approximate footprint of one entry: key length + digest string length + 16.
+  // NOTE(review): the 16 presumably accounts for the two Long fields -- confirm.
+  private val weigher: Weigher[String, (String, Long, Long)] = { case (k, (v1, _, _)) =>
+    k.size + v1.size + 16
+  }
+
+  // Builds an empty, weight-bounded cache: ref id -> (digest string, last-modified millis, size).
+  private def newStampCache(maxWeight: Long): CCache[String, (String, Long, Long)] =
+    Caffeine
+      .newBuilder()
+      .maximumWeight(maxWeight)
+      .weigher(weigher)
+      .build()
+
+  private val stampCache: AtomicReference[CCache[String, (String, Long, Long)]] =
+    AtomicReference(newStampCache(localDigestCacheByteSize.get()))
+
+  /**
+   * Sets the maximum total weight of the in-memory digest cache.
+   * When the size changes, the cache is replaced by an empty one and the old entries are dropped.
+   * Negative sizes are clamped to 0.
+   */
+  private[sbt] def setCacheSize(size: Long): Unit =
+    // Caffeine's maximumWeight rejects negative values with IllegalArgumentException.
+    val maxWeight = math.max(0L, size)
+    // getAndSet folds the comparison and the update into one atomic step, so concurrent callers
+    // passing the same new size do not each rebuild the cache.
+    if localDigestCacheByteSize.getAndSet(maxWeight) != maxWeight then
+      stampCache.getAndSet(newStampCache(maxWeight)).invalidateAll()
+
+  /**
+   * Returns the cached digest string for `ref` when the cached last-modified time and size
+   * both match; otherwise evaluates `value`, stores it with the given stamps, and returns it.
+   */
+  private def getOrElseUpdate(ref: HashedVirtualFileRef, lastModified: Long, sizeBytes: Long)(
+      value: => String
+  ): String =
+    // Read the reference once so the lookup and the store hit the same cache instance.
+    val cache = stampCache.get()
+    val key = ref.id()
+    Option(cache.getIfPresent(key)) match
+      case Some((v, mod, i)) if lastModified == mod && sizeBytes == i => v
+      case _ =>
+        val v = value
+        cache.put(key, (v, lastModified, sizeBytes))
+        v
+
+  /**
+   * A string representation of HashedVirtualFileRef, delimited by `>`.
+   *
+   * `.scala` and `.java` refs, non path-based refs, directories, and missing files are
+   * delegated to the parent implementation. For any other path-based file the result is cached
+   * in memory and reused while the file's last-modified time and size are unchanged.
+   */
+  override def hashedVirtualFileRefToStr(ref: HashedVirtualFileRef): String =
+    def fallback: String = super.hashedVirtualFileRefToStr(ref)
+    if ref.id().endsWith(".scala") || ref.id().endsWith(".java") then fallback
+    else
+      ref match
+        case pbf: PathBasedFile =>
+          val path = pbf.toPath
+          try
+            val attrs = Files.readAttributes(path, classOf[BasicFileAttributes])
+            if attrs.isDirectory then fallback
+            else
+              val lastModified = attrs.lastModifiedTime().toMillis()
+              val sizeBytes = attrs.size()
+              getOrElseUpdate(ref, lastModified, sizeBytes)(fallback)
+          catch case _: NoSuchFileException => fallback
+        case _ => fallback
+end CacheImplicits