[2.x] perf: Synchronize the classes using Analysis stamps

**Problem**
We currently need to split the compiler's output from
the classes directory so that we can cache just the compiled
output without mixing it with the resource files,
while keeping a classes directory layout similar to sbt 1.x
(as opposed to splitting the two completely).

However, copying the directory is a performance bottleneck.

**Solution**
This implements a new synchronization function that uses
the stamp information generated during incremental compilation.

For example, in a no-op compilation, the stamps will remain the same,
and no files will be copied.
If only a few classes are changed, those stamps will change,
and only the changed files will be deleted or copied.
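
To make this concrete, here is a minimal sketch of the stamp-diff
idea (a hypothetical simplification: plain String keys instead of
VirtualFileRef, and stamp maps passed in directly rather than read
from the Zinc Analysis as in the diff below):

object StampDiffSketch:
  // Returns (products to copy, products to delete).
  def diff(
      previous: Map[String, String],
      current: Map[String, String]
  ): (Set[String], Set[String]) =
    // A product is out of date if it is new or its stamp changed.
    val updated = current.filter((k, v) => !previous.get(k).contains(v)).keySet
    // A product whose stamp disappeared must be deleted.
    val removed = previous.keySet -- current.keySet
    (updated, removed)

@main def stampDiffDemo(): Unit =
  // No-op compile: identical stamps, so nothing to copy or delete.
  assert(StampDiffSketch.diff(Map("A.class" -> "h1"), Map("A.class" -> "h1")) == (Set.empty, Set.empty))
  // A.class recompiled (new stamp), B.class no longer produced.
  println(StampDiffSketch.diff(
    Map("A.class" -> "h1", "B.class" -> "h2"),
    Map("A.class" -> "h3")
  )) // prints (Set(A.class), Set(B.class))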
Author: Eugene Yokota, 2025-11-14 03:11:15 -05:00
Commit: 8ab636e10a (parent: 0e9eb00a7d)
3 changed files with 71 additions and 14 deletions

@@ -9,14 +9,16 @@
package sbt
import java.io.{ File, IOException }
import java.nio.file.Path
import java.util.zip.ZipException
import sbt.internal.inc.MappedFileConverter
import sbt.internal.util.Relation
import sbt.internal.io.TranslatedException
import sbt.util.CacheImplicits.*
import sbt.util.CacheImplicits.given
import sbt.util.{ CacheStore, FileInfo }
import sbt.io.IO
import sbt.io.Path.{ flat, rebase }
import sjsonnew.{
  Builder,
  IsoString,
@@ -27,6 +29,7 @@ import sjsonnew.{
  deserializationError,
}
import xsbti.{ FileConverter, VirtualFileRef }
import xsbti.compile.CompileAnalysis
/**
 * Maintains a set of mappings so that they are up-to-date.
@@ -94,6 +97,59 @@ object Sync {
    relation
  }
  private[sbt] def syncClasses(
      store: CacheStore,
      fileConverter: FileConverter
  ): (Option[CompileAnalysis], Path, Path) => Unit =
    (analysisOpt, backendDir, classesDir) => {
      val currentStamps = analysisOpt match
        case Some(a) =>
          import scala.jdk.CollectionConverters.*
          a.readStamps
            .getAllProductStamps()
            .asScala
            .map: (k, v) =>
              (k, v.toString())
            .toMap
        case None => Map.empty
      val currentStampsSeq = currentStamps.toVector.sortBy(_._1.id())
      val previousStampsSeq = store.read[Vector[(VirtualFileRef, String)]](Vector.empty)
      val previousStamps = Map(previousStampsSeq*)
      if currentStampsSeq == previousStampsSeq then ()
      else
        val t = classesDir.toFile()
        val productsVf = currentStamps.map(_._1)
        val flt: File => Option[File] = flat(t)
        val transform: VirtualFileRef => Option[File] =
          (vf: VirtualFileRef) =>
            val f = fileConverter.toPath(vf).toFile()
            rebase(backendDir.toFile(), t)(f).orElse(flt(f))
        val mappings = productsVf.flatMap: x =>
          transform(x).map(x -> _)
        val relation = Relation.empty ++ mappings
        def outofdate(source: VirtualFileRef, target: File): Boolean =
          !previousStamps.contains(source) ||
            previousStamps.get(source) != currentStamps.get(source) ||
            !target.exists
        val updates = relation.filter(outofdate)
        val removeTargets = (previousStampsSeq.map(_._1) diff currentStampsSeq.map(_._1)).flatMap: x =>
          transform(x).map(x -> _)
        val (cleanDirs, cleanFiles) =
          (updates._2s ++ removeTargets.map(_._2)).partition(_.isDirectory)
        IO.delete(cleanFiles)
        IO.deleteIfEmpty(cleanDirs)
        updates.all.foreach: (k, v) =>
          val classFile = fileConverter.toPath(k).toFile()
          copy(classFile, v)
          if !classFile.getName().contains("$") then
            val (name, ext) = IO.split(classFile.getName)
            val tasty = File(classFile.getParentFile(), name + ".tasty")
            if tasty.exists() then
              val tastyTarget = File(v.getParentFile(), name + ".tasty")
              copy(tasty, tastyTarget)
        store.write(currentStampsSeq)
    }
  def copy(source: File, target: File): Unit =
    if (source.isFile) IO.copyFile(source, target, true)
    else if (!target.exists) { // we don't want to update the last modified time of an existing directory
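
For context, the transform above maps each compiler product from the
backend output directory to the same relative path under the classes
directory, falling back to a flat copy for files outside it. A small
standalone sketch using sbt.io.Path's rebase and flat combinators
(the directory paths here are made up for illustration):

import java.io.File
import sbt.io.Path.{ flat, rebase }

val backendDir = new File("target/backend") // hypothetical path
val classesDir = new File("target/classes") // hypothetical path

// Files under backendDir keep their relative path under classesDir;
// anything outside backendDir lands directly in classesDir.
val transform: File => Option[File] =
  f => rebase(backendDir, classesDir)(f).orElse(flat(classesDir)(f))

println(transform(new File(backendDir, "example/Hello.class")))
// Some(target/classes/example/Hello.class)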

@@ -4069,17 +4069,12 @@ object Classpaths {
      val cacheStore = factory.make("make-product")
      val t = classDirectory.value
      val vfBackendDir = compileIncremental.value._2
      val setup: Setup = compileIncSetup.value
      val analysisOut = c.toVirtualFile(setup.cachePath())
      val analysisOpt = BuildDef.extractAnalysis(analysisOut, c)
      val backendDir = c.toPath(vfBackendDir)
      val flt: File => Option[File] = flat(t)
      val transform: File => Option[File] =
        (f: File) => rebase(backendDir.toFile(), t)(f).orElse(flt(f))
      val resources = copyResources.value.map(_._2).toSet
      val view = fileTreeView.value
      val classes = view.list((Glob(backendDir, RecursiveGlob / "*")))
      val mappings: Seq[(File, File)] = classes.flatMap:
        case (r, attr) if r != backendDir => transform(r.toFile()).map(r.toFile() -> _)
        case _ => None
      Sync.sync(cacheStore, fileConverter = c)(mappings)
      Sync.syncClasses(cacheStore, fileConverter = c)(analysisOpt, backendDir, t.toPath())
      t :: Nil
    }

@@ -113,6 +113,15 @@ private[sbt] object BuildDef:
  private[sbt] def extractAnalysis(
      metadata: StringAttributeMap,
      converter: FileConverter
  ): Option[CompileAnalysis] =
    for
      ref <- metadata.get(Keys.analysis)
      analysis <- extractAnalysis(VirtualFileRef.of(ref), converter)
    yield analysis
  private[sbt] def extractAnalysis(
      ref: VirtualFileRef,
      converter: FileConverter
  ): Option[CompileAnalysis] =
    import sbt.OptionSyntax.*
    def asBinary(file: File) = FileAnalysisStore.binary(file).get.asScala
@@ -129,9 +138,6 @@ private[sbt] object BuildDef:
        val sizeBytes = attrs.size()
        getOrElseUpdate(ref, lastModified, sizeBytes)(fallback(file))
      catch case _: NoSuchFileException => fallback(file)
    for
      ref <- metadata.get(Keys.analysis)
      content <- getContents(VirtualFileRef.of(ref))
    yield content.getAnalysis
    getContents(ref).map(_.getAnalysis)
end BuildDef