From c77a26e832f90aae81dad09cc8bb2085377b3b7a Mon Sep 17 00:00:00 2001 From: Ethan Atkins Date: Sun, 13 Jan 2019 09:29:04 -0800 Subject: [PATCH] Update clean task to use globs The clean task is unreasonably slow because it does a lot of redundant io. In this commit, I update clean to be implemented using globs. This allows us to (optionally) route io through the file system cache. There is a significant performance improvement to this change. Currently, running clean on the sbt project takes O(6 seconds) on my machine. After this change, it takes O(1 second). To implement this, I added a new setting cleanKeepGlobs to replace cleanKeepFiles. I don't think that cleanKeepFiles returning Seq[File] is a big deal for performance because, by default, it just contains the history file so there isn't much benefit to accessing a single file through the cache. The reason I added the setting was more for consistency and to help push people towards globs in their own task implementations. Part of the performance improvement comes from inverting the problem. Before we would walk the file system tree from the base and recursively delete leafs and nodes in a depth first traversal. Now we collect all of the files that we are interested in deleting in advance. We then sort the results lexically by path name and then perform the deletions in that order. Because children will always comes first in this scheme, this will generally allow us to delete a directory. There is an edge case that if files are created in a subdirectory after we've created the list to delete, but before the subdirectory is deleted, then that subdirectory will not be deleted. In general, this will tend to impact target/streams because writes occur to target/streams during traversal. I don't think this really matters for most users. If the target directory is being concurrently modified with clean, then the user is doing something wrong. To ensure legacy compatibility, I re-implement cleanKeepFiles to return no files. Any plugin that was appending files to the cleanKeepFiles task with `+=` or `++=` will continue working as before because I explicitly add those files to the list to delete. I updated the actions/clean-keep scripted test to use both cleanKeepFiles and cleanKeepGlobs to ensure both tasks are correctly used. Bonus: add debug logging of all deleted files --- main/src/main/scala/sbt/Defaults.scala | 46 +++++++++++-------- main/src/main/scala/sbt/Keys.scala | 3 ++ sbt/src/sbt-test/actions/clean-keep/build.sbt | 4 +- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/main/src/main/scala/sbt/Defaults.scala b/main/src/main/scala/sbt/Defaults.scala index c8a1993c9..ff8a28c88 100755 --- a/main/src/main/scala/sbt/Defaults.scala +++ b/main/src/main/scala/sbt/Defaults.scala @@ -9,6 +9,7 @@ package sbt import java.io.{ File, PrintWriter } import java.net.{ URI, URL } +import java.nio.file.{ DirectoryNotEmptyException, Files } import java.util.Optional import java.util.concurrent.{ Callable, TimeUnit } @@ -613,8 +614,9 @@ object Defaults extends BuildCommon { lazy val projectTasks: Seq[Setting[_]] = Seq( cleanFiles := cleanFilesTask.value, - cleanKeepFiles := historyPath.value.toVector, - clean := (Def.task { IO.delete(cleanFiles.value) } tag (Tags.Clean)).value, + cleanKeepFiles := Vector.empty, + cleanKeepGlobs := historyPath.value.map(_.toGlob).toSeq, + clean := (cleanTask tag Tags.Clean).value, consoleProject := consoleProjectTask.value, watchTransitiveSources := watchTransitiveSourcesTask.value, watchProjectTransitiveSources := watchTransitiveSourcesTaskImpl(watchProjectSources).value, @@ -1302,24 +1304,30 @@ object Defaults extends BuildCommon { } /** Implements `cleanFiles` task. */ - def cleanFilesTask: Initialize[Task[Vector[File]]] = - Def.task { - val filesAndDirs = Vector(managedDirectory.value, target.value) - val preserve = cleanKeepFiles.value - val (dirs, fs) = filesAndDirs.filter(_.exists).partition(_.isDirectory) - val preserveSet = preserve.filter(_.exists).toSet - // performance reasons, only the direct items under `filesAndDirs` are allowed to be preserved. - val dirItems = dirs flatMap { _.glob("*").get } - (preserveSet diff dirItems.toSet) match { - case xs if xs.isEmpty => () - case xs => - sys.error( - s"cleanKeepFiles contains directory/file that are not directly under cleanFiles: $xs" - ) - } - val toClean = (dirItems filterNot { preserveSet(_) }) ++ fs - toClean + def cleanFilesTask: Initialize[Task[Vector[File]]] = Def.task { Vector.empty[File] } + private[this] def cleanTask: Initialize[Task[Unit]] = Def.task { + val defaults = Seq(managedDirectory.value ** AllPassFilter, target.value ** AllPassFilter) + val excludes = cleanKeepFiles.value.map { + // This mimics the legacy behavior of cleanFilesTask + case f if f.isDirectory => f * AllPassFilter + case f => f.toGlob + } ++ cleanKeepGlobs.value + val excludeFilter: File => Boolean = excludes.toFileFilter.accept + val globDeletions = defaults.unique.filterNot(excludeFilter) + val toDelete = cleanFiles.value.filterNot(excludeFilter) match { + case f @ Seq(_, _*) => (globDeletions ++ f).distinct + case _ => globDeletions } + val logger = streams.value.log + toDelete.sorted.reverseIterator.foreach { f => + logger.debug(s"clean -- deleting file $f") + try Files.deleteIfExists(f.toPath) + catch { + case _: DirectoryNotEmptyException => + logger.debug(s"clean -- unable to delete non-empty directory $f") + } + } + } def bgRunMainTask( products: Initialize[Task[Classpath]], diff --git a/main/src/main/scala/sbt/Keys.scala b/main/src/main/scala/sbt/Keys.scala index d7c20b91e..05dced647 100644 --- a/main/src/main/scala/sbt/Keys.scala +++ b/main/src/main/scala/sbt/Keys.scala @@ -150,8 +150,11 @@ object Keys { // Output paths val classDirectory = settingKey[File]("Directory for compiled classes and copied resources.").withRank(AMinusSetting) + @deprecated("Clean is now implemented using globs.", "1.3.0") val cleanFiles = taskKey[Seq[File]]("The files to recursively delete during a clean.").withRank(BSetting) + @deprecated("Clean is now implemented using globs. Prefer the cleanKeepGlobs task", "1.3.0") val cleanKeepFiles = settingKey[Seq[File]]("Files or directories to keep during a clean. Must be direct children of target.").withRank(CSetting) + val cleanKeepGlobs = settingKey[Seq[Glob]]("Globs to keep during a clean. Must be direct children of target.").withRank(CSetting) val crossPaths = settingKey[Boolean]("If true, enables cross paths, which distinguish input and output directories for cross-building.").withRank(ASetting) val taskTemporaryDirectory = settingKey[File]("Directory used for temporary files for tasks that is deleted after each task execution.").withRank(DSetting) diff --git a/sbt/src/sbt-test/actions/clean-keep/build.sbt b/sbt/src/sbt-test/actions/clean-keep/build.sbt index 64de3093b..38f36e178 100644 --- a/sbt/src/sbt-test/actions/clean-keep/build.sbt +++ b/sbt/src/sbt-test/actions/clean-keep/build.sbt @@ -1,6 +1,6 @@ cleanKeepFiles ++= Seq( target.value / "keep", - target.value / "keepfile", - target.value / "keepdir" + target.value / "keepfile" ) +cleanKeepGlobs += target.value / "keepdir" ** AllPassFilter