From 0ccf3325c8f718ceee676fe16797a7fc3666877d Mon Sep 17 00:00:00 2001
From: Adrien Piquerez
Date: Mon, 11 Nov 2024 15:24:04 +0100
Subject: [PATCH] Optimize indexing of aggregate keys

To build the index of all aggregate keys, we were computing the reverse
aggregation of each key before indexing it, so each aggregate key was indexed
many times, once for each aggregated project. This work was parallelized to
reduce the latency.

In this PR, we compute the reverse aggregation of all the keys at once,
removing the duplicated work. We can no longer parallelize this process, but
we don't need to, because it is much faster: it reduces the total CPU time by
12%. The wall-clock impact for the user depends on their number of cores.
---
 .../main/scala/sbt/internal/Aggregation.scala | 45 ++++++++++++-------
 .../main/scala/sbt/internal/KeyIndex.scala    | 28 +++---------
 main/src/main/scala/sbt/internal/Load.scala   |  2 +-
 3 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/main/src/main/scala/sbt/internal/Aggregation.scala b/main/src/main/scala/sbt/internal/Aggregation.scala
index 6f106952e..9afa960ce 100644
--- a/main/src/main/scala/sbt/internal/Aggregation.scala
+++ b/main/src/main/scala/sbt/internal/Aggregation.scala
@@ -266,29 +266,40 @@ object Aggregation {
     else extra.aggregates.forward(ref)
   }
 
+  /**
+   * Compute the reverse aggregate keys of all the `keys` at once.
+   * This is faster than computing the reverse aggregate keys of each key individually,
+   * which would duplicate work: one aggregate key is the aggregation of many keys.
+   */
+  def reverseAggregate[Proj](
+      keys: Set[ScopedKey[?]],
+      extra: BuildUtil[Proj],
+  ): Iterable[ScopedKey[?]] =
+    val mask = ScopeMask()
+    def recur(keys: Set[ScopedKey[?]], acc: Set[ScopedKey[?]]): Set[ScopedKey[?]] =
+      if keys.isEmpty then acc
+      else
+        val aggKeys = for
+          key <- keys
+          ref <- projectAggregates(key.scope.project.toOption, extra, reverse = true)
+          toResolve = key.scope.copy(project = Select(ref))
+          resolved = Resolve(extra, Zero, key.key, mask)(toResolve)
+          scoped = ScopedKey(resolved, key.key)
+          if !acc.contains(scoped)
+        yield scoped
+        val filteredAggKeys = aggKeys.filter(aggregationEnabled(_, extra.data))
+        // recursive because an aggregate project can be aggregated in another aggregate project
+        recur(filteredAggKeys, acc ++ filteredAggKeys)
+    recur(keys, keys)
+
   def aggregate[A1, Proj](
       key: ScopedKey[A1],
       rawMask: ScopeMask,
-      extra: BuildUtil[Proj],
-      reverse: Boolean = false
+      extra: BuildUtil[Proj]
   ): Seq[ScopedKey[A1]] =
     val mask = rawMask.copy(project = true)
     Dag.topologicalSort(key): (k) =>
-      if reverse then reverseAggregatedKeys(k, extra, mask)
-      else if aggregationEnabled(k, extra.data) then aggregatedKeys(k, extra, mask)
-      else Nil
-
-  def reverseAggregatedKeys[T](
-      key: ScopedKey[T],
-      extra: BuildUtil[?],
-      mask: ScopeMask
-  ): Seq[ScopedKey[T]] =
-    projectAggregates(key.scope.project.toOption, extra, reverse = true) flatMap { ref =>
-      val toResolve = key.scope.copy(project = Select(ref))
-      val resolved = Resolve(extra, Zero, key.key, mask)(toResolve)
-      val skey = ScopedKey(resolved, key.key)
-      if (aggregationEnabled(skey, extra.data)) skey :: Nil else Nil
-    }
+      if aggregationEnabled(k, extra.data) then aggregatedKeys(k, extra, mask) else Nil
 
   def aggregatedKeys[T](
       key: ScopedKey[T],
diff --git a/main/src/main/scala/sbt/internal/KeyIndex.scala b/main/src/main/scala/sbt/internal/KeyIndex.scala
index 4c6be3ef4..9b3545413 100644
--- a/main/src/main/scala/sbt/internal/KeyIndex.scala
+++ b/main/src/main/scala/sbt/internal/KeyIndex.scala
@@ -29,30 +29,14 @@ object KeyIndex {
   }
 
   def aggregate(
-      known: Iterable[ScopedKey[?]],
+      known: Set[ScopedKey[?]],
       extra: BuildUtil[?],
       projects: Map[URI, Set[String]],
       configurations: Map[String, Seq[Configuration]]
-  ): ExtendableKeyIndex = {
-    /*
-     * Used to be:
-     * (base(projects, configurations) /: known) { (index, key) =>
-     *   index.addAggregated(key, extra)
-     * }
-     * This was a significant serial bottleneck during project loading that we can work around by
-     * computing the aggregations in parallel and then bulk adding them to the index.
-     */
-    import scala.collection.parallel.CollectionConverters.*
-    val toAggregate = known.par.map {
-      case key if validID(key.key.label) =>
-        Aggregation.aggregate(key, ScopeMask(), extra, reverse = true)
-      case _ => Nil
-    }
-    toAggregate.foldLeft(base(projects, configurations)) {
-      case (index, Nil) => index
-      case (index, keys) => keys.foldLeft(index)(_.add(_))
-    }
-  }
+  ): ExtendableKeyIndex =
+    Aggregation
+      .reverseAggregate(known.filter(k => validID(k.key.label)), extra)
+      .foldLeft(base(projects, configurations))(_.add(_))
 
   private def base(
       projects: Map[URI, Set[String]],
@@ -278,7 +262,7 @@ private[sbt] final class KeyIndex0(val data: BuildIndex) extends ExtendableKeyIn
 
   def addAggregated(scoped: ScopedKey[?], extra: BuildUtil[?]): ExtendableKeyIndex =
     if (validID(scoped.key.label)) {
-      val aggregateProjects = Aggregation.aggregate(scoped, ScopeMask(), extra, reverse = true)
+      val aggregateProjects = Aggregation.reverseAggregate(Set(scoped), extra)
      aggregateProjects.foldLeft(this: ExtendableKeyIndex)(_.add(_))
     } else this
 
diff --git a/main/src/main/scala/sbt/internal/Load.scala b/main/src/main/scala/sbt/internal/Load.scala
index 4c659f064..d88cdbab7 100755
--- a/main/src/main/scala/sbt/internal/Load.scala
+++ b/main/src/main/scala/sbt/internal/Load.scala
@@ -376,7 +376,7 @@ private[sbt] object Load {
   ): StructureIndex = {
     val keys = Index.allKeys(settings)
     val attributeKeys = data.attributeKeys ++ keys.map(_.key)
-    val scopedKeys = (keys ++ data.keys).toVector
+    val scopedKeys = keys ++ data.keys
     val projectsMap = projects.view.mapValues(_.defined.keySet).toMap
     val configsMap: Map[String, Seq[Configuration]] =
       projects.values.flatMap(bu => bu.defined map { case (k, v) => (k, v.configurations) }).toMap
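
The core of the change is the switch from per-key reverse aggregation to a single breadth-first expansion over the whole key set, so a key derived for an aggregate project is resolved at most once instead of once per aggregated project; per the commit message, this deduplication is where the 12% CPU-time reduction comes from. The snippet below is a minimal, self-contained sketch of that traversal under simplified assumptions: `Key`, `aggregatedBy`, and `reverseAggregateDemo` are hypothetical stand-ins for sbt's `ScopedKey` and the reverse of `BuildUtil.aggregates`, not part of the sbt API, and the sketch omits the scope resolution (`Resolve`) and the `aggregationEnabled` filter that the real `reverseAggregate` performs.

```scala
// A minimal sketch of the batched reverse aggregation, not the sbt implementation.
// `Key` and `aggregatedBy` are hypothetical stand-ins for sbt's ScopedKey and
// the reverse of BuildUtil.aggregates.
object ReverseAggregateSketch:

  final case class Key(project: String, label: String)

  // project -> projects that aggregate it (the reverse aggregation relation)
  val aggregatedBy: Map[String, Set[String]] = Map(
    "core" -> Set("root"),
    "util" -> Set("core", "root"),
    "root" -> Set.empty
  )

  // Expand `keys` with every aggregate project that reaches them, producing each
  // derived key once even when many input keys share the same aggregate project.
  def reverseAggregate(keys: Set[Key]): Set[Key] =
    @annotation.tailrec
    def recur(frontier: Set[Key], acc: Set[Key]): Set[Key] =
      if frontier.isEmpty then acc
      else
        val next = for
          key <- frontier
          agg <- aggregatedBy.getOrElse(key.project, Set.empty)
          derived = key.copy(project = agg)
          if !acc.contains(derived)
        yield derived
        // aggregate projects can themselves be aggregated, so keep expanding
        recur(next, acc ++ next)
    recur(keys, keys)

@main def reverseAggregateDemo(): Unit =
  import ReverseAggregateSketch.*
  val keys = Set(Key("util", "compile"), Key("core", "compile"))
  // Key("root", "compile") appears once in the result even though root
  // aggregates both util and core.
  println(reverseAggregate(keys))
```

Because each round only keeps keys that are not already in the accumulator, the traversal terminates even when aggregate projects are themselves aggregated by other projects, mirroring the recursion note in the patched `reverseAggregate`.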