Optimize indexing of aggregate keys

To build the index of all aggregate keys, we were computing the reverse
aggregation of each key, before indexing them.
And so each aggregate key was indexed many times, once for each
aggregated project. It was parallelized to reduce the latency.
In this PR, we compute the reverse aggregation of all the keys at once,
removing all duplication. We cannot parallelize this process anymore
but we don't need to, because it is a lot faster. It reduces the total
CPU time by 12%. The impact for the user depends on its number of cores.
This commit is contained in:
Adrien Piquerez 2024-11-11 15:24:04 +01:00
parent f4f185a1c1
commit 0ccf3325c8
3 changed files with 35 additions and 40 deletions

View File

@ -266,29 +266,40 @@ object Aggregation {
else extra.aggregates.forward(ref)
}
/**
* Compute the reverse aggregate keys of all the `keys` at once.
* This is more performant than computing the revere aggregate keys of each key individually
* because of the duplicates. One aggregate key is the aggregation of many keys.
*/
def reverseAggregate[Proj](
keys: Set[ScopedKey[?]],
extra: BuildUtil[Proj],
): Iterable[ScopedKey[?]] =
val mask = ScopeMask()
def recur(keys: Set[ScopedKey[?]], acc: Set[ScopedKey[?]]): Set[ScopedKey[?]] =
if keys.isEmpty then acc
else
val aggKeys = for
key <- keys
ref <- projectAggregates(key.scope.project.toOption, extra, reverse = true)
toResolve = key.scope.copy(project = Select(ref))
resolved = Resolve(extra, Zero, key.key, mask)(toResolve)
scoped = ScopedKey(resolved, key.key)
if !acc.contains(scoped)
yield scoped
val filteredAggKeys = aggKeys.filter(aggregationEnabled(_, extra.data))
// recursive because an aggregate project can be aggregated in another aggregate project
recur(filteredAggKeys, acc ++ filteredAggKeys)
recur(keys, keys)
def aggregate[A1, Proj](
key: ScopedKey[A1],
rawMask: ScopeMask,
extra: BuildUtil[Proj],
reverse: Boolean = false
extra: BuildUtil[Proj]
): Seq[ScopedKey[A1]] =
val mask = rawMask.copy(project = true)
Dag.topologicalSort(key): (k) =>
if reverse then reverseAggregatedKeys(k, extra, mask)
else if aggregationEnabled(k, extra.data) then aggregatedKeys(k, extra, mask)
else Nil
def reverseAggregatedKeys[T](
key: ScopedKey[T],
extra: BuildUtil[?],
mask: ScopeMask
): Seq[ScopedKey[T]] =
projectAggregates(key.scope.project.toOption, extra, reverse = true) flatMap { ref =>
val toResolve = key.scope.copy(project = Select(ref))
val resolved = Resolve(extra, Zero, key.key, mask)(toResolve)
val skey = ScopedKey(resolved, key.key)
if (aggregationEnabled(skey, extra.data)) skey :: Nil else Nil
}
if aggregationEnabled(k, extra.data) then aggregatedKeys(k, extra, mask) else Nil
def aggregatedKeys[T](
key: ScopedKey[T],

View File

@ -29,30 +29,14 @@ object KeyIndex {
}
def aggregate(
known: Iterable[ScopedKey[?]],
known: Set[ScopedKey[?]],
extra: BuildUtil[?],
projects: Map[URI, Set[String]],
configurations: Map[String, Seq[Configuration]]
): ExtendableKeyIndex = {
/*
* Used to be:
* (base(projects, configurations) /: known) { (index, key) =>
* index.addAggregated(key, extra)
* }
* This was a significant serial bottleneck during project loading that we can work around by
* computing the aggregations in parallel and then bulk adding them to the index.
*/
import scala.collection.parallel.CollectionConverters.*
val toAggregate = known.par.map {
case key if validID(key.key.label) =>
Aggregation.aggregate(key, ScopeMask(), extra, reverse = true)
case _ => Nil
}
toAggregate.foldLeft(base(projects, configurations)) {
case (index, Nil) => index
case (index, keys) => keys.foldLeft(index)(_.add(_))
}
}
): ExtendableKeyIndex =
Aggregation
.reverseAggregate(known.filter(k => validID(k.key.label)), extra)
.foldLeft(base(projects, configurations))(_.add(_))
private def base(
projects: Map[URI, Set[String]],
@ -278,7 +262,7 @@ private[sbt] final class KeyIndex0(val data: BuildIndex) extends ExtendableKeyIn
def addAggregated(scoped: ScopedKey[?], extra: BuildUtil[?]): ExtendableKeyIndex =
if (validID(scoped.key.label)) {
val aggregateProjects = Aggregation.aggregate(scoped, ScopeMask(), extra, reverse = true)
val aggregateProjects = Aggregation.reverseAggregate(Set(scoped), extra)
aggregateProjects.foldLeft(this: ExtendableKeyIndex)(_.add(_))
} else this

View File

@ -376,7 +376,7 @@ private[sbt] object Load {
): StructureIndex = {
val keys = Index.allKeys(settings)
val attributeKeys = data.attributeKeys ++ keys.map(_.key)
val scopedKeys = (keys ++ data.keys).toVector
val scopedKeys = keys ++ data.keys
val projectsMap = projects.view.mapValues(_.defined.keySet).toMap
val configsMap: Map[String, Seq[Configuration]] =
projects.values.flatMap(bu => bu.defined map { case (k, v) => (k, v.configurations) }).toMap