Improve sbt build load time by 25%

The sbt project load made a number of relatively inefficient
transformations of scala collecitons. I went through and found the slow
parts during project loading and made my best attempt at fixing them.
The most significant changes I made were in places using IMap. An IMap
is more or less a wrapper around an immutable Map. It can be much faster
to construct an IMap by creating a java mutable hashmap, wrapping it a
scala Map that delegates to the underlying java hashmap (with a copy on
write if the map is modified) and constructing the IMap from the wrapped
map. It was also in many cases to parallelize some transformations
wherever the order didn't matter.

After applying all of these changes, I found that loading the sbt
project took generally between 8.5 and 9 seconds on my laptop. With
1.3.13, it hovered around 11.5 seconds. I saw a similar speedup in zinc.
The biggest specific improvement was that generating the compiled map
dropped from between 3.5-4 seconds to pretty consistently being around
1.5 seconds.
This commit is contained in:
Ethan Atkins 2020-07-26 10:27:48 -07:00
parent 627a79c254
commit 38d67cfdf0
6 changed files with 170 additions and 43 deletions

View File

@ -0,0 +1,17 @@
/*
* sbt
* Copyright 2011 - 2018, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*/
package sbt.internal.util
import scala.collection.JavaConverters._
private[util] class WrappedMap[K, V](val jmap: java.util.Map[K, V]) extends Map[K, V] {
def +[V1 >: V](kv: (K, V1)): scala.collection.immutable.Map[K, V1] =
jmap.asScala.toMap + kv
def -(key: K): scala.collection.immutable.Map[K, V] = jmap.asScala.toMap - key
def get(key: K): Option[V] = Option(jmap.get(key))
def iterator: Iterator[(K, V)] = jmap.asScala.iterator
}

View File

@ -0,0 +1,18 @@
/*
* sbt
* Copyright 2011 - 2018, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*/
package sbt.internal.util
import scala.collection.JavaConverters._
private[util] class WrappedMap[K, V](val jmap: java.util.Map[K, V]) extends Map[K, V] {
def removed(key: K): scala.collection.immutable.Map[K, V] = jmap.asScala.toMap.removed(key)
def updated[V1 >: V](key: K, value: V1): scala.collection.immutable.Map[K, V1] =
jmap.asScala.toMap.updated(key, value)
def get(key: K): Option[V] = Option(jmap.get(key))
def iterator: Iterator[(K, V)] = jmap.asScala.iterator
}

View File

@ -55,7 +55,10 @@ object IMap {
*/
def empty[K[_], V[_]]: IMap[K, V] = new IMap0[K, V](Map.empty)
private[this] class IMap0[K[_], V[_]](backing: Map[K[_], V[_]])
private[sbt] def fromJMap[K[_], V[_]](map: java.util.Map[K[_], V[_]]): IMap[K, V] =
new IMap0[K, V](new WrappedMap(map))
private[sbt] class IMap0[K[_], V[_]](val backing: Map[K[_], V[_]])
extends AbstractRMap[K, V]
with IMap[K, V] {
def get[T](k: K[T]): Option[V[T]] = (backing get k).asInstanceOf[Option[V[T]]]
@ -69,15 +72,16 @@ object IMap {
new IMap0[K, V2](Map(backing.iterator.map { case (k, v) => k -> f(v) }.toArray: _*))
def mapSeparate[VL[_], VR[_]](f: V ~> λ[T => Either[VL[T], VR[T]]]) = {
val mapped = backing.iterator.map {
val left = new java.util.concurrent.ConcurrentHashMap[K[_], VL[_]]
val right = new java.util.concurrent.ConcurrentHashMap[K[_], VR[_]]
Par(backing.toVector).foreach {
case (k, v) =>
f(v) match {
case Left(l) => Left((k, l)): Either[(K[_], VL[_]), (K[_], VR[_])]
case Right(r) => Right((k, r)): Either[(K[_], VL[_]), (K[_], VR[_])]
case Left(l) => left.put(k, l)
case Right(r) => right.put(k, r)
}
}
val (l, r) = Util.separateE[(K[_], VL[_]), (K[_], VR[_])](mapped.toList)
(new IMap0[K, VL](l.toMap), new IMap0[K, VR](r.toMap))
(new IMap0[K, VL](new WrappedMap(left)), new IMap0[K, VR](new WrappedMap(right)))
}
def toSeq = backing.toSeq

View File

@ -173,10 +173,15 @@ trait Init[ScopeType] {
)
private[this] def applyDefaults(ss: Seq[Setting[_]]): Seq[Setting[_]] = {
val (defaults, others) = Util.separate[Setting[_], DefaultSetting[_], Setting[_]](ss) {
case u: DefaultSetting[_] => Left(u); case s => Right(s)
val result = new java.util.LinkedHashSet[Setting[_]]
val others = new java.util.ArrayList[Setting[_]]
ss.foreach {
case u: DefaultSetting[_] => result.add(u)
case r => others.add(r)
}
(defaults.distinct: Seq[Setting[_]]) ++ others
result.addAll(others)
import scala.collection.JavaConverters._
result.asScala.toVector
}
def compiled(init: Seq[Setting[_]], actual: Boolean = true)(
@ -187,11 +192,12 @@ trait Init[ScopeType] {
val initDefaults = applyDefaults(init)
// inject derived settings into scopes where their dependencies are directly defined
// and prepend per-scope settings
val derived = deriveAndLocal(initDefaults)
val derived = deriveAndLocal(initDefaults, mkDelegates(delegates))
// group by Scope/Key, dropping dead initializations
val sMap: ScopedMap = grouped(derived)
// delegate references to undefined values according to 'delegates'
val dMap: ScopedMap = if (actual) delegate(sMap)(delegates, display) else sMap
val dMap: ScopedMap =
if (actual) delegate(sMap)(delegates, display) else sMap
// merge Seq[Setting[_]] into Compiled
compile(dMap)
}
@ -223,15 +229,39 @@ trait Init[ScopeType] {
def sort(cMap: CompiledMap): Seq[Compiled[_]] =
Dag.topologicalSort(cMap.values)(_.dependencies.map(cMap))
def compile(sMap: ScopedMap): CompiledMap =
sMap.toTypedSeq.map {
case sMap.TPair(k, ss) =>
val deps = ss.flatMap(_.dependencies).toSet
(k, new Compiled(k, deps, ss))
}.toMap
def compile(sMap: ScopedMap): CompiledMap = sMap match {
case m: IMap.IMap0[ScopedKey, SettingSeq] @unchecked =>
Par(m.backing.toVector)
.map {
case (k, ss) =>
val deps = ss.flatMap(_.dependencies).toSet
(
k,
new Compiled(k.asInstanceOf[ScopedKey[Any]], deps, ss.asInstanceOf[SettingSeq[Any]])
)
}
.toVector
.toMap
case _ =>
sMap.toTypedSeq.map {
case sMap.TPair(k, ss) =>
val deps = ss.flatMap(_.dependencies)
(k, new Compiled(k, deps, ss))
}.toMap
}
def grouped(init: Seq[Setting[_]]): ScopedMap =
init.foldLeft(IMap.empty: ScopedMap)((m, s) => add(m, s))
def grouped(init: Seq[Setting[_]]): ScopedMap = {
val result = new java.util.HashMap[ScopedKey[_], Seq[Setting[_]]]
init.foreach { s =>
result.putIfAbsent(s.key, Vector(s)) match {
case null =>
case ss => result.put(s.key, if (s.definitive) Vector(s) else ss :+ s)
}
}
IMap.fromJMap[ScopedKey, SettingSeq](
result.asInstanceOf[java.util.Map[ScopedKey[_], SettingSeq[_]]]
)
}
def add[T](m: ScopedMap, s: Setting[T]): ScopedMap =
m.mapValue[T](s.key, Vector.empty[Setting[T]], ss => append(ss, s))
@ -251,22 +281,36 @@ trait Init[ScopeType] {
delegateForKey(sMap, k, delegates(k.scope), ref, selfRefOk || !isFirst)
}
type ValidatedSettings[T] = Either[Seq[Undefined], SettingSeq[T]]
val f = λ[SettingSeq ~> ValidatedSettings] { (ks: Seq[Setting[_]]) =>
val (undefs, valid) = Util.separate(ks.zipWithIndex) {
case (s, i) => s validateKeyReferenced refMap(s, i == 0)
}
if (undefs.isEmpty) Right(valid) else Left(undefs.flatten)
import scala.collection.JavaConverters._
val undefined = new java.util.ArrayList[Undefined]
val result = new java.util.concurrent.ConcurrentHashMap[ScopedKey[_], Any]
val backing = sMap.toSeq
Par(backing).foreach {
case (key, settings) =>
val valid = new java.util.ArrayList[Setting[_]]
val undefs = new java.util.ArrayList[Undefined]
def validate(s: Setting[_], first: Boolean): Unit = {
s.validateKeyReferenced(refMap(s, first)) match {
case Right(v) => valid.add(v); ()
case Left(us) => us.foreach(u => undefs.add(u))
}
}
settings.headOption match {
case Some(s) =>
validate(s, true)
settings.tail.foreach(validate(_, false))
case _ =>
}
if (undefs.isEmpty) result.put(key, valid.asScala.toVector)
else undefined.addAll(undefs)
}
type Undefs[_] = Seq[Undefined]
val (undefineds, result) = sMap.mapSeparate[Undefs, SettingSeq](f)
if (undefineds.isEmpty)
result
if (undefined.isEmpty)
IMap.fromJMap[ScopedKey, SettingSeq](
result.asInstanceOf[java.util.Map[ScopedKey[_], SettingSeq[_]]]
)
else
throw Uninitialized(sMap.keys.toSeq, delegates, undefineds.values.flatten.toList, false)
throw Uninitialized(sMap.keys.toSeq, delegates, undefined.asScala.toList, false)
}
private[this] def delegateForKey[T](
@ -436,19 +480,59 @@ trait Init[ScopeType] {
} else ""
}
/**
* The intersect method was calling Seq.contains which is very slow compared
* to converting the Seq to a Set and calling contains on the Set. This
* private trait abstracts out the two ways that Seq[ScopeType] was actually
* used, `contains` and `exists`. In mkDelegates, we can create and cache
* instances of Delegates so that we don't have to repeatedly convert the
* same Seq to Set. On a 2020 16" macbook pro, creating the compiled map
* for the sbt project is roughly 2 seconds faster after this change
* (about 3.5 seconds before compared to about 1.5 seconds after)
*
*/
private trait Delegates {
def contains(s: ScopeType): Boolean
def exists(f: ScopeType => Boolean): Boolean
}
private[this] def mkDelegates(delegates: ScopeType => Seq[ScopeType]): ScopeType => Delegates = {
val delegateMap = new java.util.concurrent.ConcurrentHashMap[ScopeType, Delegates]
s =>
delegateMap.get(s) match {
case null =>
val seq = delegates(s)
val set = seq.toSet
val d = new Delegates {
override def contains(s: ScopeType): Boolean = set.contains(s)
override def exists(f: ScopeType => Boolean): Boolean = seq.exists(f)
}
delegateMap.put(s, d)
d
case d => d
}
}
/**
* Intersects two scopes, returning the more specific one if they intersect, or None otherwise.
*/
private[sbt] def intersect(s1: ScopeType, s2: ScopeType)(
implicit delegates: ScopeType => Seq[ScopeType]
): Option[ScopeType] = intersectDelegates(s1, s2, mkDelegates(delegates))
/**
* Intersects two scopes, returning the more specific one if they intersect, or None otherwise.
*/
private def intersectDelegates(
s1: ScopeType,
s2: ScopeType,
delegates: ScopeType => Delegates
): Option[ScopeType] =
if (delegates(s1).contains(s2)) Some(s1) // s1 is more specific
else if (delegates(s2).contains(s1)) Some(s2) // s2 is more specific
else None
private[this] def deriveAndLocal(init: Seq[Setting[_]])(
implicit delegates: ScopeType => Seq[ScopeType],
scopeLocal: ScopeLocal
private[this] def deriveAndLocal(init: Seq[Setting[_]], delegates: ScopeType => Delegates)(
implicit scopeLocal: ScopeLocal
): Seq[Setting[_]] = {
import collection.mutable
@ -467,9 +551,10 @@ trait Init[ScopeType] {
}
// separate `derived` settings from normal settings (`defs`)
val (derived, rawDefs) = Util.separate[Setting[_], Derived, Setting[_]](init) {
case d: DerivedSetting[_] => Left(new Derived(d)); case s => Right(s)
}
val (derived, rawDefs) =
Util.separate[Setting[_], Derived, Setting[_]](init) {
case d: DerivedSetting[_] => Left(new Derived(d)); case s => Right(s)
}
val defs = addLocal(rawDefs)(scopeLocal)
// group derived settings by the key they define
@ -518,7 +603,7 @@ trait Init[ScopeType] {
val scope = sk.scope
def localAndDerived(d: Derived): Seq[Setting[_]] = {
def definingScope = d.setting.key.scope
val outputScope = intersect(scope, definingScope)
val outputScope = intersectDelegates(scope, definingScope, delegates)
outputScope collect {
case s if !d.inScopes.contains(s) && d.setting.filter(s) =>
val local = d.dependencies.flatMap(dep => scopeLocal(ScopedKey(s, dep)))

View File

@ -361,9 +361,12 @@ object Index {
settings: Set[AttributeKey[_]]
)(label: AttributeKey[_] => String): Map[String, AttributeKey[_]] = {
val multiMap = settings.groupBy(label)
val duplicates = multiMap collect { case (k, xs) if xs.size > 1 => (k, xs.map(_.manifest)) } collect {
case (k, xs) if xs.size > 1 => (k, xs)
}
val duplicates = multiMap.iterator
.collect { case (k, xs) if xs.size > 1 => (k, xs.map(_.manifest)) }
.collect {
case (k, xs) if xs.size > 1 => (k, xs)
}
.toVector
if (duplicates.isEmpty)
multiMap.collect { case (k, v) if validID(k) => (k, v.head) } toMap
else

View File

@ -22,7 +22,7 @@ object KeyIndex {
projects: Map[URI, Set[String]],
configurations: Map[String, Seq[Configuration]]
): ExtendableKeyIndex =
known.foldLeft(base(projects, configurations)) { _ add _ }
known.par.foldLeft(base(projects, configurations)) { _ add _ }
def aggregate(
known: Iterable[ScopedKey[_]],
extra: BuildUtil[_],