[2.x] fix: Collapse duplicate subtrees in dependency tree rendering (#9226)

Fixes #6886.

dependencyTree, dependencyBrowseTree, and inspect tree re-explore
the same node once per incoming edge. In a DAG with N levels and M
children per node the rendered output is O(M^N) -- the OP needed
>16 GB heap, #7360 has a 6 GB heap dump, and Friendseeker's analysis
on the issue showed the exponential re-traversal directly.

Fix: track a visited set across the renderer's recursion. The first
time a node is encountered it is rendered in full; on subsequent
visits the entry collapses to a one-line +- <id> (*) (ASCII) or a
<id> (*) leaf (JSON), matching Maven's dependency:tree (*)
convention. Cycle detection (separate parents set, (cycle) marker)
is unchanged.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
BrianHotopp 2026-05-18 19:15:47 -04:00 committed by GitHub
parent 536e676481
commit 3eeac9deae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 364 additions and 35 deletions

View File

@ -96,11 +96,20 @@ object Graph {
}) +
s.slice(at + 1, s.length)
else s
def toAsciiLines(node: A, level: Int, parents: Set[A]): Vector[String] =
// Owned by toAsciiLines; grows monotonically over one render.
import scala.collection.mutable
val visited = mutable.Set.empty[A]
def toAsciiLines(node: A, level: Int, parents: Set[A]): Vector[String] = {
val prefix = if (level == 0) "" else "+-"
if (parents contains node) // cycle
Vector(limitLine((twoSpaces * level) + "#-" + display(node) + " (cycle)"))
else if (visited contains node)
// `prefix` is always "+-" here in practice (root can't re-enter),
// but mirror the level-0 form for symmetry.
Vector(limitLine((twoSpaces * level) + prefix + display(node) + " (*)"))
else {
val line = limitLine((twoSpaces * level) + (if (level == 0) "" else "+-") + display(node))
visited += node
val line = limitLine((twoSpaces * level) + prefix + display(node))
val cs = Vector(children(node)*)
val childLines = cs map {
toAsciiLines(_, level + 1, parents + node)
@ -116,6 +125,7 @@ object Graph {
}
line +: withBar
}
}
toAsciiLines(top, 0, Set.empty).mkString("\n")
}

View File

@ -17,12 +17,12 @@ import sjsonnew.support.scalajson.unsafe.{ CompactPrinter, Converter }
import java.io.{ File, FileOutputStream, InputStream, OutputStream }
import java.net.URI
import scala.collection.mutable
import scala.util.Using
object TreeView {
def createJson(graph: ModuleGraph): String = {
val moduleModels = graph.roots
.map(module => processSubtree(graph, module))
val moduleModels = graph.roots.map(module => processSubtree(graph, module))
val js = moduleModels.map(Converter.toJsonUnsafe(_))
js.map(CompactPrinter).mkString("[", ",", "]")
}
@ -45,26 +45,41 @@ object TreeView {
graph: ModuleGraph,
module: Module,
parents: Set[GraphModuleId] = Set()
): ModuleModel =
processSubtreeImpl(graph, module, parents, mutable.Set.empty[GraphModuleId])
// `visited` is owned by the caller; do not share it across renders.
private def processSubtreeImpl(
graph: ModuleGraph,
module: Module,
parents: Set[GraphModuleId],
visited: mutable.Set[GraphModuleId]
): ModuleModel = {
val cycle = parents.contains(module.id)
val dependencies = if (cycle) List() else graph.dependencyMap.getOrElse(module.id, List())
val duplicate = !cycle && visited.contains(module.id)
val dependencies =
if (cycle || duplicate) List()
else {
visited += module.id
graph.dependencyMap.getOrElse(module.id, List())
}
val children =
dependencies
.map(dependency => processSubtree(graph, dependency, parents + module.id))
.map(dependency => processSubtreeImpl(graph, dependency, parents + module.id, visited))
.toVector
moduleAsModuleAgain(module, cycle, children)
ModuleModel(displayText(module, cycle, duplicate), children)
}
private def moduleAsModuleAgain(
module: Module,
isCycle: Boolean,
children: Vector[ModuleModel]
): ModuleModel = {
val eviction = module.evictedByVersion.map(version => s" (evicted by $version)").getOrElse("")
val cycle = if (isCycle) " (cycle)" else ""
val error = module.error.map(err => s" (errors: $err)").getOrElse("")
val text = module.id.idString + eviction + error + cycle
ModuleModel(text, children)
// Suffix order is pinned by TreeViewTest "concatenate marker suffixes
// in a stable order"; change here means change the test.
private def displayText(module: Module, isCycle: Boolean, isDuplicate: Boolean): String = {
val suffixes = Vector(
module.evictedByVersion.map(v => s" (evicted by $v)"),
module.error.map(err => s" (errors: $err)"),
if (isCycle) Some(" (cycle)") else None,
if (isDuplicate) Some(" (*)") else None,
).flatten
module.id.idString + suffixes.mkString
}
def saveResource(resourcePath: String, to: File): Unit = {

View File

@ -0,0 +1,78 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*/
package sbt.internal.graph.rendering
import sbt.internal.Graph
import verify.BasicTestSuite
object AsciiTreeTest extends BasicTestSuite:
private def render(adj: Map[Int, Seq[Int]], root: Int, width: Int = 80): String =
Graph.toAscii[Int](root, n => adj.getOrElse(n, Nil), _.toString, width)
test("strict tree renders without markers") {
val adj = Map(0 -> Seq(1, 2), 1 -> Seq(3))
val out = render(adj, 0)
assert(!out.contains("(*)"), s"unexpected (*):\n$out")
assert(!out.contains("(cycle)"), s"unexpected (cycle):\n$out")
assert(out.contains("3"), s"node 3 missing:\n$out")
}
test("diamond DAG collapses the duplicate subtree to a single (*) line") {
// 0
// / \
// 1 2
// \ /
// 3
val adj = Map(0 -> Seq(1, 2), 1 -> Seq(3), 2 -> Seq(3))
val out = render(adj, 0)
assert(out.contains("3 (*)"), s"missing collapsed `3 (*)`:\n$out")
val nonDup = out.linesIterator.filter(l => l.contains("3") && !l.contains("(*)")).toList
assert(nonDup.size == 1, s"node 3 must render exactly once in non-collapsed form: $nonDup")
}
test("cycle uses (cycle), not (*)") {
val adj = Map(0 -> Seq(1), 1 -> Seq(0))
val out = render(adj, 0)
assert(out.contains("(cycle)"), s"missing (cycle):\n$out")
assert(!out.contains("(*)"), s"unexpected (*):\n$out")
}
test("canonical occurrence is the first-visited in DFS order") {
// top -> [a, b]; a -> [b]. DFS visits `a` first and expands `b`
// under it; `b` as `top`'s direct second child must collapse.
val adj = Map(0 -> Seq(1, 2), 1 -> Seq(2))
val out = render(adj, 0)
val lines = out.linesIterator.toVector
val firstTwo = lines.indexWhere(l => l.contains("2") && !l.contains("(*)"))
val collapsedTwo = lines.indexWhere(l => l.contains("2 (*)"))
assert(firstTwo >= 0, s"non-duplicate `2` missing:\n$out")
assert(collapsedTwo >= 0, s"collapsed `2 (*)` missing:\n$out")
assert(
firstTwo < collapsedTwo,
s"non-duplicate `2` must render before `(*)` copy (got $firstTwo < $collapsedTwo):\n$out"
)
}
test("exponential DAG renders linear-sized output") {
// 12 nodes; each i depends on every j > i. Without dedup the
// output is O(2^11) lines.
val n = 12
val adj: Map[Int, Seq[Int]] =
(0 until n).map(i => i -> ((i + 1) until n).toVector).toMap
val out = render(adj, 0, width = 200)
val lineCount = out.linesIterator.size
assert(lineCount < 200, s"DAG render exploded to $lineCount lines")
(0 until n).foreach { i =>
val nonDup = out.linesIterator.exists(l => l.contains(s"$i") && !l.contains("(*)"))
assert(nonDup, s"node $i never rendered in non-collapsed form")
}
}
end AsciiTreeTest

View File

@ -11,7 +11,7 @@ package sbt.internal.graph.rendering
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import sbt.internal.graph.rendering.TreeView.createJson
import sbt.internal.graph.{ GraphModuleId, Module, ModuleGraph, ModuleModel }
import sbt.internal.graph.{ Edge, GraphModuleId, Module, ModuleGraph, ModuleModel }
class TreeViewTest extends AnyFlatSpec with Matchers {
val modA = GraphModuleId("orgA", "nameA", "1.0")
@ -51,4 +51,120 @@ class TreeViewTest extends AnyFlatSpec with Matchers {
)
assert(TreeView.processSubtree(graph, Module(modC), Set()) == expected)
}
// -- sbt/sbt#6886: DAG dedup --
it should "collapse a duplicate subtree to a leaf marked with (*)" in {
// a -> b -> c
// a -> c (c appears under two paths)
val a = GraphModuleId("o", "a", "1")
val b = GraphModuleId("o", "b", "1")
val c = GraphModuleId("o", "c", "1")
val d = GraphModuleId("o", "d", "1") // grandchild of c -- proves c's subtree only
// materializes once
val g = ModuleGraph(
nodes = Seq(Module(a), Module(b), Module(c), Module(d)),
edges = Seq(a -> b, a -> c, b -> c, c -> d)
)
val tree = TreeView.processSubtree(g, Module(a))
val expected = ModuleModel(
"o:a:1",
Vector(
ModuleModel(
"o:b:1",
Vector(
ModuleModel("o:c:1", Vector(ModuleModel("o:d:1", Vector())))
)
),
// second occurrence collapsed, no children
ModuleModel("o:c:1 (*)", Vector())
)
)
assert(tree == expected, s"got: $tree")
}
it should "render output linear in DAG node count for an exponential graph" in {
// 8 nodes, each pointing to all later ones. Without dedup the JSON
// would be ~2^7 = 128 inner ModuleModels; with dedup it should be
// strictly bounded by the DAG node count (8) plus the duplicate
// collapsed leaves.
val nodes = ('a' to 'h').map(c => GraphModuleId("o", c.toString, "1")).toVector
val edges: Seq[Edge] = for
i <- nodes.indices
j <- (i + 1) until nodes.size
yield nodes(i) -> nodes(j)
val g = ModuleGraph(nodes = nodes.map(Module(_)), edges = edges)
// Helper: count *unique* texts emitted (each module rendered once
// fully) and total ModuleModel count (with collapsed duplicates).
def walk(m: ModuleModel): (Set[String], Int) =
m.children.foldLeft((Set(m.text), 1)) { case ((seen, n), child) =>
val (s, k) = walk(child)
(seen ++ s, n + k)
}
val (texts, total) = walk(TreeView.processSubtree(g, Module(nodes.head)))
// Every module appears as a non-duplicate text exactly once.
nodes.foreach(n => assert(texts.contains(n.idString), s"missing $n in texts: $texts"))
// Without dedup, total would be 2^7 = 128. With dedup, total is the
// sum 1 + 7 + 6 + 5 + 4 + 3 + 2 + 1 = 29 (node `a`'s full subtree
// plus a duplicate leaf for every other path).
assert(total < 50, s"DAG rendering exploded: $total nodes")
}
it should "concatenate marker suffixes in a stable order (eviction, then (*))" in {
// A module that's both evicted *and* shows up twice in the DAG.
// Pins the suffix order so any future renderer refactor can't
// silently change `(evicted by 2.0) (*)` to `(*) (evicted by 2.0)`
// for callers parsing these lines.
val a = GraphModuleId("o", "a", "1")
val b = GraphModuleId("o", "b", "1")
val c = GraphModuleId("o", "c", "1")
val evicted = Module(c, evictedByVersion = Some("2.0"))
val g = ModuleGraph(
nodes = Seq(Module(a), Module(b), evicted),
edges = Seq(a -> b, a -> c, b -> c)
)
val tree = TreeView.processSubtree(g, Module(a))
// First occurrence (under b): "o:c:1 (evicted by 2.0)"; collapsed
// occurrence (direct child of a): "o:c:1 (evicted by 2.0) (*)".
val texts = collectTexts(tree)
assert(
texts.contains("o:c:1 (evicted by 2.0)"),
s"missing first-occurrence text; got: $texts"
)
assert(
texts.contains("o:c:1 (evicted by 2.0) (*)"),
s"missing collapsed-occurrence text; got: $texts"
)
}
private def collectTexts(m: ModuleModel): Set[String] =
m.children.foldLeft(Set(m.text))((s, c) => s ++ collectTexts(c))
it should "report a cycle differently than a duplicate" in {
// a -> b -> a (cycle)
// a -> c (no cycle, no duplicate)
val a = GraphModuleId("o", "a", "1")
val b = GraphModuleId("o", "b", "1")
val c = GraphModuleId("o", "c", "1")
val g = ModuleGraph(
nodes = Seq(Module(a), Module(b), Module(c)),
edges = Seq(a -> b, b -> a, a -> c)
)
val tree = TreeView.processSubtree(g, Module(a))
val expected = ModuleModel(
"o:a:1",
Vector(
ModuleModel(
"o:b:1",
Vector(ModuleModel("o:a:1 (cycle)", Vector()))
),
ModuleModel("o:c:1", Vector())
)
)
assert(tree == expected, s"got: $tree")
}
}

View File

@ -0,0 +1,66 @@
## Dependency tree: duplicate-subtree collapse
`dependencyTree`, `dependencyBrowseTree`, and `inspect tree` now collapse
duplicate subtrees in a DAG to a single line marked `(*)`, matching the
convention used by Maven's `dependency:tree`. The first occurrence is
rendered in full; subsequent occurrences appear as `+- <id> (*)`.
This fixes [#6886][i6886]: rendering a deep diamond DAG no longer
produces `O(M^N)` output (and the OOMs that came with it).
### Output change
Before:
```
o:root_2.13:0.1
+-o:subA_2.13:0.1 [S]
| +-o:common_2.13:0.1 [S]
| | +-org.scala-lang:scala-library:2.13.16 [S]
| +-org.scala-lang:scala-library:2.13.16 [S]
+-o:subB_2.13:0.1 [S]
| +-o:common_2.13:0.1 [S] # full subtree again
| | +-org.scala-lang:scala-library:2.13.16 [S]
| +-org.scala-lang:scala-library:2.13.16 [S]
+-org.scala-lang:scala-library:2.13.16 [S]
```
After:
```
o:root_2.13:0.1
+-o:subA_2.13:0.1 [S]
| +-o:common_2.13:0.1 [S]
| | +-org.scala-lang:scala-library:2.13.16 [S]
| +-org.scala-lang:scala-library:2.13.16 [S]
+-o:subB_2.13:0.1 [S]
| +-o:common_2.13:0.1 [S] (*) # collapsed
| +-org.scala-lang:scala-library:2.13.16 [S] (*)
+-org.scala-lang:scala-library:2.13.16 [S] (*)
```
### Affected surfaces
- `dependencyTree`: ASCII output (also `dependencyTreeList`,
`dependencyTreeStats`).
- `dependencyBrowseTree`: JSON / HTML view.
- `inspect tree`: the setting-graph renderer is the same code path, so
the `(*)` marker shows up there too. Most users don't think of
`inspect tree` as "the dependency tree" -- this note is the
heads-up.
### Contract for tooling consumers
A line whose entry ends with `(*)` is a reference to the canonical
(first-rendered) occurrence of that node within the same render.
Tools parsing `dependencyTree` / `dependencyBrowseTree` output should
treat `<id> (*)` as a back-pointer rather than a distinct dependency.
### Scope
Dedup is currently within a single root's subtree. Cross-root dedup
(when a `ModuleGraph` has multiple roots that share a transitive
closure) is tracked separately as [#9227][i9227].
[i6886]: https://github.com/sbt/sbt/issues/6886
[i9227]: https://github.com/sbt/sbt/issues/9227

View File

@ -7,7 +7,4 @@ whenis20:whenis20_..
|
+-org.typeleve..
+-org.typele..
+-org.type..
+-org.ty..
+-org.ty..

View File

@ -6,8 +6,5 @@ whenisdefault:whenisdefault_2.13:0.1.0-SNAPSHOT [S]
| +-org.typelevel:simulacrum-scalafix-annotations_2.13:0.5.4 [S]
|
+-org.typelevel:cats-effect-std_2.13:3.1.0 [S]
+-org.typelevel:cats-effect-kernel_2.13:3.1.0 [S]
+-org.typelevel:cats-core_2.13:2.6.0 [S]
+-org.typelevel:cats-kernel_2.13:2.6.0 [S]
+-org.typelevel:simulacrum-scalafix-annotations_2.13:0.5.4 [S]
+-org.typelevel:cats-effect-kernel_2.13:3.1.0 [S] (*)

View File

@ -0,0 +1,49 @@
// Regression test for sbt/sbt#6886. Local multi-project diamond: the
// root depends on subA and subB, both of which depend on common. So
// `dependencyTree` for the root must render `common` fully on its
// first occurrence and collapse to `(*)` on the second. Local
// fixture (no external maven dependencies) so this test stays stable
// across Coursier behavior changes and external library churn.
ThisBuild / organization := "org.example"
ThisBuild / version := "0.1"
ThisBuild / scalaVersion := "2.13.16"
lazy val common = (project in file("common"))
.settings(name := "common")
lazy val subA = (project in file("subA"))
.settings(name := "subA")
.dependsOn(common)
lazy val subB = (project in file("subB"))
.settings(name := "subB")
.dependsOn(common)
lazy val root = (project in file("."))
.settings(name := "root")
.dependsOn(subA, subB)
.aggregate(common, subA, subB)
// Grep-based check, not full diff: Coursier may legitimately reorder
// siblings, and pinning exact line offsets makes the test brittle.
TaskKey[Unit]("check") := {
val tree = IO.read(file("target/tree.txt"))
// Tie the assertion to the specific diamond this fixture exercises:
// `common` must appear on at least one line that also carries the
// `(*)` marker. A regression where `common` specifically fails to
// collapse would otherwise slip through as long as scala-library
// (also reachable through both subA and subB) still collapsed.
val collapsedCommon = tree.linesIterator.exists(l =>
l.contains("common") && l.contains("(*)")
)
assert(
collapsedCommon,
s"expected `common` to appear with a `(*)` marker (diamond apex collapsed):\n$tree"
)
// Sanity: cycle marker must NOT appear -- collapse is for duplicate
// (DAG re-traversal), not cycle (graph back-edge).
assert(
!tree.contains("(cycle)"),
s"unexpected (cycle) marker in dependencyTree output:\n$tree"
)
}

View File

@ -0,0 +1,5 @@
# dependencyTree on the aggregating root prints the diamond shape:
# root -> subA, subB; both depend on common. The first occurrence of
# `common` is rendered fully; the second collapses to ` (*)`.
> dependencyTree --out target/tree.txt
> check

View File

@ -27,8 +27,7 @@ check := {
"| +-org.typelevel:cats-effect_2.13:3.1.0 [S]",
"| +-whatdependson:whatdependson_2.13:0.1.0-SNAPSHOT [S]",
"|",
"+-org.typelevel:cats-effect_2.13:3.1.0 [S]",
"+-whatdependson:whatdependson_2.13:0.1.0-SNAPSHOT [S]"
"+-org.typelevel:cats-effect_2.13:3.1.0 [S] (*)"
).mkString("\n")
}
@ -46,8 +45,7 @@ check := {
"| +-org.typelevel:cats-effect_2.13:3.1.0 [S]",
"| +-whatdependson:whatdependson_2.13:0.1.0-SNAPSHOT [S]",
"|",
"+-org.typelevel:cats-effect_2.13:3.1.0 [S]",
"+-whatdependson:whatdependson_2.13:0.1.0-SNAPSHOT [S]"
"+-org.typelevel:cats-effect_2.13:3.1.0 [S] (*)"
).mkString("\n")
checkOutput(withoutVersion.trim, expectedGraphWithoutVersion.trim)

View File

@ -28,8 +28,7 @@ check := {
"| +-org.typelevel:cats-effect_2.13:3.1.0 [S]",
"| +-whatdependson:whatdependson_2.13:0.1.0-SNAPSHOT [S]",
"|",
"+-org.typelevel:cats-effect_2.13:3.1.0 [S]",
"+-whatdependson:whatdependson_2.13:0.1.0-SNAPSHOT [S]"
"+-org.typelevel:cats-effect_2.13:3.1.0 [S] (*)"
).mkString("\n")
checkOutput(withVersion.trim, expectedGraphWithVersion)
@ -46,8 +45,7 @@ check := {
"| +-org.typelevel:cats-effect_2.13:3.1.0 [S]",
"| +-whatdependson:whatdependson_2.13:0.1.0-SNAPSHOT [S]",
"|",
"+-org.typelevel:cats-effect_2.13:3.1.0 [S]",
"+-whatdependson:whatdependson_2.13:0.1.0-SNAPSHOT [S]"
"+-org.typelevel:cats-effect_2.13:3.1.0 [S] (*)"
).mkString("\n")
checkOutput(withoutVersion.trim, expectedGraphWithoutVersion.trim)