From bcd7fe1fbc6196f1242e0df3cab1c3ebc0df4ba8 Mon Sep 17 00:00:00 2001 From: BrianHotopp Date: Wed, 1 Jul 2026 17:02:50 -0400 Subject: [PATCH] [2.x] fix: Probe for a live server before refusing to start (#9337) Any IOException while creating the boot io socket was wrapped in ServerAlreadyBootingException and reported as "sbt thinks that server is already booting" with a stack trace, and non-interactive invocations exited with code 2. Permission or path-length problems with XDG_RUNTIME_DIR or the temp directory and Windows named-pipe access errors all hit this, blocking sbt entirely (#6777). Raw IOExceptions from the constructor (socket directory creation) were not caught at all and crashed startup. getSocketOrExit now connects to the socket (BootServerSocketProbe, shared with the test suite) to check for a live server before believing the exception. Co-authored-by: Claude Fable 5 --- .../java/sbt/internal/BootServerSocket.java | 5 ++ .../sbt/internal/BootServerSocketProbe.scala | 42 ++++++++++ .../sbt/internal/BootServerSocketSpec.scala | 77 +++++++++++++++++++ main/src/main/scala/sbt/Main.scala | 33 +++++--- notes/2.0.0/boot-socket-liveness.md | 16 ++++ 5 files changed, 161 insertions(+), 12 deletions(-) create mode 100644 main-command/src/main/scala/sbt/internal/BootServerSocketProbe.scala create mode 100644 main-command/src/test/scala/sbt/internal/BootServerSocketSpec.scala create mode 100644 notes/2.0.0/boot-socket-liveness.md diff --git a/main-command/src/main/java/sbt/internal/BootServerSocket.java b/main-command/src/main/java/sbt/internal/BootServerSocket.java index 4becac3ff..b7f4353bd 100644 --- a/main-command/src/main/java/sbt/internal/BootServerSocket.java +++ b/main-command/src/main/java/sbt/internal/BootServerSocket.java @@ -123,12 +123,17 @@ public class BootServerSocket implements AutoCloseable { bytes.put(b); clientSocketReads.put(ClientSocket.this); } else { + // close() deregisters from clientSockets like the write + // methods do; a dead entry 
left behind would block the + // NO_BOOT_CLIENTS_CONNECTED signal in inputStream.read. alive.set(false); + close(); } } } catch (IOException e) { alive.set(false); + close(); } } } catch (final Exception ex) { diff --git a/main-command/src/main/scala/sbt/internal/BootServerSocketProbe.scala b/main-command/src/main/scala/sbt/internal/BootServerSocketProbe.scala new file mode 100644 index 000000000..d5987c500 --- /dev/null +++ b/main-command/src/main/scala/sbt/internal/BootServerSocketProbe.scala @@ -0,0 +1,42 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + */ + +package sbt.internal + +import java.util.concurrent.{ CountDownLatch, TimeUnit } +import java.util.concurrent.atomic.AtomicBoolean +import sbt.protocol.ClientSocket +import scala.util.control.NonFatal + +private[sbt] object BootServerSocketProbe: + private val timeoutMillis = 2000L + + /** + * True only if something answers on the boot socket at `location`. A live server answers + * immediately, so the connect runs on a daemon thread bounded by [[timeoutMillis]]: the + * underlying native connect has no timeout and blocks indefinitely against a bound socket whose + * listen backlog is saturated, which must never hang startup. LinkageError is caught alongside + * NonFatal because the connect may perform the JVM's first JNI/JNA load. 
+ */ + def liveServerDetected(location: String, useJni: Boolean): Boolean = + val answered = new AtomicBoolean(false) + val done = new CountDownLatch(1) + val t = new Thread( + () => + try + ClientSocket.localSocket(location, useJni).close() + answered.set(true) + catch case NonFatal(_) | (_: LinkageError) => () + finally done.countDown(), + "sbt-boot-socket-probe" + ) + t.setDaemon(true) + t.start() + done.await(timeoutMillis, TimeUnit.MILLISECONDS) + answered.get() +end BootServerSocketProbe diff --git a/main-command/src/test/scala/sbt/internal/BootServerSocketSpec.scala b/main-command/src/test/scala/sbt/internal/BootServerSocketSpec.scala new file mode 100644 index 000000000..b4a4f1b09 --- /dev/null +++ b/main-command/src/test/scala/sbt/internal/BootServerSocketSpec.scala @@ -0,0 +1,77 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + */ + +package sbt.internal + +import java.nio.file.{ Files, Paths } +import sbt.internal.util.Util +import verify.BasicTestSuite + +object BootServerSocketSpec extends BasicTestSuite: + + // the constructor only reads baseDirectory; provider is never touched + private def config(base: java.io.File): xsbti.AppConfiguration = + new xsbti.AppConfiguration { + override def arguments(): Array[String] = Array.empty + override def baseDirectory(): java.io.File = base + override def provider(): xsbti.AppProvider = null + } + + private def useJni: Boolean = + BootServerSocket.requiresJNI() || sys.props.getOrElse("sbt.ipcsocket.jni", "false") == "true" + + private def probe(location: String): Boolean = + BootServerSocketProbe.liveServerDetected(location, useJni) + + private def freshBase(prefix: String): (java.io.File, Long) = + val base = Files.createTempDirectory(prefix).toRealPath().toFile + (base, base.getAbsolutePath.hashCode.toLong ^ System.nanoTime()) + + test("a live boot server is detected by the 
probe") { + val (base, token) = freshBase("boot-socket-live") + val location = BootServerSocket.socketLocation(base.toPath, token) + val server = new BootServerSocket(config(base), token) + val live = + try probe(location) + finally server.close() + assert(live) + } + + test("the probe reports no live server when nothing is listening") { + val (base, token) = freshBase("boot-socket-none") + val location = BootServerSocket.socketLocation(base.toPath, token) + val live = probe(location) + assert(!live) + } + + test("after close, the probe reports no live server") { + val (base, token) = freshBase("boot-socket-closed") + val location = BootServerSocket.socketLocation(base.toPath, token) + val server = new BootServerSocket(config(base), token) + server.close() + val live = probe(location) + assert(!live) + } + + test("a stale socket file is not a live server and does not block a new socket") { + if (!Util.isWindows) { + val (base, token) = freshBase("boot-socket-stale") + val location = Paths.get(BootServerSocket.socketLocation(base.toPath, token)) + Files.createDirectories(location.getParent) + Files.createFile(location) // leftover from a killed process + val staleLooksLive = probe(location.toString) + assert(!staleLooksLive) + val server = new BootServerSocket(config(base), token) // reclaims the path + val liveAfterReclaim = + try probe(location.toString) + finally server.close() + assert(liveAfterReclaim) + } + } + +end BootServerSocketSpec diff --git a/main/src/main/scala/sbt/Main.scala b/main/src/main/scala/sbt/Main.scala index 9671ce5a3..c3b09487a 100644 --- a/main/src/main/scala/sbt/Main.scala +++ b/main/src/main/scala/sbt/Main.scala @@ -152,29 +152,38 @@ private[sbt] object xMain: private def getSocketOrExit( configuration: xsbti.AppConfiguration ): (Option[BootServerSocket], Option[Exit]) = { - def printThrowable(e: Throwable): Unit = { - println("sbt thinks that server is already booting because of this exception:") - e.printStackTrace() - } - - val target 
= - configuration.baseDirectory().toPath().toRealPath().resolve("project").resolve("target") - val hash = HashUtil.farmHash(target.toString().getBytes("UTF-8")); + val base = configuration.baseDirectory().toPath().toRealPath() + val target = base.resolve("project").resolve("target") + val hash = HashUtil.farmHash(target.toString().getBytes("UTF-8")) + // sbt runs fine without a boot socket (see the UnsatisfiedLinkError case below), so a + // creation failure only means "another sbt is booting" if something answers on it. + def liveServerDetected: Boolean = + BootServerSocketProbe.liveServerDetected( + BootServerSocket.socketLocation(base, hash), + BootServerSocket.requiresJNI() || SysProp.serverUseJni, + ) try Some(new BootServerSocket(configuration, hash)) -> None catch { - case e: ServerAlreadyBootingException if hasConsole && !ITerminal.startedByRemoteClient => - printThrowable(e) + // No live server and nothing the user can do about a socket failure, so proceed + // silently without the boot socket, as for UnsatisfiedLinkError below. + case _: ServerAlreadyBootingException if !liveServerDetected => (None, None) + case _: ServerAlreadyBootingException if hasConsole && !ITerminal.startedByRemoteClient => + println("another sbt appears to be booting in this build.") println("Create a new server? y/n (default y)") val exit = if (ITerminal.get.withRawInput(System.in.read) == 'n'.toInt) Some(Exit(1)) else None (None, exit) - case e: ServerAlreadyBootingException => + case _: ServerAlreadyBootingException => if (SysProp.forceServerStart) (None, None) else { - printThrowable(e) + println("another sbt appears to be booting in this build; exiting.") + println( + "wait for it to finish, attach to it with --client, or pass -Dsbt.server.forcestart=true to start anyway." 
+ ) (None, Some(Exit(2))) } + case _: IOException => (None, None) case _: UnsatisfiedLinkError => (None, None) } } diff --git a/notes/2.0.0/boot-socket-liveness.md b/notes/2.0.0/boot-socket-liveness.md new file mode 100644 index 000000000..0816afc65 --- /dev/null +++ b/notes/2.0.0/boot-socket-liveness.md @@ -0,0 +1,16 @@ +### sbt no longer refuses to start when the boot socket cannot be created + +Failures to create the boot-time io socket used to be misreported: most were +wrapped as "sbt thinks that server is already booting" with a stack trace, and +non-interactive invocations exited with code 2, while failures to create the +socket directory crashed startup outright. Permission or path-length problems +with `XDG_RUNTIME_DIR` or the temp directory and Windows named-pipe access +errors all hit one of these ([#6777][6777]). + +When creating the boot socket fails, sbt now probes it for a live server. Only +a server answering the probe is treated as another sbt booting in the build +(the interactive prompt and non-interactive exit are unchanged, with a clearer +message). Any other failure is no longer fatal: sbt continues without the boot +socket, whose only job is forwarding boot-time io to early-connecting clients. + + [6777]: https://github.com/sbt/sbt/issues/6777