[2.x] fix: Probe for a live server before refusing to start (#9337)

Any IOException while creating the boot io socket was wrapped in
ServerAlreadyBootingException and reported as "sbt thinks that server
is already booting" with a stack trace, and non-interactive
invocations exited with code 2. Permission or path-length problems
with XDG_RUNTIME_DIR or the temp directory and Windows named-pipe
access errors all hit this, blocking sbt entirely (#6777). Raw
IOExceptions from the constructor (socket directory creation) were
not caught at all and crashed startup.

getSocketOrExit now connects to the socket (BootServerSocketProbe,
shared with the test suite) to check for a live server before
believing the exception.

Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
BrianHotopp 2026-07-01 17:02:50 -04:00 committed by GitHub
parent c8312cdd88
commit bcd7fe1fbc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 161 additions and 12 deletions

View File

@ -123,12 +123,17 @@ public class BootServerSocket implements AutoCloseable {
bytes.put(b);
clientSocketReads.put(ClientSocket.this);
} else {
// close() deregisters from clientSockets like the write
// methods do; a dead entry left behind would block the
// NO_BOOT_CLIENTS_CONNECTED signal in inputStream.read.
alive.set(false);
close();
}
}
} catch (IOException e) {
alive.set(false);
close();
}
}
} catch (final Exception ex) {

View File

@ -0,0 +1,42 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*/
package sbt.internal
import java.util.concurrent.{ CountDownLatch, TimeUnit }
import java.util.concurrent.atomic.AtomicBoolean
import sbt.protocol.ClientSocket
import scala.util.control.NonFatal
private[sbt] object BootServerSocketProbe:
  /** Upper bound, in milliseconds, on how long a single probe may delay the caller. */
  private val timeoutMillis = 2000L

  /**
   * Checks whether something is answering on the boot socket at `location`.
   *
   * The connect attempt is made on a separate daemon thread and the caller waits for it for at
   * most [[timeoutMillis]]. The native connect has no timeout of its own and can block forever
   * against a bound socket whose listen backlog is saturated; bounding the wait keeps startup
   * from hanging in that case, while a live server answers immediately.
   *
   * LinkageError is handled next to NonFatal because the connect may be the first JNI/JNA load
   * performed by the JVM.
   *
   * @param location path (or pipe name) of the boot socket to connect to
   * @param useJni whether the client socket should be opened through the JNI implementation
   * @return true only if a connection was opened and closed again without error before the
   *         result was read; false on any failure or if the attempt had not finished in time
   */
  def liveServerDetected(location: String, useJni: Boolean): Boolean =
    val connected = new AtomicBoolean(false)
    val finished = new CountDownLatch(1)
    val attempt: Runnable = () =>
      try
        // Opening the socket is the whole probe; it is closed again right away.
        ClientSocket.localSocket(location, useJni).close()
        connected.set(true)
      catch case NonFatal(_) | (_: LinkageError) => ()
      finally finished.countDown()
    val worker = new Thread(attempt, "sbt-boot-socket-probe")
    // Daemon, so a connect that never returns cannot keep the JVM alive.
    worker.setDaemon(true)
    worker.start()
    // The flag is read whether or not the wait timed out.
    finished.await(timeoutMillis, TimeUnit.MILLISECONDS)
    connected.get()
end BootServerSocketProbe

View File

@ -0,0 +1,77 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*/
package sbt.internal
import java.nio.file.{ Files, Paths }
import sbt.internal.util.Util
import verify.BasicTestSuite
/**
 * Exercises [[BootServerSocketProbe]] against real boot sockets created in fresh temporary
 * directories: a listening server, no server at all, a closed server, and a stale socket file.
 */
object BootServerSocketSpec extends BasicTestSuite:
  // Stub configuration: the BootServerSocket constructor only reads baseDirectory, and
  // provider is never touched, so it can stay null.
  private def config(base: java.io.File): xsbti.AppConfiguration =
    new xsbti.AppConfiguration {
      override def arguments(): Array[String] = Array.empty
      override def baseDirectory(): java.io.File = base
      override def provider(): xsbti.AppProvider = null
    }

  // JNI is used when the platform requires it or when it is requested via the system property.
  private def useJni: Boolean =
    def requestedByProperty = sys.props.getOrElse("sbt.ipcsocket.jni", "false") == "true"
    BootServerSocket.requiresJNI() || requestedByProperty

  private def probe(location: String): Boolean =
    BootServerSocketProbe.liveServerDetected(location, useJni)

  // Creates a new temporary base directory and a token derived from its path and the clock.
  private def freshBase(prefix: String): (java.io.File, Long) =
    val dir = Files.createTempDirectory(prefix).toRealPath().toFile
    val token = dir.getAbsolutePath.hashCode.toLong ^ System.nanoTime()
    (dir, token)

  // Starts a boot server for (base, token), probes `location`, and always closes the server.
  private def probeWhileServing(base: java.io.File, token: Long, location: String): Boolean =
    val server = new BootServerSocket(config(base), token)
    try probe(location)
    finally server.close()

  test("a live boot server is detected by the probe") {
    val (dir, token) = freshBase("boot-socket-live")
    val socketPath = BootServerSocket.socketLocation(dir.toPath, token)
    val detected = probeWhileServing(dir, token, socketPath)
    assert(detected)
  }

  test("the probe reports no live server when nothing is listening") {
    val (dir, token) = freshBase("boot-socket-none")
    val socketPath = BootServerSocket.socketLocation(dir.toPath, token)
    val detected = probe(socketPath)
    assert(!detected)
  }

  test("after close, the probe reports no live server") {
    val (dir, token) = freshBase("boot-socket-closed")
    val socketPath = BootServerSocket.socketLocation(dir.toPath, token)
    new BootServerSocket(config(dir), token).close()
    val detected = probe(socketPath)
    assert(!detected)
  }

  test("a stale socket file is not a live server and does not block a new socket") {
    // The scenario is only exercised off Windows; there the test body is a no-op.
    if Util.isWindows then ()
    else
      val (dir, token) = freshBase("boot-socket-stale")
      val socketPath = Paths.get(BootServerSocket.socketLocation(dir.toPath, token))
      Files.createDirectories(socketPath.getParent)
      Files.createFile(socketPath) // leftover from a killed process
      val detectedOnStaleFile = probe(socketPath.toString)
      assert(!detectedOnStaleFile)
      // Constructing the server reclaims the path occupied by the stale file.
      val detectedAfterReclaim = probeWhileServing(dir, token, socketPath.toString)
      assert(detectedAfterReclaim)
  }
end BootServerSocketSpec

View File

@ -152,29 +152,38 @@ private[sbt] object xMain:
private def getSocketOrExit(
configuration: xsbti.AppConfiguration
): (Option[BootServerSocket], Option[Exit]) = {
def printThrowable(e: Throwable): Unit = {
println("sbt thinks that server is already booting because of this exception:")
e.printStackTrace()
}
val target =
configuration.baseDirectory().toPath().toRealPath().resolve("project").resolve("target")
val hash = HashUtil.farmHash(target.toString().getBytes("UTF-8"));
val base = configuration.baseDirectory().toPath().toRealPath()
val target = base.resolve("project").resolve("target")
val hash = HashUtil.farmHash(target.toString().getBytes("UTF-8"))
// sbt runs fine without a boot socket (see the UnsatisfiedLinkError case below), so a
// creation failure only means "another sbt is booting" if something answers on it.
def liveServerDetected: Boolean =
BootServerSocketProbe.liveServerDetected(
BootServerSocket.socketLocation(base, hash),
BootServerSocket.requiresJNI() || SysProp.serverUseJni,
)
try Some(new BootServerSocket(configuration, hash)) -> None
catch {
case e: ServerAlreadyBootingException if hasConsole && !ITerminal.startedByRemoteClient =>
printThrowable(e)
// No live server and nothing the user can do about a socket failure, so proceed
// silently without the boot socket, as for UnsatisfiedLinkError below.
case _: ServerAlreadyBootingException if !liveServerDetected => (None, None)
case _: ServerAlreadyBootingException if hasConsole && !ITerminal.startedByRemoteClient =>
println("another sbt appears to be booting in this build.")
println("Create a new server? y/n (default y)")
val exit =
if (ITerminal.get.withRawInput(System.in.read) == 'n'.toInt) Some(Exit(1))
else None
(None, exit)
case e: ServerAlreadyBootingException =>
case _: ServerAlreadyBootingException =>
if (SysProp.forceServerStart) (None, None)
else {
printThrowable(e)
println("another sbt appears to be booting in this build; exiting.")
println(
"wait for it to finish, attach to it with --client, or pass -Dsbt.server.forcestart=true to start anyway."
)
(None, Some(Exit(2)))
}
case _: IOException => (None, None)
case _: UnsatisfiedLinkError => (None, None)
}
}

View File

@ -0,0 +1,16 @@
### sbt no longer refuses to start when the boot socket cannot be created
Failures to create the boot-time io socket used to be misreported: most were
wrapped as "sbt thinks that server is already booting" with a stack trace, and
non-interactive invocations exited with code 2, while failures to create the
socket directory crashed startup outright. Permission or path-length problems
with `XDG_RUNTIME_DIR` or the temp directory and Windows named-pipe access
errors all hit one of these ([#6777][6777]).
When creation fails, sbt now probes the socket. Only a live server answering the probe is
treated as another sbt booting in the build (the interactive prompt and
non-interactive exit are unchanged, with a clearer message). Any other failure
is no longer fatal: sbt continues without the boot socket, whose only job is
forwarding boot-time io to early-connecting clients.
[6777]: https://github.com/sbt/sbt/issues/6777