[2.x] Reimplement FarmHash

**Problem**
sbtn and the server use FarmHash.

**Solution**
This reimplements FarmHash using Scala.
This commit is contained in:
Eugene Yokota 2026-05-31 05:06:28 -04:00
parent 82e5b28c9c
commit f1c914f4ae
13 changed files with 312 additions and 17 deletions

View File

@ -298,6 +298,7 @@ lazy val utilControl = (project in file("internal") / "util-control")
scalacheck % Test,
scalaVerify % Test,
hedgehog % Test,
zeroAllocationHashing % Test,
),
mimaSettings,
)
@ -636,6 +637,15 @@ lazy val commandProj = (project in file("main-command"))
contrabandSettings,
mimaSettings,
mimaBinaryIssueFilters ++= Vector(
exclude[MissingClassProblem]("sbt.internal.util.JoinThread"),
exclude[MissingClassProblem]("sbt.internal.util.JoinThread$"),
exclude[MissingClassProblem]("sbt.internal.util.ReadJsonFromInputStream"),
exclude[MissingClassProblem]("sbt.internal.util.ReadJsonFromInputStream$"),
exclude[MissingClassProblem]("sbt.internal.client.ServerConnection"),
exclude[IncompatibleResultTypeProblem]("sbt.internal.client.NetworkClient.connection"),
exclude[IncompatibleResultTypeProblem]("sbt.internal.client.NetworkClient.init"),
exclude[DirectMissingMethodProblem]("sbt.internal.BootServerSocket.this"),
exclude[DirectMissingMethodProblem]("sbt.internal.BootServerSocket.socketLocation"),
),
Compile / headerCreate / unmanagedSources := {
val old = (Compile / headerCreate / unmanagedSources).value

View File

@ -39,6 +39,12 @@ class FarmHashHashBenchmark extends AbstractHashBenchmark:
val hash = LongHashFunction.farmNa().hashBytes(buf)
java.lang.Long.toHexString(hash)
/** Benchmark case that hashes the buffer with `Hashing.farmNaHash64` and renders the result as hex. */
class FarmHash64VarHandleHashBenchmark extends AbstractHashBenchmark:
  override def hash(buf: Array[Byte]): String =
    // Hash the whole buffer, from index 0 to its end.
    val digest: Long = Hashing.farmNaHash64.hash(buf, 0, buf.length)
    java.lang.Long.toHexString(digest)
class MurmurHash32HashBenchmark extends AbstractHashBenchmark:
override def hash(buf: Array[Byte]): String =
val lo = MurmurHash3.bytesHash(buf, 0x85ebca6b)

View File

@ -0,0 +1,209 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.lang.Long.rotateRight
import java.nio.ByteBuffer
import FarmHashConstants.*
/**
 * 64-bit FarmHash ("na" variant) written in Scala.
 *
 * Input is read through an [[Access]] instance, so the same code serves every container
 * type that has one. All arithmetic is done on `Long`; multiplications and additions are
 * expected to wrap around on overflow.
 */
object FarmHash64:
  /** Folds the upper bits of `x` into its lower bits: `x ^ (x >>> 47)`. */
  private inline def shiftMix(x: Long): Long =
    x ^ (x >>> 47)

  /** Mixes two 64-bit words into one using the default multiplier `K_MUL`. */
  private inline def hashLen16(u: Long, v: Long): Long =
    hashLen16(u, v, K_MUL)

  /** Mixes two 64-bit words into one using the multiplier `m`. */
  private inline def hashLen16(u: Long, v: Long, m: Long): Long =
    val a = shiftMix((u ^ v) * m)
    shiftMix((v ^ a) * m) * m

  /** Length-dependent multiplier: `K2 + 2 * len`. */
  private inline def mul(len: Long): Long =
    K2 + (len << 1)

  /**
   * Hashes an input of 1 to 3 bytes.
   *
   * The three byte arguments must be unsigned values (0 to 255).
   */
  private def hash1To3Bytes(len: Int, firstByte: Int, midOrLastByte: Int, lastByte: Int): Long =
    val y = firstByte + (midOrLastByte << 8)
    val z = len + (lastByte << 2)
    shiftMix((y.toLong * K2) ^ (z.toLong * K0)) * K2

  /** Hashes an input of 4 to 7 bytes from its (unsigned) first and last 32-bit words. */
  private def hash4To7Bytes(len: Long, first4Bytes: Long, last4Bytes: Long): Long =
    val m = mul(len)
    hashLen16(len + (first4Bytes << 3), last4Bytes, m)

  /** Hashes an input of 8 to 16 bytes from its first and last 64-bit words. */
  private def hash8To16Bytes(len: Long, first8Bytes: Long, last8Bytes: Long): Long =
    val m = mul(len)
    val a = first8Bytes + K2
    val c = rotateRight(last8Bytes, 37) * m + a
    val d = (rotateRight(a, 25) + last8Bytes) * m
    hashLen16(c, d, m)

  /** Hashes an input of 0 to 16 bytes; the empty input hashes to `K2`. */
  private def hashLen0To16[A1](in: A1, offset: Long, len: Long)(access: Access[A1]): Long =
    val off = offset.toInt
    if len >= 8L then
      val a = access.readLongLE(in, off)
      val b = access.readLongLE(in, (off + len - 8L).toInt)
      hash8To16Bytes(len, a, b)
    else if len >= 4L then
      // The 32-bit words are widened as unsigned values.
      val a = access.readIntLE(in, off) & 0xffffffffL
      val b = access.readIntLE(in, (off + len - 4L).toInt) & 0xffffffffL
      hash4To7Bytes(len, a, b)
    else if len > 0L then
      // hash1To3Bytes needs unsigned 8-bit values. Masking keeps the result correct
      // whether readByte yields a signed byte or an already-unsigned value
      // (NOTE(review): confirm the return type of Access.readByte).
      val a = access.readByte(in, off) & 0xff
      val b = access.readByte(in, (off + (len >> 1)).toInt) & 0xff
      val c = access.readByte(in, (off + len - 1).toInt) & 0xff
      hash1To3Bytes(len.toInt, a, b, c)
    else K2

  /** Hashes an input of 17 to 32 bytes. */
  private def hashLen17To32[A1](in: A1, offset: Long, len: Long)(access: Access[A1]): Long =
    val off = offset.toInt
    val m = mul(len)
    val a = access.readLongLE(in, off) * K1
    val b = access.readLongLE(in, off + 8)
    val c = access.readLongLE(in, (off + len - 8L).toInt) * m
    val d = access.readLongLE(in, (off + len - 16L).toInt) * K2
    hashLen16(rotateRight(a + b, 43) + rotateRight(c, 30) + d, a + rotateRight(b + K2, 18) + c, m)

  /** Hashes an input of 33 to 64 bytes. */
  private def naHashLen33To64[A1](in: A1, offset: Long, len: Long)(access: Access[A1]): Long =
    val off = offset.toInt
    val m = mul(len)
    val a = access.readLongLE(in, off) * K2
    val b = access.readLongLE(in, off + 8)
    val c = access.readLongLE(in, (off + len - 8).toInt) * m
    val d = access.readLongLE(in, (off + len - 16).toInt) * K2
    val y = rotateRight(a + b, 43) + rotateRight(c, 30) + d
    val z = hashLen16(y, a + rotateRight(b + K2, 18) + c, m)
    val e = access.readLongLE(in, off + 16) * m
    val f = access.readLongLE(in, off + 24)
    val g = (y + access.readLongLE(in, (off + len - 32).toInt)) * m
    val h = (z + access.readLongLE(in, (off + len - 24).toInt)) * m
    hashLen16(rotateRight(e + f, 43) + rotateRight(g, 30) + h, e + rotateRight(f + a, 18) + g, m)

  /**
   * Computes the 64-bit hash of `len` bytes of `in`, starting at `offset`.
   *
   * Inputs of up to 64 bytes are dispatched to a length-specific routine. Longer inputs
   * are consumed in 64-byte chunks, followed by one more round over the last 64 bytes.
   *
   * @param in     the container to read from
   * @param offset position of the first byte to hash; narrowed to `Int` before reading
   * @param len    number of bytes to hash
   * @param access reader used to fetch words and bytes from `in`
   * @return the 64-bit hash value
   */
  def naHash64[A1](in: A1, offset: Long, len: Long)(access: Access[A1]): Long =
    val seed: Long = 81L
    if len <= 32 then
      if len <= 16 then hashLen0To16(in, offset, len)(access)
      else hashLen17To32(in, offset, len)(access)
    else if len <= 64 then naHashLen33To64(in, offset, len)(access)
    else
      var off = offset.toInt
      // For inputs over 64 bytes we loop. Internal state consists of
      // 56 bytes: v, w, x, y, and z.
      var x: Long = seed
      // Long arithmetic wraps on overflow, so the initial state is computed directly.
      var y: Long = seed * K1 + 113
      var z: Long = shiftMix(y * K2 + 113) * K2
      var v1: Long = 0L
      var v2: Long = 0L
      var w1: Long = 0L
      var w2: Long = 0L
      x = x * K2 + access.readLongLE(in, off)
      // Set fin so that after the loop we have 1 to 64 bytes left to process.
      // Every read below is addressed with an Int, so both positions are kept as Int.
      val fin: Int = (off + ((len - 1) >> 6) * 64).toInt
      val last64: Int = (fin + ((len - 1) & 63) - 63).toInt
      // Scala 3 spelling of do-while: the body runs first, the last expression is the test.
      while
        x = rotateRight(x + y + v1 + access.readLongLE(in, off + 8), 37) * K1
        y = rotateRight(y + v2 + access.readLongLE(in, off + 48), 42) * K1
        x ^= w2
        y += v1 + access.readLongLE(in, off + 40)
        z = rotateRight(z + w1, 33) * K1
        // Mix bytes 0..31 of the chunk into (v1, v2).
        var a: Long = v2 * K1
        var b: Long = x + w1
        val z1 = access.readLongLE(in, off + 24)
        a += access.readLongLE(in, off)
        b = rotateRight(b + a + z1, 21)
        val c = a
        a += access.readLongLE(in, off + 8)
        a += access.readLongLE(in, off + 16)
        b += rotateRight(a, 44)
        v1 = a + z1
        v2 = b + c
        // Mix bytes 32..63 of the chunk into (w1, w2).
        var a1: Long = z + w2
        var b1: Long = y + access.readLongLE(in, off + 16)
        val z2 = access.readLongLE(in, off + 32 + 24)
        a1 += access.readLongLE(in, off + 32)
        b1 = rotateRight(b1 + a1 + z2, 21)
        val c1 = a1
        a1 += access.readLongLE(in, off + 32 + 8)
        a1 += access.readLongLE(in, off + 32 + 16)
        b1 += rotateRight(a1, 44)
        w1 = a1 + z2
        w2 = b1 + c1
        // Swap x and z.
        val t = z
        z = x
        x = t
        off += 64
        off != fin
      do ()
      // Point off at the last 64 bytes of input.
      off = last64
      val m = K1 + ((z & 0xff) << 1)
      w1 += (len - 1) & 63
      v1 += w1
      w1 += v1
      // Final round: same shape as the loop body, but with multiplier m and extra *9 terms.
      x = rotateRight(x + y + v1 + access.readLongLE(in, off + 8), 37) * m
      y = rotateRight(y + v2 + access.readLongLE(in, off + 48), 42) * m
      x ^= w2 * 9
      y += v1 * 9 + access.readLongLE(in, off + 40)
      z = rotateRight(z + w1, 33) * m
      var a: Long = v2 * m
      var b: Long = x + w1
      val z1 = access.readLongLE(in, off + 24)
      a += access.readLongLE(in, off)
      b = rotateRight(b + a + z1, 21)
      val c = a
      a += access.readLongLE(in, off + 8)
      a += access.readLongLE(in, off + 16)
      b += rotateRight(a, 44)
      v1 = a + z1
      v2 = b + c
      var a1: Long = z + w2
      var b1: Long = y + access.readLongLE(in, off + 16)
      val z2 = access.readLongLE(in, off + 32 + 24)
      a1 += access.readLongLE(in, off + 32)
      b1 = rotateRight(b1 + a1 + z2, 21)
      val c1 = a1
      a1 += access.readLongLE(in, off + 32 + 8)
      a1 += access.readLongLE(in, off + 32 + 16)
      b1 += rotateRight(a1, 44)
      w1 = a1 + z2
      w2 = b1 + c1
      // Swap x and z once more before the final mix.
      val t = z
      z = x
      x = t
      hashLen16(hashLen16(v1, w1, m) + shiftMix(y) * K0 + z, hashLen16(v2, w2, m) + x, m)
end FarmHash64
/** Holds the shared [[FarmNaSeedlessHash64]] instances, each created on first use. */
object FarmNaSeedlessHash64:
  private lazy val forArrays: FarmNaSeedlessHash64[Array[Byte]] =
    new FarmNaSeedlessHash64[Array[Byte]]()

  private lazy val forBuffers: FarmNaSeedlessHash64[ByteBuffer] =
    new FarmNaSeedlessHash64[ByteBuffer]()

  /** The shared instance that hashes `Array[Byte]` input. */
  def byteArray: FarmNaSeedlessHash64[Array[Byte]] = forArrays

  /** The shared instance that hashes `ByteBuffer` input. */
  def byteBuffer: FarmNaSeedlessHash64[ByteBuffer] = forBuffers
end FarmNaSeedlessHash64
/**
 * [[HashAlgo]] that hashes input of type `A1` with `FarmHash64.naHash64`.
 *
 * @tparam A1 the container type being hashed; an [[Access]] instance for it must be available
 */
class FarmNaSeedlessHash64[A1: Access] extends HashAlgo[A1]:
  // Resolve the reader once, at construction time.
  private val access: Access[A1] = summon[Access[A1]]

  /**
   * Hashes `len` bytes of `buf` starting at `offset`.
   *
   * @return the 64-bit hash value
   */
  override def hash(buf: A1, offset: Int, len: Int): Long =
    FarmHash64.naHash64(buf, offset, len)(access)
end FarmNaSeedlessHash64

View File

@ -0,0 +1,17 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/**
 * 64-bit multiplier constants for the FarmHash implementation in this package.
 *
 * Each literal uses all 64 bits, so the values are negative when read as signed `Long`s.
 */
object FarmHashConstants:
  final val K0 = 0xc3a5_c85c_97cb_3127L
  final val K1 = 0xb492_b66f_be98_f273L
  final val K2 = 0x9ae1_6a3b_2f90_404fL
  final val K_MUL = 0x9ddf_ea08_eb38_2d69L
end FarmHashConstants

View File

@ -10,7 +10,7 @@
package sbt.internal.util.hashing
import java.io.File
import java.nio.file.{ Path as NioPath }
import java.nio.file.Path as NioPath
trait FileHash:
def hash(file: File): Long

View File

@ -11,7 +11,7 @@ package sbt.internal.util.hashing
import java.io.{ File, RandomAccessFile }
import java.nio.ByteBuffer
import java.nio.file.{ Path as NioPath }
import java.nio.file.Path as NioPath
import scala.util.Using
object FileSampleHash:

View File

@ -12,6 +12,9 @@ package sbt.internal.util.hashing
import java.nio.ByteBuffer
object Hashing:
/** The seedless FarmHash ("na") 64-bit algorithm over byte arrays. */
def farmNaHash64: HashAlgo[Array[Byte]] = FarmNaSeedlessHash64.byteArray
/** The XXHash64 algorithm over byte arrays, configured with `seed`. */
def xxhash64(seed: Long): HashAlgo[Array[Byte]] = XXHash64.byteArray(seed)

View File

@ -0,0 +1,44 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import verify.BasicTestSuite
import java.util.concurrent.ThreadLocalRandom
import net.openhft.hashing.LongHashFunction
/**
 * Checks `Hashing.farmNaHash64` against the `LongHashFunction.farmNa()` reference
 * implementation, plus two pinned values.
 */
object FarmHashTest extends BasicTestSuite:
  lazy val reference = LongHashFunction.farmNa()

  def hash64: HashAlgo[Array[Byte]] =
    Hashing.farmNaHash64

  // Pinned hashes of the empty input and of a single zero byte.
  def emptyHash: Long = -7286425919675154353L
  def zeroHash: Long = -4728684028706075820L

  test("Hash empty array"):
    val buf: Array[Byte] = new Array[Byte](0)
    val r = hash64.hash(buf, 0, 0)
    assert(r == emptyHash)
    val r2 = reference.hashBytes(buf)
    assert(r == r2)

  test("Hash one byte array"):
    val buf: Array[Byte] = Array[Byte](0)
    val r = hash64.hash(buf, 0, 1)
    assert(r == zeroHash)
    val r2 = reference.hashBytes(buf)
    assert(r == r2)

  test("Hash short inputs with high-bit bytes"):
    // Inputs of 1 to 3 bytes are hashed byte by byte; 0xff checks that each byte is
    // treated as an unsigned value.
    for len <- 1 to 3 do
      val buf: Array[Byte] = Array.fill[Byte](len)(0xff.toByte)
      val r = hash64.hash(buf, 0, len)
      val r2 = reference.hashBytes(buf)
      assert(r == r2)

  test("Hash every length from 0 to 200 bytes"):
    // Lengths 0 to 200 cross every size class of the implementation (0, 1-3, 4-7, 8-16,
    // 17-32, 33-64 and more than 64 bytes, including exact multiples of 64).
    // The seed is fixed so that a failure can be reproduced.
    val rnd = new java.util.Random(42L)
    for len <- 0 to 200 do
      val buf: Array[Byte] = new Array[Byte](len)
      rnd.nextBytes(buf)
      val r = hash64.hash(buf, 0, len)
      val r2 = reference.hashBytes(buf)
      assert(r == r2)

  test("Hash 2048 bytes"):
    val buf: Array[Byte] = new Array[Byte](2048)
    ThreadLocalRandom.current().nextBytes(buf)
    val r = hash64.hash(buf, 0, 2048)
    val r2 = reference.hashBytes(buf)
    assert(r == r2)
end FarmHashTest

View File

@ -281,18 +281,18 @@ public class BootServerSocket implements AutoCloseable {
}
};
public BootServerSocket(final AppConfiguration configuration)
public BootServerSocket(final AppConfiguration configuration, final long farmHash)
throws ServerAlreadyBootingException, IOException {
final Path base = configuration.baseDirectory().toPath().toRealPath();
if (!isWindows) {
final String actualSocketLocation = socketLocation(base);
final String actualSocketLocation = socketLocation(base, farmHash);
final Path target = Paths.get(actualSocketLocation).getParent();
if (!Files.isDirectory(target)) Files.createDirectories(target);
socketFile = Paths.get(actualSocketLocation);
} else {
socketFile = null;
}
serverSocket = newSocket(socketLocation(base));
serverSocket = newSocket(socketLocation(base, farmHash));
if (serverSocket != null) {
running.set(true);
acceptFuture = service.submit(acceptRunnable);
@ -302,20 +302,17 @@ public class BootServerSocket implements AutoCloseable {
}
}
public static String socketLocation(final Path base)
public static String socketLocation(final Path base, final long farmHash)
throws UnsupportedEncodingException, IOException {
final Path target = base.resolve("project").resolve("target");
long hash =
((long) target.toString().hashCode() << 32)
| (target.toString().length() * 31 & 0xffffffffL);
if (isWindows) {
return "sbt-load" + hash;
return "sbt-load" + farmHash;
} else {
final String alternativeSocketLocation =
System.getenv().getOrDefault("XDG_RUNTIME_DIR", System.getProperty("java.io.tmpdir"));
final Path alternativeSocketLocationRoot =
Paths.get(alternativeSocketLocation).resolve(".sbt");
final Path locationForSocket = alternativeSocketLocationRoot.resolve("sbt-socket" + hash);
final Path locationForSocket = alternativeSocketLocationRoot.resolve("sbt-socket" + farmHash);
final Path pathForSocket = locationForSocket.resolve("sbt-load.sock");
return pathForSocket.toString();
}

View File

@ -34,7 +34,7 @@ import sbt.internal.util.{
import sbt.io.IO
import sbt.io.syntax.*
import sbt.protocol.*
import sbt.util.{ Level, Logger }
import sbt.util.{ HashUtil, Level, Logger }
import sjsonnew.BasicJsonProtocol.*
import sjsonnew.shaded.scalajson.ast.unsafe.{ JObject, JValue }
import sjsonnew.support.scalajson.unsafe.Converter
@ -341,8 +341,10 @@ class NetworkClient(
* This instance must be shutdown explicitly via `sbt -client shutdown`
*/
def waitForServer(portfile: File, log: Boolean, startServer: Boolean): Unit = {
val bootSocketName =
BootServerSocket.socketLocation(arguments.baseDirectory.toPath.toRealPath())
val base = arguments.baseDirectory.toPath.toRealPath()
val target = base.resolve("project").resolve("target")
val hash = HashUtil.farmHash(target.toString().getBytes("UTF-8"))
val bootSocketName = BootServerSocket.socketLocation(base, hash)
/*
* For unknown reasons, linux sometimes struggles to connect to the socket in some

View File

@ -32,7 +32,7 @@ import sbt.internal.util.complete.Parser
import sbt.internal.util.{ RunningProcesses, Terminal as ITerminal, * }
import sbt.io.*
import sbt.io.syntax.*
import sbt.util.{ ActionCache, Level, Logger, Show }
import sbt.util.{ ActionCache, HashUtil, Level, Logger, Show }
import xsbti.AppProvider
import scala.annotation.{ nowarn, tailrec }
@ -157,7 +157,10 @@ private[sbt] object xMain:
e.printStackTrace()
}
try Some(new BootServerSocket(configuration)) -> None
val target =
configuration.baseDirectory().toPath().toRealPath().resolve("project").resolve("target")
val hash = HashUtil.farmHash(target.toString().getBytes("UTF-8"));
try Some(new BootServerSocket(configuration, hash)) -> None
catch {
case e: ServerAlreadyBootingException if hasConsole && !ITerminal.startedByRemoteClient =>
printThrowable(e)

View File

@ -113,6 +113,7 @@ object Dependencies {
val caffeine = "com.github.ben-manes.caffeine" % "caffeine" % "2.8.5"
val blake3 = "pt.kcry" %% "blake3" % "3.1.2"
val zeroAllocationHashing = "net.openhft" % "zero-allocation-hashing" % "0.16"
val hedgehog = "qa.hedgehog" %% "hedgehog-sbt" % "0.13.0"
val disruptor = "com.lmax" % "disruptor" % "3.4.2"

View File

@ -1,9 +1,12 @@
package sbt.util
import java.nio.file.{ Path as NioPath }
import java.nio.file.Path as NioPath
import sbt.internal.util.hashing.Hashing
object HashUtil:
/** Hashes all of `bytes` with `Hashing.farmNaHash64`. */
private[sbt] def farmHash(bytes: Array[Byte]): Long =
  val algo = Hashing.farmNaHash64
  algo.hash(bytes, 0, bytes.length)
/** Hashes all of `bytes` with `Hashing.xxhash64`, using seed `0L`. */
private[sbt] def xxhash64(bytes: Array[Byte]): Long =
  val algo = Hashing.xxhash64(0L)
  algo.hash(bytes, 0, bytes.length)