From e8a358373ce467b0403dea1c8951c25779189285 Mon Sep 17 00:00:00 2001 From: Eugene Yokota Date: Sun, 31 May 2026 02:11:25 -0400 Subject: [PATCH 1/2] Refactor VarHandle to Access typeclass --- .../sbt/internal/util/HashBenchmark.scala | 8 +- .../{VarHandleUtils.scala => Access.scala} | 41 ++-- .../sbt/internal/util/hashing/HashAlgo.scala | 36 +-- .../sbt/internal/util/hashing/Hashing.scala | 18 +- .../hashing/StreamingWyHash64VarHandle.scala | 26 +- .../hashing/StreamingXXHash64VarHandle.scala | 22 +- .../sbt/internal/util/hashing/WyHash64.scala | 168 +++++++++++++ .../util/hashing/WyHash64VarHandle.scala | 162 ------------- .../sbt/internal/util/hashing/XXHash64.scala | 132 +++++++++++ .../util/hashing/XXHash64VarHandle.scala | 222 ------------------ .../util/AbstractByteBufferHashTest.scala | 31 +++ .../sbt/internal/util/AbstractHashTest.scala | 25 +- .../scala/sbt/internal/util/WyHashTest.scala | 15 +- .../scala/sbt/internal/util/XXHashTest.scala | 15 +- .../src/main/scala/sbt/util/HashUtil.scala | 2 +- 15 files changed, 435 insertions(+), 488 deletions(-) rename internal/util-control/src/main/scala/sbt/internal/util/hashing/{VarHandleUtils.scala => Access.scala} (50%) create mode 100644 internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHash64.scala delete mode 100644 internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHash64VarHandle.scala create mode 100644 internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHash64.scala delete mode 100644 internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHash64VarHandle.scala create mode 100644 internal/util-control/src/test/scala/sbt/internal/util/AbstractByteBufferHashTest.scala diff --git a/internal/hash-benchmark/src/main/scala/sbt/internal/util/HashBenchmark.scala b/internal/hash-benchmark/src/main/scala/sbt/internal/util/HashBenchmark.scala index 5f19269f2..165862816 100644 --- a/internal/hash-benchmark/src/main/scala/sbt/internal/util/HashBenchmark.scala +++ 
b/internal/hash-benchmark/src/main/scala/sbt/internal/util/HashBenchmark.scala @@ -24,14 +24,14 @@ end AbstractHashBenchmark class XXHash64HashBenchmark extends AbstractHashBenchmark: override def hash(buf: Array[Byte]): String = - val h = Hashing.xxhash64 - val hash = h.hash(buf, 0, buf.size, 0) + val h = Hashing.xxhash64(0L) + val hash = h.hash(buf, 0, buf.size) java.lang.Long.toHexString(hash) class WyHash64HashBenchmark extends AbstractHashBenchmark: override def hash(buf: Array[Byte]): String = - val h = Hashing.wyhash64 - val hash = h.hash(buf, 0, buf.size, 0) + val h = Hashing.wyhash64(0L) + val hash = h.hash(buf, 0, buf.size) java.lang.Long.toHexString(hash) class FarmHashHashBenchmark extends AbstractHashBenchmark: diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/VarHandleUtils.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/Access.scala similarity index 50% rename from internal/util-control/src/main/scala/sbt/internal/util/hashing/VarHandleUtils.scala rename to internal/util-control/src/main/scala/sbt/internal/util/hashing/Access.scala index bcdbb5847..0eda97575 100644 --- a/internal/util-control/src/main/scala/sbt/internal/util/hashing/VarHandleUtils.scala +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/Access.scala @@ -12,7 +12,13 @@ package sbt.internal.util.hashing import java.lang.invoke.{ MethodHandles, VarHandle } import java.nio.{ ByteBuffer, ByteOrder } -object VarHandleUtils: +sealed trait Access[A1]: + def readByte(a: A1, off: Int): Byte + def readIntLE(a: A1, off: Int): Int + def readLongLE(a: A1, off: Int): Long +end Access + +object Access: private def getArrayClass(c: Class[?]): Class[?] 
= java.lang.reflect.Array.newInstance(c, 0).getClass private val LONG_HANDLE: VarHandle = @@ -24,18 +30,21 @@ object VarHandleUtils: private val BB_INT_HANDLE: VarHandle = MethodHandles.byteBufferViewVarHandle(getArrayClass(classOf[Int]), ByteOrder.LITTLE_ENDIAN) - inline def readByte(buf: Array[Byte], off: Int): Byte = - buf(off) - inline def readIntLE(buf: Array[Byte], off: Int): Int = - INT_HANDLE.get(buf, off).asInstanceOf[Int] - inline def readLongLE(buf: Array[Byte], off: Int): Long = - LONG_HANDLE.get(buf, off).asInstanceOf[Long] - inline def readByte(buf: ByteBuffer, i: Int): Byte = - buf.get(i) - inline def readIntLE(buf: ByteBuffer, i: Int): Int = - assert(buf.order() == ByteOrder.LITTLE_ENDIAN) - BB_INT_HANDLE.get(buf, i).asInstanceOf[Int] - inline def readLongLE(buf: ByteBuffer, i: Int): Long = - assert(buf.order() == ByteOrder.LITTLE_ENDIAN) - BB_LONG_HANDLE.get(buf, i).asInstanceOf[Long] -end VarHandleUtils + given Access[Array[Byte]]: + inline def readByte(buf: Array[Byte], off: Int): Byte = + buf(off) + inline def readIntLE(buf: Array[Byte], off: Int): Int = + INT_HANDLE.get(buf, off).asInstanceOf[Int] + inline def readLongLE(buf: Array[Byte], off: Int): Long = + LONG_HANDLE.get(buf, off).asInstanceOf[Long] + + given Access[ByteBuffer]: + inline def readByte(buf: ByteBuffer, off: Int): Byte = + buf.get(off) + inline def readIntLE(buf: ByteBuffer, off: Int): Int = + assert(buf.order() == ByteOrder.LITTLE_ENDIAN) + BB_INT_HANDLE.get(buf, off).asInstanceOf[Int] + inline def readLongLE(buf: ByteBuffer, off: Int): Long = + assert(buf.order() == ByteOrder.LITTLE_ENDIAN) + BB_LONG_HANDLE.get(buf, off).asInstanceOf[Long] +end Access diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/HashAlgo.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/HashAlgo.scala index c3fb0c3f4..6db43425e 100644 --- a/internal/util-control/src/main/scala/sbt/internal/util/hashing/HashAlgo.scala +++ 
b/internal/util-control/src/main/scala/sbt/internal/util/hashing/HashAlgo.scala @@ -9,12 +9,15 @@ package sbt.internal.util.hashing -import java.nio.ByteBuffer +// import java.nio.ByteBuffer + +import scala.annotation.nowarn /** * Hash algorithm interface */ -trait HashAlgo: +@nowarn +trait HashAlgo[A1: Access]: /** * Computes the 64-bits hash of buf[off:off+len] using the seed. @@ -25,34 +28,7 @@ trait HashAlgo: * @param seed the seed to use * @return the hash value */ - def hash(buf: Array[Byte], off: Int, len: Int, seed: Long): Long - - /** - * Computes the hash of the given slice of the ByteBuffer. - * ByteBuffer#position() position and ByteBuffer#limit() limit - * are not modified. - * - * @param buf the input data - * @param off the start offset in buf - * @param len the number of bytes to hash - * @param seed the seed to use - * @return the hash value - */ - def hash(buf: ByteBuffer, off: Int, len: Int, seed: Long): Long - - /** - * Computes the hash of the given ByteBuffer. The - * ByteBuffer#position() position is moved in order to reflect bytes - * which have been read. 
- * - * @param buf the input data - * @param seed the seed to use - * @return the hash value - */ - def hash(buf: ByteBuffer, seed: Long): Long = - val r = hash(buf, buf.position(), buf.remaining(), seed) - buf.position(buf.limit()) - r + def hash(buf: A1, off: Int, len: Int): Long override def toString(): String = getClass().getSimpleName() diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/Hashing.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/Hashing.scala index 781e21948..f7670d3b5 100644 --- a/internal/util-control/src/main/scala/sbt/internal/util/hashing/Hashing.scala +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/Hashing.scala @@ -9,16 +9,30 @@ package sbt.internal.util.hashing +import java.nio.ByteBuffer + object Hashing: - def xxhash64: HashAlgo = XXHash64VarHandle.INSTANCE - def wyhash64: HashAlgo = WyHash64VarHandle.INSTANCE + def xxhash64(seed: Long): HashAlgo[Array[Byte]] = + XXHash64.byteArray(seed) + + def xxhash64ByteBuffer(seed: Long): HashAlgo[ByteBuffer] = + XXHash64.byteBuffer(seed) + + def wyhash64(seed: Long): HashAlgo[Array[Byte]] = + WyHash64.byteArray(seed) + + def wyhash64ByteBuffer(seed: Long): HashAlgo[ByteBuffer] = + WyHash64.byteBuffer(seed) def newStreamingXXHash64(seed: Long): StreamingHashAlgo = new StreamingXXHash64VarHandle(seed) + def newStreamingWyHash64(seed: Long): StreamingHashAlgo = new StreamingWyHash64VarHandle(seed) + def samplingFileHashXXHash64(seed: Long): FileHash = FileSampleHash(newStreamingXXHash64(seed)) + def samplingFileHashWyHash64(seed: Long): FileHash = FileSampleHash(newStreamingWyHash64(seed)) end Hashing diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingWyHash64VarHandle.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingWyHash64VarHandle.scala index 8130c5ce7..8b256cc21 100644 --- 
a/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingWyHash64VarHandle.scala +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingWyHash64VarHandle.scala @@ -9,9 +9,8 @@ package sbt.internal.util.hashing -import WyHash64VarHandle.* +import WyHash64.* import WyHashConstants.* -import VarHandleUtils.* class StreamingWyHash64VarHandle(seed: Long) extends StreamingHashAlgo(seed): protected var a: Long = 0 @@ -23,6 +22,7 @@ class StreamingWyHash64VarHandle(seed: Long) extends StreamingHashAlgo(seed): protected var totalLen: Long = 0L protected val memory = new Array[Byte](48) protected var memoryLen: Int = 0 + private val access = summon[Access[Array[Byte]]] reset() override def reset(): Unit = @@ -47,10 +47,10 @@ class StreamingWyHash64VarHandle(seed: Long) extends StreamingHashAlgo(seed): if inputLen >= 4 then val end = inputLen - 4 val quarter = (inputLen >> 3) << 2 - _a = (readIntLE(input, 0).toLong << 32) - | (readIntLE(input, quarter) & 0xffffffffL) - _b = (readIntLE(input, end) << 32).toLong - | (readIntLE(input, end - quarter) & 0xffffffffL) + _a = (access.readIntLE(input, 0).toLong << 32) + | (access.readIntLE(input, quarter) & 0xffffffffL) + _b = (access.readIntLE(input, end).toLong << 32) + | (access.readIntLE(input, end - quarter) & 0xffffffffL) else if inputLen > 0 then _a = ((input(0) & 0xffL) << 16) | ((input(inputLen >> 1) & 0xffL) << 8) | (input(inputLen - 1) & 0xffL) @@ -73,11 +73,11 @@ class StreamingWyHash64VarHandle(seed: Long) extends StreamingHashAlgo(seed): var i = 0 while i + 16 < inputLen do - v0 = mix(readLongLE(input, i) ^ PRIME64_1, readLongLE(input, i + 8) ^ v0) + v0 = mix(access.readLongLE(input, i) ^ PRIME64_1, access.readLongLE(input, i + 8) ^ v0) i += 16 - _a = readLongLE(input, inputLen - 16) - _b = readLongLE(input, inputLen - 8) + _a = access.readLongLE(input, inputLen - 16) + _b = access.readLongLE(input, inputLen - 8) end if finishHash(_a, _b, v0, this.totalLen) @@ -113,8 +113,10 @@
class StreamingWyHash64VarHandle(seed: Long) extends StreamingHashAlgo(seed): end update private def round(buf: Array[Byte], p: Int): Unit = - this.v0 = mix(readLongLE(buf, p) ^ PRIME64_1, readLongLE(buf, p + 8) ^ this.v0) - this.v1 = mix(readLongLE(buf, p + 16) ^ PRIME64_2, readLongLE(buf, p + 24) ^ this.v1) - this.v2 = mix(readLongLE(buf, p + 32) ^ PRIME64_3, readLongLE(buf, p + 40) ^ this.v2) + this.v0 = mix(access.readLongLE(buf, p) ^ PRIME64_1, access.readLongLE(buf, p + 8) ^ this.v0) + this.v1 = + mix(access.readLongLE(buf, p + 16) ^ PRIME64_2, access.readLongLE(buf, p + 24) ^ this.v1) + this.v2 = + mix(access.readLongLE(buf, p + 32) ^ PRIME64_3, access.readLongLE(buf, p + 40) ^ this.v2) end StreamingWyHash64VarHandle diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingXXHash64VarHandle.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingXXHash64VarHandle.scala index fe35fe2cb..965dc5898 100644 --- a/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingXXHash64VarHandle.scala +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingXXHash64VarHandle.scala @@ -11,7 +11,6 @@ package sbt.internal.util.hashing import java.lang.Long.rotateLeft import SafeUtils.checkRange -import VarHandleUtils.* import XXHashConstants.* /** @@ -22,6 +21,7 @@ import XXHashConstants.* * Streaming xxhash. 
*/ class StreamingXXHash64VarHandle(seed: Long) extends AbstractStreamingXXHash64Scala(seed): + private val access = summon[Access[Array[Byte]]] override def getValue: Long = var h64: Long = 0L @@ -61,7 +61,7 @@ class StreamingXXHash64VarHandle(seed: Long) extends AbstractStreamingXXHash64Sc var off: Int = 0 while off <= memSize - 8 do - var k1: Long = readLongLE(memory, off) + var k1: Long = access.readLongLE(memory, off) k1 *= PRIME64_2 k1 = rotateLeft(k1, 31) k1 *= PRIME64_1 @@ -70,7 +70,7 @@ class StreamingXXHash64VarHandle(seed: Long) extends AbstractStreamingXXHash64Sc off += 8 if off <= memSize - 4 then - h64 ^= (readIntLE(memory, off) & 0xffffffffL) * PRIME64_1 + h64 ^= (access.readIntLE(memory, off) & 0xffffffffL) * PRIME64_1 h64 = rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3 off += 4 else () @@ -104,19 +104,19 @@ class StreamingXXHash64VarHandle(seed: Long) extends AbstractStreamingXXHash64Sc if memSize > 0 then // data left from previous update System.arraycopy(buf, off, memory, memSize, 32 - memSize) - v1 += readLongLE(memory, 0) * PRIME64_2 + v1 += access.readLongLE(memory, 0) * PRIME64_2 v1 = rotateLeft(v1, 31) v1 *= PRIME64_1 - v2 += readLongLE(memory, 8) * PRIME64_2 + v2 += access.readLongLE(memory, 8) * PRIME64_2 v2 = rotateLeft(v2, 31) v2 *= PRIME64_1 - v3 += readLongLE(memory, 16) * PRIME64_2 + v3 += access.readLongLE(memory, 16) * PRIME64_2 v3 = rotateLeft(v3, 31) v3 *= PRIME64_1 - v4 += readLongLE(memory, 24) * PRIME64_2 + v4 += access.readLongLE(memory, 24) * PRIME64_2 v4 = rotateLeft(v4, 31) v4 *= PRIME64_1 @@ -132,22 +132,22 @@ class StreamingXXHash64VarHandle(seed: Long) extends AbstractStreamingXXHash64Sc var v4: Long = this.v4 while off <= limit do - v1 += readLongLE(buf, off) * PRIME64_2 + v1 += access.readLongLE(buf, off) * PRIME64_2 v1 = rotateLeft(v1, 31) v1 *= PRIME64_1 off += 8 - v2 += readLongLE(buf, off) * PRIME64_2 + v2 += access.readLongLE(buf, off) * PRIME64_2 v2 = rotateLeft(v2, 31) v2 *= PRIME64_1 off += 8 - v3 += 
readLongLE(buf, off) * PRIME64_2 + v3 += access.readLongLE(buf, off) * PRIME64_2 v3 = rotateLeft(v3, 31) v3 *= PRIME64_1 off += 8 - v4 += readLongLE(buf, off) * PRIME64_2 + v4 += access.readLongLE(buf, off) * PRIME64_2 v4 = rotateLeft(v4, 31) v4 *= PRIME64_1 off += 8 diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHash64.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHash64.scala new file mode 100644 index 000000000..7d4ddaae4 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHash64.scala @@ -0,0 +1,168 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.nio.ByteBuffer +import WyHashConstants.* + +object WyHash64: + private lazy val arrayInstance: WyHash64[Array[Byte]] = + new WyHash64(0) + private lazy val byteBufferInstance: WyHash64[ByteBuffer] = + new WyHash64(0) + + def byteArray(seed: Long): WyHash64[Array[Byte]] = + if seed == 0L then arrayInstance + else new WyHash64(seed) + + def byteBuffer(seed: Long): WyHash64[ByteBuffer] = + if seed == 0L then byteBufferInstance + else new WyHash64(seed) + + private[hashing] inline def initSeed(seed: Long): Long = + seed ^ mix(seed ^ PRIME64_0, PRIME64_1) + + private[hashing] def mix(a: Long, b: Long): Long = + val low = a * b + val high = unsignedMultiplyHigh(a, b) + low ^ high + + private[hashing] inline def unsignedMultiplyHigh(a: Long, b: Long): Long = + Math.multiplyHigh(a, b) + ((a >> 63) & b) + ((b >> 63) & a) + + private[hashing] inline def wyr3[A1: Access](buf: A1, off: Int, k: Int): Long = + val access = summon[Access[A1]] + ((access.readByte(buf, off) & 0xffL) << 16) + | ((access.readByte(buf, off + (k >> 1)) & 0xffL) << 8) + | (access.readByte(buf, off + k - 1) & 0xffL) + + private[hashing] inline def finishHash(a: Long, b: Long, 
seed: Long, len: Long): Long = + val _a = a ^ PRIME64_1 + val _b = b ^ seed + val low = _a * _b + val high = unsignedMultiplyHigh(_a, _b) + mix(low ^ PRIME64_0 ^ len, high ^ PRIME64_1) + +end WyHash64 + +/** + * Wyhash matching Zig 0.15 std.hash.Wyhash. + */ +class WyHash64[A1: Access](seed: Long) extends HashAlgo[A1]: + import WyHash64.* + + private val access: Access[A1] = summon[Access[A1]] + + override def hash(buf: A1, offset: Int, len: Int): Long = + var off = offset + var s: Long = initSeed(seed) + val secret1 = PRIME64_1 + val secret2 = PRIME64_2 + val secret3 = PRIME64_3 + var a: Long = 0L + var b: Long = 0L + + if len <= 16 then + if len >= 4 then + a = (access.readIntLE(buf, off).toLong << 32) + | (access.readIntLE(buf, off + ((len >> 3) << 2)) & 0xffffffffL) + b = (access.readIntLE(buf, off + len - 4).toLong << 32) + | (access.readIntLE(buf, off + len - 4 - ((len >> 3) << 2)) & 0xffffffffL) + else if len > 0 then + a = wyr3(buf, off, len) + b = 0 + else + a = 0 + b = 0 + else + var i = len + var p = off + var see0 = s + var see1 = s + var see2 = s + + while i > 48 do + see0 = mix(access.readLongLE(buf, p) ^ secret1, access.readLongLE(buf, p + 8) ^ see0) + see1 = mix(access.readLongLE(buf, p + 16) ^ secret2, access.readLongLE(buf, p + 24) ^ see1) + see2 = mix(access.readLongLE(buf, p + 32) ^ secret3, access.readLongLE(buf, p + 40) ^ see2) + p += 48 + i -= 48 + end while + + see0 ^= see1 ^ see2 + while i > 16 do + see0 = mix(access.readLongLE(buf, p) ^ secret1, access.readLongLE(buf, p + 8) ^ see0) + i -= 16 + p += 16 + end while + + a = access.readLongLE(buf, off + len - 16) + b = access.readLongLE(buf, off + len - 8) + s = see0 + end if + finishHash(a, b, s, len) + end hash + + // override def hash(buffer: ByteBuffer, offset: Int, len: Int, seed: Long): Long = + // if buffer.hasArray() then hash(buffer.array(), offset + buffer.arrayOffset(), len, seed) + // else + // var off = offset + // ByteBufferUtils.checkRange(buffer, off, len) + // val buf = 
ByteBufferUtils.inLittleEndianOrder(buffer) + // var s: Long = initSeed(seed) + // val secret1 = PRIME64_1 + // val secret2 = PRIME64_2 + // val secret3 = PRIME64_3 + // var a: Long = 0L + // var b: Long = 0L + + // if len <= 16 then + // if len >= 4 then + // a = (readIntLE(buf, off).toLong << 32) + // | (readIntLE(buf, off + ((len >> 3) << 2)) & 0xffffffffL) + // b = (readIntLE(buf, off + len - 4).toLong << 32) + // | (readIntLE(buf, off + len - 4 - ((len >> 3) << 2)) & 0xffffffffL) + // else if len > 0 then + // a = wyr3(buf, off, len) + // b = 0 + // else + // a = 0 + // b = 0 + // else + // var i = len + // var p = off + // var see0 = s + // var see1 = s + // var see2 = s + + // while i > 48 do + // see0 = mix(readLongLE(buf, p) ^ secret1, readLongLE(buf, p + 8) ^ see0) + // see1 = mix(readLongLE(buf, p + 16) ^ secret2, readLongLE(buf, p + 24) ^ see1) + // see2 = mix(readLongLE(buf, p + 32) ^ secret3, readLongLE(buf, p + 40) ^ see2) + // p += 48 + // i -= 48 + // end while + + // see0 ^= see1 ^ see2 + // while i > 16 do + // see0 = mix(readLongLE(buf, p) ^ secret1, readLongLE(buf, p + 8) ^ see0) + // i -= 16 + // p += 16 + // end while + + // a = readLongLE(buf, off + len - 16) + // b = readLongLE(buf, off + len - 8) + // s = see0 + // end if + // finishHash(a, b, s, len) + // end if + // end hash + +end WyHash64 diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHash64VarHandle.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHash64VarHandle.scala deleted file mode 100644 index 86413f3a8..000000000 --- a/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHash64VarHandle.scala +++ /dev/null @@ -1,162 +0,0 @@ -/* - * sbt - * Copyright 2023, Scala center - * Copyright 2011 - 2022, Lightbend, Inc. 
- * Copyright 2008 - 2010, Mark Harrah - * Licensed under Apache License 2.0 (see LICENSE) - * - */ - -package sbt.internal.util.hashing - -import java.nio.ByteBuffer -import WyHashConstants.* -import VarHandleUtils.* - -object WyHash64VarHandle: - private[hashing] val INSTANCE = new WyHash64VarHandle() - - private[hashing] inline def initSeed(seed: Long): Long = - seed ^ mix(seed ^ PRIME64_0, PRIME64_1) - - private[hashing] def mix(a: Long, b: Long): Long = - val low = a * b - val high = unsignedMultiplyHigh(a, b) - low ^ high - - private[hashing] inline def unsignedMultiplyHigh(a: Long, b: Long): Long = - Math.multiplyHigh(a, b) + ((a >> 63) & b) + ((b >> 63) & a) - - private[hashing] inline def wyr3(buf: Array[Byte], off: Int, k: Int): Long = - ((buf(off) & 0xffL) << 16) - | ((buf(off + (k >> 1)) & 0xffL) << 8) - | (buf(off + k - 1) & 0xffL) - - private[hashing] inline def wyr3(buf: ByteBuffer, off: Int, k: Int): Long = - ((buf.get(off) & 0xffL) << 16) - | ((buf.get(off + (k >> 1)) & 0xffL) << 8) - | (buf.get(off + k - 1) & 0xffL) - - private[hashing] inline def finishHash(a: Long, b: Long, seed: Long, len: Long): Long = - val _a = a ^ PRIME64_1 - val _b = b ^ seed - val low = _a * _b - val high = unsignedMultiplyHigh(_a, _b) - mix(low ^ PRIME64_0 ^ len, high ^ PRIME64_1) - -end WyHash64VarHandle - -/** - * Wyhash matching Zig 0.15 std.hash.Wyhash. 
- */ -class WyHash64VarHandle extends HashAlgo: - import WyHash64VarHandle.* - - override def hash(buf: Array[Byte], offset: Int, len: Int, seed: Long): Long = - SafeUtils.checkRange(buf, offset, len) - - var off = offset - var s: Long = initSeed(seed) - val secret1 = PRIME64_1 - val secret2 = PRIME64_2 - val secret3 = PRIME64_3 - var a: Long = 0L - var b: Long = 0L - - if len <= 16 then - if len >= 4 then - a = (readIntLE(buf, off).toLong << 32) - | (readIntLE(buf, off + ((len >> 3) << 2)) & 0xffffffffL) - b = (readIntLE(buf, off + len - 4).toLong << 32) - | (readIntLE(buf, off + len - 4 - ((len >> 3) << 2)) & 0xffffffffL) - else if len > 0 then - a = wyr3(buf, off, len) - b = 0 - else - a = 0 - b = 0 - else - var i = len - var p = off - var see0 = s - var see1 = s - var see2 = s - - while i > 48 do - see0 = mix(readLongLE(buf, p) ^ secret1, readLongLE(buf, p + 8) ^ see0) - see1 = mix(readLongLE(buf, p + 16) ^ secret2, readLongLE(buf, p + 24) ^ see1) - see2 = mix(readLongLE(buf, p + 32) ^ secret3, readLongLE(buf, p + 40) ^ see2) - p += 48 - i -= 48 - end while - - see0 ^= see1 ^ see2 - while i > 16 do - see0 = mix(readLongLE(buf, p) ^ secret1, readLongLE(buf, p + 8) ^ see0) - i -= 16 - p += 16 - end while - - a = readLongLE(buf, off + len - 16) - b = readLongLE(buf, off + len - 8) - s = see0 - end if - finishHash(a, b, s, len) - end hash - - override def hash(buffer: ByteBuffer, offset: Int, len: Int, seed: Long): Long = - if buffer.hasArray() then hash(buffer.array(), offset + buffer.arrayOffset(), len, seed) - else - var off = offset - ByteBufferUtils.checkRange(buffer, off, len) - val buf = ByteBufferUtils.inLittleEndianOrder(buffer) - var s: Long = initSeed(seed) - val secret1 = PRIME64_1 - val secret2 = PRIME64_2 - val secret3 = PRIME64_3 - var a: Long = 0L - var b: Long = 0L - - if len <= 16 then - if len >= 4 then - a = (readIntLE(buf, off).toLong << 32) - | (readIntLE(buf, off + ((len >> 3) << 2)) & 0xffffffffL) - b = (readIntLE(buf, off + len - 4).toLong 
<< 32) - | (readIntLE(buf, off + len - 4 - ((len >> 3) << 2)) & 0xffffffffL) - else if len > 0 then - a = wyr3(buf, off, len) - b = 0 - else - a = 0 - b = 0 - else - var i = len - var p = off - var see0 = s - var see1 = s - var see2 = s - - while i > 48 do - see0 = mix(readLongLE(buf, p) ^ secret1, readLongLE(buf, p + 8) ^ see0) - see1 = mix(readLongLE(buf, p + 16) ^ secret2, readLongLE(buf, p + 24) ^ see1) - see2 = mix(readLongLE(buf, p + 32) ^ secret3, readLongLE(buf, p + 40) ^ see2) - p += 48 - i -= 48 - end while - - see0 ^= see1 ^ see2 - while i > 16 do - see0 = mix(readLongLE(buf, p) ^ secret1, readLongLE(buf, p + 8) ^ see0) - i -= 16 - p += 16 - end while - - a = readLongLE(buf, off + len - 16) - b = readLongLE(buf, off + len - 8) - s = see0 - end if - finishHash(a, b, s, len) - end if - end hash - -end WyHash64VarHandle diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHash64.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHash64.scala new file mode 100644 index 000000000..02905dcc3 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHash64.scala @@ -0,0 +1,132 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.lang.Long.rotateLeft +import java.nio.ByteBuffer +import XXHashConstants.* + +object XXHash64: + private lazy val arrayInstance: XXHash64[Array[Byte]] = + new XXHash64(0) + private lazy val byteBufferInstance: XXHash64[ByteBuffer] = + new XXHash64(0) + + def byteArray(seed: Long): XXHash64[Array[Byte]] = + if seed == 0L then arrayInstance + else new XXHash64(seed) + + def byteBuffer(seed: Long): XXHash64[ByteBuffer] = + if seed == 0L then byteBufferInstance + else new XXHash64(seed) +end XXHash64 + +/** + * The implementation is based on lz4-java. 
+ * Copyright 2020 Linnaea Von Lavia and the lz4-java contributors. + * Licensed under the Apache License. + * + * Instances of this class are **not** thread-safe. + */ +class XXHash64[A1: Access](seed: Long) extends HashAlgo[A1]: + private val access: Access[A1] = summon[Access[A1]] + + override def hash(buf: A1, offset: Int, len: Int): Long = + var off = offset + val end: Int = off + len + var h64: Long = 0L + + if len >= 32 then + val limit = end - 32 + var v1: Long = seed + PRIME64_1 + PRIME64_2 + var v2: Long = seed + PRIME64_2 + var v3: Long = seed + 0 + var v4: Long = seed - PRIME64_1 + while + v1 += access.readLongLE(buf, off) * PRIME64_2 + v1 = rotateLeft(v1, 31) + v1 *= PRIME64_1 + off += 8 + + v2 += access.readLongLE(buf, off) * PRIME64_2 + v2 = rotateLeft(v2, 31) + v2 *= PRIME64_1 + off += 8 + + v3 += access.readLongLE(buf, off) * PRIME64_2 + v3 = rotateLeft(v3, 31) + v3 *= PRIME64_1 + off += 8 + + v4 += access.readLongLE(buf, off) * PRIME64_2 + v4 = rotateLeft(v4, 31) + v4 = v4 * PRIME64_1 + off += 8 + off <= limit + do () + + h64 = rotateLeft(v1, 1) + rotateLeft(v2, 7) + rotateLeft(v3, 12) + rotateLeft(v4, 18) + + v1 *= PRIME64_2 + v1 = rotateLeft(v1, 31) + v1 *= PRIME64_1 + h64 ^= v1 + h64 = h64 * PRIME64_1 + PRIME64_4 + + v2 *= PRIME64_2 + v2 = rotateLeft(v2, 31) + v2 *= PRIME64_1 + h64 ^= v2 + h64 = h64 * PRIME64_1 + PRIME64_4 + + v3 *= PRIME64_2 + v3 = rotateLeft(v3, 31) + v3 *= PRIME64_1 + h64 ^= v3 + h64 = h64 * PRIME64_1 + PRIME64_4 + + v4 *= PRIME64_2 + v4 = rotateLeft(v4, 31) + v4 *= PRIME64_1 + h64 ^= v4 + h64 = h64 * PRIME64_1 + PRIME64_4 + else h64 = seed + PRIME64_5 + + h64 += len + + while off <= end - 8 do + var k1: Long = access.readLongLE(buf, off) + k1 *= PRIME64_2 + k1 = rotateLeft(k1, 31) + k1 *= PRIME64_1 + h64 ^= k1 + h64 = rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4 + off += 8 + + if off <= end - 4 then + h64 ^= (access.readIntLE(buf, off) & 0xffffffffL) * PRIME64_1 + h64 = rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3 + off += 4 
+ else () + + while off < end do + h64 ^= (access.readByte(buf, off) & 0xff) * PRIME64_5 + h64 = rotateLeft(h64, 11) * PRIME64_1 + off += 1 + + h64 ^= (h64 >>> 33) + h64 *= PRIME64_2 + h64 ^= (h64 >>> 29) + h64 *= PRIME64_3 + h64 ^= (h64 >>> 32) + + h64 + end hash +end XXHash64 diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHash64VarHandle.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHash64VarHandle.scala deleted file mode 100644 index 8418a8c4b..000000000 --- a/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHash64VarHandle.scala +++ /dev/null @@ -1,222 +0,0 @@ -/* - * sbt - * Copyright 2023, Scala center - * Copyright 2011 - 2022, Lightbend, Inc. - * Copyright 2008 - 2010, Mark Harrah - * Licensed under Apache License 2.0 (see LICENSE) - * - */ - -package sbt.internal.util.hashing - -import java.lang.Long.rotateLeft -import java.nio.ByteBuffer -import VarHandleUtils.* -import XXHashConstants.* - -object XXHash64VarHandle: - private[sbt] val INSTANCE = new XXHash64VarHandle() -end XXHash64VarHandle - -/** - * The implementation is based on lz4-java. - * Copyright 2020 Linnaea Von Lavia and the lz4-java contributors. - * Licensed under the Apache License. - * - * Instances of this class are **not** thread-safe. 
- */ -class XXHash64VarHandle extends HashAlgo: - override def hash(buf: Array[Byte], offset: Int, len: Int, seed: Long): Long = - SafeUtils.checkRange(buf, offset, len) - - var off = offset - val end: Int = off + len - var h64: Long = 0L - - if len >= 32 then - val limit = end - 32 - var v1: Long = seed + PRIME64_1 + PRIME64_2 - var v2: Long = seed + PRIME64_2 - var v3: Long = seed + 0 - var v4: Long = seed - PRIME64_1 - while - v1 += readLongLE(buf, off) * PRIME64_2 - v1 = rotateLeft(v1, 31) - v1 *= PRIME64_1 - off += 8 - - v2 += readLongLE(buf, off) * PRIME64_2 - v2 = rotateLeft(v2, 31) - v2 *= PRIME64_1 - off += 8 - - v3 += readLongLE(buf, off) * PRIME64_2 - v3 = rotateLeft(v3, 31) - v3 *= PRIME64_1 - off += 8 - - v4 += readLongLE(buf, off) * PRIME64_2 - v4 = rotateLeft(v4, 31) - v4 = v4 * PRIME64_1 - off += 8 - off <= limit - do () - - h64 = rotateLeft(v1, 1) + rotateLeft(v2, 7) + rotateLeft(v3, 12) + rotateLeft(v4, 18) - - v1 *= PRIME64_2 - v1 = rotateLeft(v1, 31) - v1 *= PRIME64_1 - h64 ^= v1 - h64 = h64 * PRIME64_1 + PRIME64_4 - - v2 *= PRIME64_2 - v2 = rotateLeft(v2, 31) - v2 *= PRIME64_1 - h64 ^= v2 - h64 = h64 * PRIME64_1 + PRIME64_4 - - v3 *= PRIME64_2 - v3 = rotateLeft(v3, 31) - v3 *= PRIME64_1 - h64 ^= v3 - h64 = h64 * PRIME64_1 + PRIME64_4 - - v4 *= PRIME64_2 - v4 = rotateLeft(v4, 31) - v4 *= PRIME64_1 - h64 ^= v4 - h64 = h64 * PRIME64_1 + PRIME64_4 - else h64 = seed + PRIME64_5 - - h64 += len - - while off <= end - 8 do - var k1: Long = readLongLE(buf, off) - k1 *= PRIME64_2 - k1 = rotateLeft(k1, 31) - k1 *= PRIME64_1 - h64 ^= k1 - h64 = rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4 - off += 8 - - if off <= end - 4 then - h64 ^= (readIntLE(buf, off) & 0xffffffffL) * PRIME64_1 - h64 = rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3 - off += 4 - else () - - while off < end do - h64 ^= (readByte(buf, off) & 0xff) * PRIME64_5 - h64 = rotateLeft(h64, 11) * PRIME64_1 - off += 1 - - h64 ^= (h64 >>> 33) - h64 *= PRIME64_2 - h64 ^= (h64 >>> 29) - h64 *= PRIME64_3 
- h64 ^= (h64 >>> 32) - - h64 - end hash - - override def hash(buffer: ByteBuffer, offset: Int, len: Int, seed: Long): Long = - if buffer.hasArray() then hash(buffer.array(), offset + buffer.arrayOffset(), len, seed) - else - var off = offset - ByteBufferUtils.checkRange(buffer, off, len) - val buf = ByteBufferUtils.inLittleEndianOrder(buffer) - - val end: Int = off + len - var h64: Long = 0L - - if len >= 32 then - val limit: Int = end - 32 - var v1: Long = seed + PRIME64_1 + PRIME64_2 - var v2: Long = seed + PRIME64_2 - var v3: Long = seed + 0 - var v4: Long = seed - PRIME64_1 - while - v1 = v1 + readLongLE(buf, off) * PRIME64_2 - v1 = rotateLeft(v1, 31) - v1 = v1 * PRIME64_1 - off = off + 8 - - v2 += readLongLE(buf, off) * PRIME64_2 - v2 = rotateLeft(v2, 31) - v2 *= PRIME64_1 - off = off + 8 - - v3 += readLongLE(buf, off) * PRIME64_2 - v3 = rotateLeft(v3, 31) - v3 *= PRIME64_1 - off = off + 8 - - v4 += readLongLE(buf, off) * PRIME64_2 - v4 = rotateLeft(v4, 31) - v4 *= PRIME64_1 - off = off + 8 - - off <= limit - do () - - h64 = rotateLeft(v1, 1) + rotateLeft(v2, 7) + rotateLeft(v3, 12) + rotateLeft(v4, 18) - - v1 *= PRIME64_2 - v1 = rotateLeft(v1, 31) - v1 *= PRIME64_1 - h64 ^= v1 - h64 = h64 * PRIME64_1 + PRIME64_4 - - v2 *= PRIME64_2 - v2 = rotateLeft(v2, 31) - v2 *= PRIME64_1 - h64 ^= v2 - h64 = h64 * PRIME64_1 + PRIME64_4 - - v3 *= PRIME64_2 - v3 = rotateLeft(v3, 31) - v3 *= PRIME64_1 - h64 ^= v3 - h64 = h64 * PRIME64_1 + PRIME64_4 - - v4 *= PRIME64_2 - v4 = rotateLeft(v4, 31) - v4 *= PRIME64_1 - h64 ^= v4 - h64 = h64 * PRIME64_1 + PRIME64_4 - else h64 = seed + PRIME64_5 - - h64 += len - - while off <= end - 8 do - var k1: Long = readLongLE(buf, off) - k1 *= PRIME64_2 - k1 = rotateLeft(k1, 31) - k1 *= PRIME64_1 - h64 ^= k1 - h64 = rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4 - off = off + 8 - - if off <= end - 4 then - h64 ^= (readIntLE(buf, off) & 0xffffffffL) * PRIME64_1 - h64 = rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3 - off = off + 4 - else () - - while 
off < end do - h64 ^= (readByte(buf, off) & 0xff) * PRIME64_5 - h64 = rotateLeft(h64, 11) * PRIME64_1 - off += 1 - - h64 ^= h64 >>> 33 - h64 *= PRIME64_2 - h64 ^= h64 >>> 29 - h64 *= PRIME64_3 - h64 ^= h64 >>> 32 - - h64 - end if - end hash - -end XXHash64VarHandle diff --git a/internal/util-control/src/test/scala/sbt/internal/util/AbstractByteBufferHashTest.scala b/internal/util-control/src/test/scala/sbt/internal/util/AbstractByteBufferHashTest.scala new file mode 100644 index 000000000..219246195 --- /dev/null +++ b/internal/util-control/src/test/scala/sbt/internal/util/AbstractByteBufferHashTest.scala @@ -0,0 +1,31 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.nio.ByteBuffer +import verify.BasicTestSuite + +abstract class AbstractByteBufferHashTest extends BasicTestSuite: + def hash64: HashAlgo[ByteBuffer] + def emptyHash: Long + def zeroHash: Long + + test("Hash empty ByteBuffer"): + val buf: ByteBuffer = ByteBuffer.allocate(0) + val r = hash64.hash(buf, 0, 0) + assert(r == emptyHash) + + test("Hash one byte ByteBuffer"): + val buf: ByteBuffer = ByteBuffer.allocate(1) + buf.put(0: Byte) + buf.rewind() + val r = hash64.hash(buf, 0, 1) + assert(r == zeroHash) +end AbstractByteBufferHashTest diff --git a/internal/util-control/src/test/scala/sbt/internal/util/AbstractHashTest.scala b/internal/util-control/src/test/scala/sbt/internal/util/AbstractHashTest.scala index bd2e93ed3..821b395b7 100644 --- a/internal/util-control/src/test/scala/sbt/internal/util/AbstractHashTest.scala +++ b/internal/util-control/src/test/scala/sbt/internal/util/AbstractHashTest.scala @@ -9,43 +9,24 @@ package sbt.internal.util.hashing -import java.nio.ByteBuffer import verify.BasicTestSuite abstract class AbstractHashTest extends BasicTestSuite: - def hash64: HashAlgo + def hash64: 
HashAlgo[Array[Byte]] def newStreaming(seed: Int): StreamingHashAlgo def emptyHash: Long def zeroHash: Long test("Hash empty array"): val buf: Array[Byte] = Array[Byte](0) - val r = hash64.hash(buf, 0, 0, 0) - assert(r == emptyHash) - - test("Hash empty ByteBuffer"): - val buf: ByteBuffer = ByteBuffer.allocate(0) - val r = hash64.hash(buf, 0, 0, 0) + val r = hash64.hash(buf, 0, 0) assert(r == emptyHash) test("Hash one byte array"): val buf: Array[Byte] = Array[Byte](0) - val r = hash64.hash(buf, 0, 1, 0) + val r = hash64.hash(buf, 0, 1) assert(r == zeroHash) - test("Hash one byte ByteBuffer"): - val buf: ByteBuffer = ByteBuffer.allocate(1) - buf.put(0: Byte) - buf.rewind() - val r = hash64.hash(buf, 0, 1, 0) - assert(r == zeroHash) - - test("Streaming hash empty ByteBuffer"): - val hash = newStreaming(0) - try - assert(hash.getValue == emptyHash) - finally hash.close() - test("Streaming one byte array"): val hash = newStreaming(0) try diff --git a/internal/util-control/src/test/scala/sbt/internal/util/WyHashTest.scala b/internal/util-control/src/test/scala/sbt/internal/util/WyHashTest.scala index b744c7784..222dcbbea 100644 --- a/internal/util-control/src/test/scala/sbt/internal/util/WyHashTest.scala +++ b/internal/util-control/src/test/scala/sbt/internal/util/WyHashTest.scala @@ -9,10 +9,19 @@ package sbt.internal.util.hashing -object WyHashTest extends AbstractHashTest: - override val hash64: HashAlgo = Hashing.wyhash64 +import java.nio.ByteBuffer + +object WyHashByteArrayTest extends AbstractHashTest: + override val hash64: HashAlgo[Array[Byte]] = Hashing.wyhash64(0L) override def newStreaming(seed: Int): StreamingHashAlgo = Hashing.newStreamingWyHash64(seed) override val emptyHash = 290873116282709081L override val zeroHash = -295637713410278011L -end WyHashTest +end WyHashByteArrayTest + +object WyHasByteBufferHashTest extends AbstractByteBufferHashTest: + override val hash64: HashAlgo[ByteBuffer] = + Hashing.wyhash64ByteBuffer(0L) + override val emptyHash = 
290873116282709081L + override val zeroHash = -295637713410278011L +end WyHasByteBufferHashTest diff --git a/internal/util-control/src/test/scala/sbt/internal/util/XXHashTest.scala b/internal/util-control/src/test/scala/sbt/internal/util/XXHashTest.scala index e56d7994b..18fb4308c 100644 --- a/internal/util-control/src/test/scala/sbt/internal/util/XXHashTest.scala +++ b/internal/util-control/src/test/scala/sbt/internal/util/XXHashTest.scala @@ -9,10 +9,19 @@ package sbt.internal.util.hashing -object XXHashTest extends AbstractHashTest: - override val hash64: HashAlgo = Hashing.xxhash64 +import java.nio.ByteBuffer + +object XXHashByteArrayTest extends AbstractHashTest: + override val hash64: HashAlgo[Array[Byte]] = Hashing.xxhash64(0L) override def newStreaming(seed: Int): StreamingHashAlgo = Hashing.newStreamingXXHash64(seed) override val emptyHash = -1205034819632174695L override val zeroHash = -1642502924627794072L -end XXHashTest +end XXHashByteArrayTest + +object XXHashByteBufferHashTest extends AbstractByteBufferHashTest: + override val hash64: HashAlgo[ByteBuffer] = + Hashing.xxhash64ByteBuffer(0L) + override val emptyHash = -1205034819632174695L + override val zeroHash = -1642502924627794072L +end XXHashByteBufferHashTest diff --git a/util-cache/src/main/scala/sbt/util/HashUtil.scala b/util-cache/src/main/scala/sbt/util/HashUtil.scala index cc1ec9216..3ce184bd8 100644 --- a/util-cache/src/main/scala/sbt/util/HashUtil.scala +++ b/util-cache/src/main/scala/sbt/util/HashUtil.scala @@ -5,7 +5,7 @@ import sbt.internal.util.hashing.Hashing object HashUtil: private[sbt] def xxhash64(bytes: Array[Byte]): Long = - Hashing.xxhash64.hash(bytes, 0, bytes.size, 0) + Hashing.xxhash64(0L).hash(bytes, 0, bytes.size) private[sbt] def imohash64(path: NioPath): Long = val hash64 = Hashing.samplingFileHashWyHash64(0) From 0bab2066dfc259b4d08e3f0215f2bb4f7912abb5 Mon Sep 17 00:00:00 2001 From: Eugene Yokota Date: Sun, 31 May 2026 05:06:28 -0400 Subject: [PATCH 2/2] [2.x] 
Reimplement FarmHash **Problem** sbtn and the server use FarmHash. **Solution** This reimplements FarmHash using Scala. --- build.sbt | 5 +- .../sbt/internal/util/HashBenchmark.scala | 6 + .../internal/util/hashing/FarmHash64.scala | 209 ++++++++++++++++++ .../util/hashing/FarmHashConstants.scala | 17 ++ .../sbt/internal/util/hashing/Hashing.scala | 3 + .../sbt/internal/util/FarmHashTest.scala | 44 ++++ .../java/sbt/internal/BootServerSocket.java | 15 +- .../sbt/internal/client/NetworkClient.scala | 8 +- main/src/main/scala/sbt/Main.scala | 7 +- project/Dependencies.scala | 1 + .../src/main/scala/sbt/util/HashUtil.scala | 3 + 11 files changed, 303 insertions(+), 15 deletions(-) create mode 100644 internal/util-control/src/main/scala/sbt/internal/util/hashing/FarmHash64.scala create mode 100644 internal/util-control/src/main/scala/sbt/internal/util/hashing/FarmHashConstants.scala create mode 100644 internal/util-control/src/test/scala/sbt/internal/util/FarmHashTest.scala diff --git a/build.sbt b/build.sbt index 5315a80ce..b031efd61 100644 --- a/build.sbt +++ b/build.sbt @@ -300,6 +300,7 @@ lazy val utilControl = (project in file("internal") / "util-control") scalacheck % Test, scalaVerify % Test, hedgehog % Test, + zeroAllocationHashing % Test, ), mimaSettings, ) @@ -635,7 +636,9 @@ lazy val commandProj = (project in file("main-command")) exclude[MissingClassProblem]("sbt.internal.util.ReadJsonFromInputStream$"), exclude[MissingClassProblem]("sbt.internal.client.ServerConnection"), exclude[IncompatibleResultTypeProblem]("sbt.internal.client.NetworkClient.connection"), - exclude[IncompatibleResultTypeProblem]("sbt.internal.client.NetworkClient.init") + exclude[IncompatibleResultTypeProblem]("sbt.internal.client.NetworkClient.init"), + exclude[DirectMissingMethodProblem]("sbt.internal.BootServerSocket.this"), + exclude[DirectMissingMethodProblem]("sbt.internal.BootServerSocket.socketLocation"), ), Compile / headerCreate / unmanagedSources := { val old = (Compile / 
headerCreate / unmanagedSources).value diff --git a/internal/hash-benchmark/src/main/scala/sbt/internal/util/HashBenchmark.scala b/internal/hash-benchmark/src/main/scala/sbt/internal/util/HashBenchmark.scala index 165862816..b60847f8f 100644 --- a/internal/hash-benchmark/src/main/scala/sbt/internal/util/HashBenchmark.scala +++ b/internal/hash-benchmark/src/main/scala/sbt/internal/util/HashBenchmark.scala @@ -39,6 +39,12 @@ class FarmHashHashBenchmark extends AbstractHashBenchmark: val hash = LongHashFunction.farmNa().hashBytes(buf) java.lang.Long.toHexString(hash) +class FarmHash64VarHandleHashBenchmark extends AbstractHashBenchmark: + override def hash(buf: Array[Byte]): String = + val h = Hashing.farmNaHash64 + val hash = h.hash(buf, 0, buf.size) + java.lang.Long.toHexString(hash) + class MurmurHash32HashBenchmark extends AbstractHashBenchmark: override def hash(buf: Array[Byte]): String = val lo = MurmurHash3.bytesHash(buf, 0x85ebca6b) diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/FarmHash64.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/FarmHash64.scala new file mode 100644 index 000000000..61d9ada40 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/FarmHash64.scala @@ -0,0 +1,209 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. 
+ * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.lang.Long.rotateRight +import java.nio.ByteBuffer +import FarmHashConstants.* + +object FarmHash64: + private inline def shiftMix(x: Long): Long = + x ^ (x >>> 47) + + private inline def hashLen16(u: Long, v: Long): Long = + hashLen16(u, v, K_MUL) + + private inline def hashLen16(u: Long, v: Long, m: Long): Long = + val a = shiftMix((u ^ v) * m) + shiftMix((v ^ a) * m) * m + + private inline def mul(len: Long): Long = + K2 + (len << 1) + + private def hash1To3Bytes(len: Int, firstByte: Int, midOrLastByte: Int, lastByte: Int): Long = + val y = firstByte + (midOrLastByte << 8) + val z = len + (lastByte << 2) + shiftMix((y.toLong * K2) ^ (z.toLong * K0)) * K2 + + private def hash4To7Bytes(len: Long, first4Bytes: Long, last4Bytes: Long): Long = + val m = mul(len) + hashLen16(len + (first4Bytes << 3), last4Bytes, m) + + private def hash8To16Bytes(len: Long, first8Bytes: Long, last8Bytes: Long): Long = + val m = mul(len) + val a = first8Bytes + K2 + val c = rotateRight(last8Bytes, 37) * m + a + val d = (rotateRight(a, 25) + last8Bytes) * m + hashLen16(c, d, m) + + private def hashLen0To16[A1](in: A1, offset: Long, len: Long)(access: Access[A1]): Long = + val off = offset.toInt + if len >= 8L then + val a = access.readLongLE(in, off) + val b = access.readLongLE(in, (off + len - 8L).toInt) + hash8To16Bytes(len, a, b) + else if len >= 4L then + val a = access.readIntLE(in, off) & 0xffffffffL + val b = access.readIntLE(in, (off + len - 4L).toInt) & 0xffffffffL + hash4To7Bytes(len, a, b) + else if len > 0L then // readByte is signed; mask to 0..255 so bytes >= 0x80 are mixed as unsigned values + val a = access.readByte(in, off) & 0xff + val b = access.readByte(in, (off + (len >> 1)).toInt) & 0xff + val c = access.readByte(in, (off + len - 1).toInt) & 0xff + hash1To3Bytes(len.toInt, a, b, c) + else K2 + + private def hashLen17To32[A1](in: A1, offset: Long, len: Long)(access: Access[A1]): Long = + val off = offset.toInt + val m = 
mul(len) + val a = access.readLongLE(in, off) * K1 + val b = access.readLongLE(in, off + 8) + val c = access.readLongLE(in, (off + len - 8L).toInt) * m + val d = access.readLongLE(in, (off + len - 16L).toInt) * K2 + hashLen16(rotateRight(a + b, 43) + rotateRight(c, 30) + d, a + rotateRight(b + K2, 18) + c, m) + + private def naHashLen33To64[A1](in: A1, offset: Long, len: Long)(access: Access[A1]): Long = + val off = offset.toInt + val m = mul(len) + val a = access.readLongLE(in, off) * K2 + val b = access.readLongLE(in, off + 8) + val c = access.readLongLE(in, (off + len - 8).toInt) * m + val d = access.readLongLE(in, (off + len - 16).toInt) * K2 + val y = rotateRight(a + b, 43) + rotateRight(c, 30) + d + val z = hashLen16(y, a + rotateRight(b + K2, 18) + c, m) + val e = access.readLongLE(in, off + 16) * m + val f = access.readLongLE(in, off + 24) + val g = (y + access.readLongLE(in, (off + len - 32).toInt)) * m + val h = (z + access.readLongLE(in, (off + len - 24).toInt)) * m + hashLen16(rotateRight(e + f, 43) + rotateRight(g, 30) + h, e + rotateRight(f + a, 18) + g, m) + + def naHash64[A1](in: A1, offset: Long, len: Long)(access: Access[A1]): Long = + val seed: Long = 81L + if len <= 32 then + if len <= 16 then hashLen0To16(in, offset, len)(access) + else hashLen17To32(in, offset, len)(access) + else if len <= 64 then naHashLen33To64(in, offset, len)(access) + else + var off = offset.toInt + // For strings over 64 bytes we loop. Internal state consists of + // 56 bytes: v, w, x, y, and z. + var x: Long = seed + // y == seed * K1 + 113, computed directly rather than as a hand-expanded constant; + // the Long multiplication wraps around on overflow (arithmetic modulo 2^64) + var y: Long = seed * K1 + 113 + var z: Long = shiftMix(y * K2 + 113) * K2 + var v1: Long = 0L + var v2: Long = 0L + var w1: Long = 0L + var w2: Long = 0L + x = x * K2 + access.readLongLE(in, off) + + // Set end so that after the loop we have 1 to 64 bytes left to process. 
+ val fin = off + ((len - 1) >> 6) * 64 + val last64 = fin + ((len - 1) & 63) - 63 + + while + x = rotateRight(x + y + v1 + access.readLongLE(in, (off + 8).toInt), 37) * K1 + y = rotateRight(y + v2 + access.readLongLE(in, (off + 48).toInt), 42) * K1 + x ^= w2 + y += v1 + access.readLongLE(in, off + 40) + z = rotateRight(z + w1, 33) * K1 + var a: Long = v2 * K1 + var b: Long = x + w1 + val z1 = access.readLongLE(in, off + 24) + a += access.readLongLE(in, off) + b = rotateRight(b + a + z1, 21) + val c = a + a += access.readLongLE(in, off + 8) + a += access.readLongLE(in, off + 16) + b += rotateRight(a, 44) + v1 = a + z1 + v2 = b + c + var a1 = z + w2 + var b1 = y + access.readLongLE(in, off + 16) + var z2 = access.readLongLE(in, off + 32 + 24) + a1 += access.readLongLE(in, off + 32) + b1 = rotateRight(b1 + a1 + z2, 21) + val c1 = a1 + a1 += access.readLongLE(in, off + 32 + 8) + a1 += access.readLongLE(in, off + 32 + 16) + b1 += rotateRight(a1, 44) + w1 = a1 + z2 + w2 = b1 + c1 + val t = z + z = x + x = t + off += 64 + off != fin + do () + + off = last64.toInt + + val m = K1 + ((z & 0xff) << 1) + + // Make s point to the last 64 bytes of input. 
+ w1 += (len - 1) & 63 + v1 += w1 + w1 += v1 + x = rotateRight(x + y + v1 + access.readLongLE(in, off + 8), 37) * m + y = rotateRight(y + v2 + access.readLongLE(in, off + 48), 42) * m + x ^= w2 * 9 + y += v1 * 9 + access.readLongLE(in, off + 40) + z = rotateRight(z + w1, 33) * m + var a: Long = v2 * m + var b: Long = x + w1 + val z1 = access.readLongLE(in, off + 24) + a += access.readLongLE(in, off) + b = rotateRight(b + a + z1, 21) + val c = a + a += access.readLongLE(in, off + 8) + a += access.readLongLE(in, off + 16) + b += rotateRight(a, 44) + v1 = a + z1 + v2 = b + c + var a1: Long = z + w2 + var b1: Long = y + access.readLongLE(in, off + 16) + val z2 = access.readLongLE(in, off + 32 + 24) + a1 += access.readLongLE(in, off + 32) + b1 = rotateRight(b1 + a1 + z2, 21) + val c1 = a1 + a1 += access.readLongLE(in, off + 32 + 8) + a1 += access.readLongLE(in, off + 32 + 16) + b1 += rotateRight(a1, 44) + w1 = a1 + z2 + w2 = b1 + c1 + val t = z + z = x + x = t + hashLen16(hashLen16(v1, w1, m) + shiftMix(y) * K0 + z, hashLen16(v2, w2, m) + x, m) +end FarmHash64 + +object FarmNaSeedlessHash64: + + private lazy val arrayInstance: FarmNaSeedlessHash64[Array[Byte]] = + new FarmNaSeedlessHash64() + private lazy val byteBufferInstance: FarmNaSeedlessHash64[ByteBuffer] = + new FarmNaSeedlessHash64() + + def byteArray: FarmNaSeedlessHash64[Array[Byte]] = + arrayInstance + + def byteBuffer: FarmNaSeedlessHash64[ByteBuffer] = + byteBufferInstance +end FarmNaSeedlessHash64 + +class FarmNaSeedlessHash64[A1: Access] extends HashAlgo: + import FarmHash64.* + private val access: Access[A1] = summon[Access[A1]] + + override def hash(buf: A1, offset: Int, len: Int): Long = + val hash = naHash64(buf, offset, len)(access) + hash +end FarmNaSeedlessHash64 diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/FarmHashConstants.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/FarmHashConstants.scala new file mode 100644 index 000000000..4d6969c19 
--- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/FarmHashConstants.scala @@ -0,0 +1,17 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +object FarmHashConstants: + final val K0 = 0xc3a5c85c97cb3127L + final val K1 = 0xb492b66fbe98f273L + final val K2 = 0x9ae16a3b2f90404fL + final val K_MUL = 0x9ddfea08eb382d69L +end FarmHashConstants diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/Hashing.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/Hashing.scala index f7670d3b5..1db106b71 100644 --- a/internal/util-control/src/main/scala/sbt/internal/util/hashing/Hashing.scala +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/Hashing.scala @@ -12,6 +12,9 @@ package sbt.internal.util.hashing import java.nio.ByteBuffer object Hashing: + def farmNaHash64: HashAlgo[Array[Byte]] = + FarmNaSeedlessHash64.byteArray + def xxhash64(seed: Long): HashAlgo[Array[Byte]] = XXHash64.byteArray(seed) diff --git a/internal/util-control/src/test/scala/sbt/internal/util/FarmHashTest.scala b/internal/util-control/src/test/scala/sbt/internal/util/FarmHashTest.scala new file mode 100644 index 000000000..a0c04c66d --- /dev/null +++ b/internal/util-control/src/test/scala/sbt/internal/util/FarmHashTest.scala @@ -0,0 +1,44 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. 
+ * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import verify.BasicTestSuite +import java.util.concurrent.ThreadLocalRandom +import net.openhft.hashing.LongHashFunction + +object FarmHashTest extends BasicTestSuite: + lazy val reference = LongHashFunction.farmNa() + + def hash64: HashAlgo[Array[Byte]] = + Hashing.farmNaHash64 + def emptyHash: Long = -7286425919675154353L + def zeroHash: Long = -4728684028706075820L + + test("Hash empty array"): + val buf: Array[Byte] = new Array[Byte](0) + val r = hash64.hash(buf, 0, 0) + assert(r == emptyHash) + val r2 = reference.hashBytes(buf) + assert(r == r2) + + test("Hash one byte array"): + val buf: Array[Byte] = Array[Byte](0) + val r = hash64.hash(buf, 0, 1) + assert(r == zeroHash) + val r2 = reference.hashBytes(buf) + assert(r == r2) + + test("Hash 2048 bytes"): + val buf: Array[Byte] = new Array[Byte](2048) + ThreadLocalRandom.current().nextBytes(buf) + val r = hash64.hash(buf, 0, 2048) + val r2 = reference.hashBytes(buf) + assert(r == r2) +end FarmHashTest diff --git a/main-command/src/main/java/sbt/internal/BootServerSocket.java b/main-command/src/main/java/sbt/internal/BootServerSocket.java index ace29c12a..4becac3ff 100644 --- a/main-command/src/main/java/sbt/internal/BootServerSocket.java +++ b/main-command/src/main/java/sbt/internal/BootServerSocket.java @@ -278,18 +278,18 @@ public class BootServerSocket implements AutoCloseable { } }; - public BootServerSocket(final AppConfiguration configuration) + public BootServerSocket(final AppConfiguration configuration, final long farmHash) throws ServerAlreadyBootingException, IOException { final Path base = configuration.baseDirectory().toPath().toRealPath(); if (!isWindows) { - final String actualSocketLocation = socketLocation(base); + final String actualSocketLocation = socketLocation(base, farmHash); final Path target = Paths.get(actualSocketLocation).getParent(); if 
(!Files.isDirectory(target)) Files.createDirectories(target); socketFile = Paths.get(actualSocketLocation); } else { socketFile = null; } - serverSocket = newSocket(socketLocation(base)); + serverSocket = newSocket(socketLocation(base, farmHash)); if (serverSocket != null) { running.set(true); acceptFuture = service.submit(acceptRunnable); @@ -299,20 +299,17 @@ public class BootServerSocket implements AutoCloseable { } } - public static String socketLocation(final Path base) + public static String socketLocation(final Path base, final long farmHash) throws UnsupportedEncodingException, IOException { final Path target = base.resolve("project").resolve("target"); - long hash = - ((long) target.toString().hashCode() << 32) - | (target.toString().length() * 31 & 0xffffffffL); if (isWindows) { - return "sbt-load" + hash; + return "sbt-load" + farmHash; } else { final String alternativeSocketLocation = System.getenv().getOrDefault("XDG_RUNTIME_DIR", System.getProperty("java.io.tmpdir")); final Path alternativeSocketLocationRoot = Paths.get(alternativeSocketLocation).resolve(".sbt"); - final Path locationForSocket = alternativeSocketLocationRoot.resolve("sbt-socket" + hash); + final Path locationForSocket = alternativeSocketLocationRoot.resolve("sbt-socket" + farmHash); final Path pathForSocket = locationForSocket.resolve("sbt-load.sock"); return pathForSocket.toString(); } diff --git a/main-command/src/main/scala/sbt/internal/client/NetworkClient.scala b/main-command/src/main/scala/sbt/internal/client/NetworkClient.scala index 450c9b81a..4c3fc0613 100644 --- a/main-command/src/main/scala/sbt/internal/client/NetworkClient.scala +++ b/main-command/src/main/scala/sbt/internal/client/NetworkClient.scala @@ -34,7 +34,7 @@ import sbt.internal.util.{ import sbt.io.IO import sbt.io.syntax.* import sbt.protocol.* -import sbt.util.{ Level, Logger } +import sbt.util.{ HashUtil, Level, Logger } import sjsonnew.BasicJsonProtocol.* import sjsonnew.shaded.scalajson.ast.unsafe.{ 
JObject, JValue } import sjsonnew.support.scalajson.unsafe.Converter @@ -342,8 +342,10 @@ class NetworkClient( * This instance must be shutdown explicitly via `sbt -client shutdown` */ def waitForServer(portfile: File, log: Boolean, startServer: Boolean): Unit = { - val bootSocketName = - BootServerSocket.socketLocation(arguments.baseDirectory.toPath.toRealPath()) + val base = arguments.baseDirectory.toPath.toRealPath() + val target = base.resolve("project").resolve("target") + val hash = HashUtil.farmHash(target.toString().getBytes("UTF-8")) + val bootSocketName = BootServerSocket.socketLocation(base, hash) /* * For unknown reasons, linux sometimes struggles to connect to the socket in some diff --git a/main/src/main/scala/sbt/Main.scala b/main/src/main/scala/sbt/Main.scala index 9c35ebae3..9671ce5a3 100644 --- a/main/src/main/scala/sbt/Main.scala +++ b/main/src/main/scala/sbt/Main.scala @@ -32,7 +32,7 @@ import sbt.internal.util.complete.Parser import sbt.internal.util.{ RunningProcesses, Terminal as ITerminal, * } import sbt.io.* import sbt.io.syntax.* -import sbt.util.{ ActionCache, Level, Logger, Show } +import sbt.util.{ ActionCache, HashUtil, Level, Logger, Show } import xsbti.AppProvider import scala.annotation.{ nowarn, tailrec } @@ -157,7 +157,10 @@ private[sbt] object xMain: e.printStackTrace() } - try Some(new BootServerSocket(configuration)) -> None + val target = + configuration.baseDirectory().toPath().toRealPath().resolve("project").resolve("target") + val hash = HashUtil.farmHash(target.toString().getBytes("UTF-8")); + try Some(new BootServerSocket(configuration, hash)) -> None catch { case e: ServerAlreadyBootingException if hasConsole && !ITerminal.startedByRemoteClient => printThrowable(e) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 08b629b73..3bace591b 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -109,6 +109,7 @@ object Dependencies { val caffeine = "com.github.ben-manes.caffeine" % 
"caffeine" % "2.8.5" val blake3 = "pt.kcry" %% "blake3" % "3.1.2" + val zeroAllocationHashing = "net.openhft" % "zero-allocation-hashing" % "0.16" val hedgehog = "qa.hedgehog" %% "hedgehog-sbt" % "0.13.0" val disruptor = "com.lmax" % "disruptor" % "3.4.2" diff --git a/util-cache/src/main/scala/sbt/util/HashUtil.scala b/util-cache/src/main/scala/sbt/util/HashUtil.scala index 3ce184bd8..104ec205d 100644 --- a/util-cache/src/main/scala/sbt/util/HashUtil.scala +++ b/util-cache/src/main/scala/sbt/util/HashUtil.scala @@ -4,6 +4,9 @@ import java.nio.file.Path as NioPath import sbt.internal.util.hashing.Hashing object HashUtil: + private[sbt] def farmHash(bytes: Array[Byte]): Long = + Hashing.farmNaHash64.hash(bytes, 0, bytes.size) + private[sbt] def xxhash64(bytes: Array[Byte]): Long = Hashing.xxhash64(0L).hash(bytes, 0, bytes.size)