Port WyHash to Scala

This commit is contained in:
Eugene Yokota 2026-05-24 23:51:28 -04:00
parent c56a2621c9
commit cab1397dd7
7 changed files with 382 additions and 43 deletions

View File

@ -11,7 +11,10 @@ package sbt.internal.util.hashing
/**
 * Entry points for the hash implementations of this package.
 *
 * The one-shot accessors return shared instances; the streaming factories build a fresh
 * hasher on every call.
 */
object Hashing:

  /** Shared one-shot XXHash64 implementation. */
  def xxhash64: HashAlgo =
    XXHash64VarHandle.INSTANCE

  /** Shared one-shot WyHash64 implementation. */
  def wyhash64: HashAlgo =
    WyHash64VarHandle.INSTANCE

  /** Creates a new streaming XXHash64 hasher for the given seed. */
  def newStreamingXXHash64(seed: Long): StreamingHashAlgo =
    val hasher = new StreamingXXHash64VarHandle(seed)
    hasher

  /** Creates a new streaming WyHash64 hasher for the given seed. */
  def newStreamingWyHash64(seed: Long): StreamingHashAlgo =
    val hasher = new StreamingWyHash64VarHandle(seed)
    hasher

end Hashing

View File

@ -0,0 +1,120 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import WyHash64VarHandle.*
import WyHashConstants.*
import VarHandleUtils.*
/**
 * Streaming (incremental) WyHash64.
 *
 * Bytes handed to `update` are buffered in a 48-byte block. Every complete block is folded
 * into the three lanes `v0`/`v1`/`v2` by `round`. `getValue` finishes the hash from the lanes
 * and the bytes still buffered; it only works on local copies, so it does not modify the
 * accumulated state and more data can be appended afterwards.
 *
 * @param seed seed from which the lanes are initialised (through `initSeed`) on construction
 *             and on every `reset()`
 */
class StreamingWyHash64VarHandle(seed: Long) extends StreamingHashAlgo(seed):
  // `a`, `b` and `state` are never assigned by this class after initialisation: `getValue`
  // only reads `a`/`b` as starting values that every branch overwrites, and `state` is not
  // referenced at all. They are kept because they are protected members.
  protected var a: Long = 0
  protected var b: Long = 0
  protected val state: Array[Long] = new Array[Long](3)
  // The three accumulator lanes.
  protected var v0: Long = 0
  protected var v1: Long = 0
  protected var v2: Long = 0
  // Total number of bytes passed to `update` since the last reset.
  protected var totalLen: Long = 0L
  // Pending bytes that have not been folded into the lanes yet, and how many are valid.
  protected val memory = new Array[Byte](48)
  protected var memoryLen: Int = 0
  reset()

  /** Re-initialises all three lanes from the seed and forgets any buffered input. */
  override def reset(): Unit =
    val s: Long = initSeed(seed)
    this.v0 = s
    this.v1 = s
    this.v2 = s
    this.totalLen = 0
    this.memoryLen = 0

  /**
   * Computes the hash of everything passed to `update` since the last reset.
   *
   * @return the 64-bit hash value
   */
  def getValue: Long =
    var _a: Long = this.a
    var _b: Long = this.b
    var lane0: Long = this.v0
    var input = this.memory
    var inputLen = this.memoryLen
    if this.totalLen <= 16 then
      // Short input: everything is still in `memory`, so inputLen equals totalLen here.
      if inputLen >= 4 then
        val last4 = inputLen - 4
        val quarter = (inputLen >> 3) << 2
        _a = (readIntLE(input, 0).toLong << 32) | (readIntLE(input, quarter) & 0xffffffffL)
        // Widen to Long *before* shifting: an Int shifted by 32 is shifted by 0 on the JVM,
        // which would leave a sign extension in the high half instead of the word itself.
        _b = (readIntLE(input, last4).toLong << 32) |
          (readIntLE(input, last4 - quarter) & 0xffffffffL)
      else if inputLen > 0 then
        _a = wyr3(input, 0, inputLen)
        _b = 0
      else
        _a = 0
        _b = 0
      end if
    else
      if inputLen < 16 then
        // Fewer than 16 pending bytes: rebuild the last 16 bytes of the stream from the
        // tail of `memory` (bytes preceding the pending ones) followed by the pending bytes.
        val rem = 16 - inputLen
        val scratch = new Array[Byte](16)
        System.arraycopy(memory, 48 - rem, scratch, 0, rem)
        System.arraycopy(memory, 0, scratch, rem, inputLen)
        input = scratch
        inputLen = 16
      end if
      if this.totalLen >= 48 then lane0 ^= this.v1 ^ this.v2
      var i = 0
      while i + 16 < inputLen do
        lane0 = mix(readLongLE(input, i) ^ PRIME64_1, readLongLE(input, i + 8) ^ lane0)
        i += 16
      _a = readLongLE(input, inputLen - 16)
      _b = readLongLE(input, inputLen - 8)
    end if
    finishHash(_a, _b, lane0, this.totalLen)
  end getValue

  /**
   * Appends `len` bytes of `buf`, starting at `off`, to the hashed stream.
   *
   * @param buf source array
   * @param off index of the first byte to consume
   * @param len number of bytes to consume
   */
  def update(buf: Array[Byte], off: Int, len: Int): Unit =
    this.totalLen += len
    if len <= 48 - this.memoryLen then
      // Everything fits into the pending block; nothing to fold yet.
      System.arraycopy(buf, off, this.memory, this.memoryLen, len)
      this.memoryLen += len
    else
      var i: Int = 0
      if this.memoryLen > 0 then
        // Top up the pending block to 48 bytes and fold it.
        i = 48 - this.memoryLen
        System.arraycopy(buf, off, this.memory, this.memoryLen, i)
        round(this.memory, 0)
        this.memoryLen = 0
      end if
      // Fold full blocks straight from the input; the final (possibly full) block is kept
      // pending because `getValue` needs the last bytes of the stream.
      while i + 48 < len do
        round(buf, off + i)
        i += 48
      val remaining = len - i
      if remaining < 16 && i >= 48 then
        // Keep the bytes preceding the remainder at the tail of `memory`, where `getValue`
        // picks them up when fewer than 16 bytes are pending.
        val rem = 16 - remaining
        System.arraycopy(buf, off + i - rem, this.memory, 48 - rem, rem)
      end if
      System.arraycopy(buf, off + i, this.memory, 0, remaining)
      this.memoryLen = remaining
    end if
  end update

  /** Folds the 48 bytes of `buf` starting at `p` into the three lanes, 16 bytes per lane. */
  private def round(buf: Array[Byte], p: Int): Unit =
    this.v0 = mix(readLongLE(buf, p) ^ PRIME64_1, readLongLE(buf, p + 8) ^ this.v0)
    this.v1 = mix(readLongLE(buf, p + 16) ^ PRIME64_2, readLongLE(buf, p + 24) ^ this.v1)
    this.v2 = mix(readLongLE(buf, p + 32) ^ PRIME64_3, readLongLE(buf, p + 40) ^ this.v2)
end StreamingWyHash64VarHandle

View File

@ -0,0 +1,162 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.nio.ByteBuffer
import WyHashConstants.*
import VarHandleUtils.*
/**
 * Arithmetic shared by the one-shot and the streaming WyHash64 implementations.
 */
object WyHash64VarHandle:

  /** Shared instance exposed through `Hashing.wyhash64`. */
  private[hashing] val INSTANCE = new WyHash64VarHandle()

  /** Derives the initial lane value from the user-supplied seed. */
  private[hashing] inline def initSeed(seed: Long): Long =
    val scrambled = mix(seed ^ PRIME64_0, PRIME64_1)
    seed ^ scrambled

  /** XOR of the low and the high 64 bits of the unsigned 128-bit product `a * b`. */
  private[hashing] def mix(a: Long, b: Long): Long =
    (a * b) ^ unsignedMultiplyHigh(a, b)

  /**
   * High 64 bits of the unsigned 128-bit product of `a` and `b`.
   *
   * `Math.multiplyHigh` is signed; the two masked terms add the other operand back for each
   * operand whose sign bit is set.
   */
  private[hashing] inline def unsignedMultiplyHigh(a: Long, b: Long): Long =
    val signedHigh = Math.multiplyHigh(a, b)
    val fixForA = (a >> 63) & b
    val fixForB = (b >> 63) & a
    signedHigh + fixForA + fixForB

  /** Packs the first, middle and last byte of a `k`-byte array slice into 24 bits. */
  private[hashing] inline def wyr3(buf: Array[Byte], off: Int, k: Int): Long =
    val first = buf(off) & 0xffL
    val middle = buf(off + (k >> 1)) & 0xffL
    val last = buf(off + k - 1) & 0xffL
    (first << 16) | (middle << 8) | last

  /** Packs the first, middle and last byte of a `k`-byte buffer slice into 24 bits. */
  private[hashing] inline def wyr3(buf: ByteBuffer, off: Int, k: Int): Long =
    val first = buf.get(off) & 0xffL
    val middle = buf.get(off + (k >> 1)) & 0xffL
    val last = buf.get(off + k - 1) & 0xffL
    (first << 16) | (middle << 8) | last

  /**
   * Final avalanche: combines the two tail words, the accumulated lane and the input length
   * into the resulting 64-bit hash.
   */
  private[hashing] inline def finishHash(a: Long, b: Long, seed: Long, len: Long): Long =
    val x = a ^ PRIME64_1
    val y = b ^ seed
    val productLow = x * y
    val productHigh = unsignedMultiplyHigh(x, y)
    mix(productLow ^ PRIME64_0 ^ len, productHigh ^ PRIME64_1)

end WyHash64VarHandle
/**
 * Wyhash matching Zig 0.15 std.hash.Wyhash.
 *
 * One-shot implementation: both overloads hash `len` bytes starting at `offset` and differ
 * only in how the bytes are read (array vs. `ByteBuffer`).
 */
class WyHash64VarHandle extends HashAlgo:
  import WyHash64VarHandle.*

  /**
   * Hashes `len` bytes of `buf` starting at `offset`.
   *
   * @param buf    source array
   * @param offset index of the first byte to hash
   * @param len    number of bytes to hash
   * @param seed   hash seed
   * @return the 64-bit hash value
   */
  override def hash(buf: Array[Byte], offset: Int, len: Int, seed: Long): Long =
    SafeUtils.checkRange(buf, offset, len)
    var lane0: Long = initSeed(seed)
    var first: Long = 0L
    var second: Long = 0L
    if len > 16 then
      val limit = offset + len
      var pos = offset
      var lane1 = lane0
      var lane2 = lane0
      // Fold 48-byte blocks while more than 48 bytes are left.
      while limit - pos > 48 do
        lane0 = mix(readLongLE(buf, pos) ^ PRIME64_1, readLongLE(buf, pos + 8) ^ lane0)
        lane1 = mix(readLongLE(buf, pos + 16) ^ PRIME64_2, readLongLE(buf, pos + 24) ^ lane1)
        lane2 = mix(readLongLE(buf, pos + 32) ^ PRIME64_3, readLongLE(buf, pos + 40) ^ lane2)
        pos += 48
      end while
      lane0 ^= lane1 ^ lane2
      // Fold 16-byte pairs while more than 16 bytes are left.
      while limit - pos > 16 do
        lane0 = mix(readLongLE(buf, pos) ^ PRIME64_1, readLongLE(buf, pos + 8) ^ lane0)
        pos += 16
      end while
      // The tail words are always the last 16 bytes of the input.
      first = readLongLE(buf, limit - 16)
      second = readLongLE(buf, limit - 8)
    else if len >= 4 then
      val quarter = (len >> 3) << 2
      val last4 = offset + len - 4
      first = (readIntLE(buf, offset).toLong << 32) |
        (readIntLE(buf, offset + quarter) & 0xffffffffL)
      second = (readIntLE(buf, last4).toLong << 32) |
        (readIntLE(buf, last4 - quarter) & 0xffffffffL)
    else if len > 0 then
      first = wyr3(buf, offset, len)
    end if
    finishHash(first, second, lane0, len)
  end hash

  /**
   * Hashes `len` bytes of `buffer` starting at absolute index `offset`.
   *
   * Buffers that expose a backing array are delegated to the array overload.
   *
   * @param buffer source buffer
   * @param offset index of the first byte to hash
   * @param len    number of bytes to hash
   * @param seed   hash seed
   * @return the 64-bit hash value
   */
  override def hash(buffer: ByteBuffer, offset: Int, len: Int, seed: Long): Long =
    if buffer.hasArray() then hash(buffer.array(), offset + buffer.arrayOffset(), len, seed)
    else
      ByteBufferUtils.checkRange(buffer, offset, len)
      val le = ByteBufferUtils.inLittleEndianOrder(buffer)
      var lane0: Long = initSeed(seed)
      var first: Long = 0L
      var second: Long = 0L
      if len > 16 then
        val limit = offset + len
        var pos = offset
        var lane1 = lane0
        var lane2 = lane0
        // Fold 48-byte blocks while more than 48 bytes are left.
        while limit - pos > 48 do
          lane0 = mix(readLongLE(le, pos) ^ PRIME64_1, readLongLE(le, pos + 8) ^ lane0)
          lane1 = mix(readLongLE(le, pos + 16) ^ PRIME64_2, readLongLE(le, pos + 24) ^ lane1)
          lane2 = mix(readLongLE(le, pos + 32) ^ PRIME64_3, readLongLE(le, pos + 40) ^ lane2)
          pos += 48
        end while
        lane0 ^= lane1 ^ lane2
        // Fold 16-byte pairs while more than 16 bytes are left.
        while limit - pos > 16 do
          lane0 = mix(readLongLE(le, pos) ^ PRIME64_1, readLongLE(le, pos + 8) ^ lane0)
          pos += 16
        end while
        // The tail words are always the last 16 bytes of the input.
        first = readLongLE(le, limit - 16)
        second = readLongLE(le, limit - 8)
      else if len >= 4 then
        val quarter = (len >> 3) << 2
        val last4 = offset + len - 4
        first = (readIntLE(le, offset).toLong << 32) |
          (readIntLE(le, offset + quarter) & 0xffffffffL)
        second = (readIntLE(le, last4).toLong << 32) |
          (readIntLE(le, last4 - quarter) & 0xffffffffL)
      else if len > 0 then
        first = wyr3(le, offset, len)
      end if
      finishHash(first, second, lane0, len)
    end if
  end hash
end WyHash64VarHandle

View File

@ -0,0 +1,17 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/**
 * 64-bit constants shared by the WyHash code in this package.
 *
 * `WyHash64VarHandle.hash` binds `PRIME64_1`..`PRIME64_3` to locals named
 * `secret1`..`secret3`.
 */
object WyHashConstants:
// XORed with the seed in `initSeed` and with the low product word in `finishHash`.
final val PRIME64_0 = 0xa0761d6478bd642fL
// Second operand of the seed mix; XORed with the first input word of the first lane
// and with the tail words in `finishHash`.
final val PRIME64_1 = 0xe7037ed1a0b428dbL
// XORed with the first input word of the second lane in each 48-byte block.
final val PRIME64_2 = 0x8ebc6af09c88c6e3L
// XORed with the first input word of the third lane in each 48-byte block.
final val PRIME64_3 = 0x589965cc75374cc3L
end WyHashConstants

View File

@ -0,0 +1,56 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.nio.ByteBuffer
import verify.BasicTestSuite
/**
 * Test cases shared by every hash algorithm of this package.
 *
 * A concrete suite supplies the one-shot implementation, a factory for the streaming
 * implementation, and the expected hashes (seed 0) of the empty input and of a single
 * zero byte.
 */
abstract class AbstractHashTest extends BasicTestSuite:

  /** One-shot implementation under test. */
  def hash64: HashAlgo

  /** Creates a fresh streaming hasher under test. */
  def newStreaming(seed: Int): StreamingHashAlgo

  /** Expected hash of zero bytes with seed 0. */
  def emptyHash: Long

  /** Expected hash of a single `0` byte with seed 0. */
  def zeroHash: Long

  test("Hash empty array"):
    val bytes = Array[Byte](0)
    // Length 0: the single element must not contribute to the hash.
    val actual = hash64.hash(bytes, 0, 0, 0)
    assert(actual == emptyHash)

  test("Hash empty ByteBuffer"):
    val empty = ByteBuffer.allocate(0)
    val actual = hash64.hash(empty, 0, 0, 0)
    assert(actual == emptyHash)

  test("Hash one byte array"):
    val bytes = Array[Byte](0)
    val actual = hash64.hash(bytes, 0, 1, 0)
    assert(actual == zeroHash)

  test("Hash one byte ByteBuffer"):
    val single = ByteBuffer.allocate(1)
    single.put(0: Byte)
    single.rewind()
    val actual = hash64.hash(single, 0, 1, 0)
    assert(actual == zeroHash)

  test("Streaming hash empty ByteBuffer"):
    val streaming = newStreaming(0)
    try assert(streaming.getValue == emptyHash)
    finally streaming.close()

  test("Streaming one byte array"):
    val streaming = newStreaming(0)
    try
      val bytes = Array[Byte](0)
      streaming.update(bytes, 0, 1)
      assert(streaming.getValue == zeroHash)
    finally streaming.close()

end AbstractHashTest

View File

@ -0,0 +1,18 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/** Runs the shared hash test cases against the WyHash64 implementations. */
object WyHashTest extends AbstractHashTest:

  override val hash64: HashAlgo =
    Hashing.wyhash64

  override def newStreaming(seed: Int): StreamingHashAlgo =
    Hashing.newStreamingWyHash64(seed)

  // Expected values for seed 0.
  override val emptyHash: Long = 290873116282709081L
  override val zeroHash: Long = -295637713410278011L

end WyHashTest

View File

@ -9,47 +9,10 @@
package sbt.internal.util.hashing
import java.nio.ByteBuffer
import verify.BasicTestSuite
/** Checks the one-shot and streaming XXHash64 implementations against known hashes. */
object XXHashTest extends BasicTestSuite:
  val hash64: HashAlgo = Hashing.xxhash64

  // Expected values for seed 0: the empty input and a single zero byte.
  final val emptyHash = -1205034819632174695L
  final val zeroHash = -1642502924627794072L

  test("Hash empty array"):
    val bytes = Array[Byte](0)
    // Length 0: the single element must not contribute to the hash.
    val actual = hash64.hash(bytes, 0, 0, 0)
    assert(actual == emptyHash)

  test("Hash empty ByteBuffer"):
    val empty = ByteBuffer.allocate(0)
    val actual = hash64.hash(empty, 0, 0, 0)
    assert(actual == emptyHash)

  test("Hash one byte array"):
    val bytes = Array[Byte](0)
    val actual = hash64.hash(bytes, 0, 1, 0)
    assert(actual == zeroHash)

  test("Hash one byte ByteBuffer"):
    val single = ByteBuffer.allocate(1)
    single.put(0: Byte)
    single.rewind()
    val actual = hash64.hash(single, 0, 1, 0)
    assert(actual == zeroHash)

  test("Streaming hash empty ByteBuffer"):
    val streaming = Hashing.newStreamingXXHash64(0)
    try assert(streaming.getValue == emptyHash)
    finally streaming.close()

  test("Streaming one byte array"):
    val streaming = Hashing.newStreamingXXHash64(0)
    try
      val bytes = Array[Byte](0)
      streaming.update(bytes, 0, 1)
      assert(streaming.getValue == zeroHash)
    finally streaming.close()
/** Runs the shared hash test cases against the XXHash64 implementations. */
object XXHashTest extends AbstractHashTest:

  override val hash64: HashAlgo =
    Hashing.xxhash64

  override def newStreaming(seed: Int): StreamingHashAlgo =
    Hashing.newStreamingXXHash64(seed)

  // Expected values for seed 0.
  override val emptyHash: Long = -1205034819632174695L
  override val zeroHash: Long = -1642502924627794072L

end XXHashTest