[2.x] Port xxHash64 to Scala

This commit is contained in:
Eugene Yokota 2026-05-24 02:58:14 -04:00
parent 5e4d2744a1
commit c56a2621c9
12 changed files with 736 additions and 0 deletions

View File

@ -295,6 +295,11 @@ lazy val utilInterface = (project in file("internal") / "util-interface").settin
// sbt sub-project for the "Util Control" module, rooted at internal/util-control.
lazy val utilControl = (project in file("internal") / "util-control").settings(
utilCommonSettings,
name := "Util Control",
// Every library below is scoped with `% Test`, i.e. declared for the Test configuration only.
libraryDependencies ++= Seq(
scalacheck % Test,
scalaVerify % Test,
hedgehog % Test,
),
mimaSettings,
)

View File

@ -0,0 +1,33 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import XXHashConstants.PRIME64_1
import XXHashConstants.PRIME64_2
/**
 * Mutable state shared by streaming xxHash64 implementations.
 *
 * Keeps four 64-bit accumulators, a 32-byte buffer for input that has not been
 * consumed yet, and the total number of bytes seen. `update` and `getValue` are
 * left to subclasses.
 *
 * @param seed the seed the accumulators are initialised from
 */
abstract class AbstractStreamingXXHash64Scala(seed: Long) extends StreamingHashAlgo(seed):
  /** Number of bytes currently held in `memory`. */
  protected var memSize: Int = 0

  /** The four accumulators; they receive their seeded values in `reset()`. */
  protected var v1: Long = 0L
  protected var v2: Long = 0L
  protected var v3: Long = 0L
  protected var v4: Long = 0L

  /** Total number of bytes seen since construction or the last `reset()`. */
  protected var totalLen: Long = 0L

  /** Holding area for input bytes that have not been folded into the accumulators. */
  protected val memory = new Array[Byte](32)

  // Runs as part of construction, after the fields above have been initialised,
  // so that a new instance starts in the same state a reset one does.
  reset()

  /** Puts the accumulators back to their seeded values and discards buffered input. */
  override def reset(): Unit =
    memSize = 0
    totalLen = 0L
    v1 = seed + PRIME64_1 + PRIME64_2
    v2 = seed + PRIME64_2
    v3 = seed
    v4 = seed - PRIME64_1
end AbstractStreamingXXHash64Scala

View File

@ -0,0 +1,29 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.nio.{ ByteBuffer, ByteOrder }
/** Range checks and byte-order normalisation for `ByteBuffer` inputs. */
object ByteBufferUtils:
  /**
   * Checks that `off` is a valid index with respect to the capacity of `buf`.
   *
   * @throws ArrayIndexOutOfBoundsException if `off` is negative or not below `buf.capacity()`
   */
  def checkRange(buf: ByteBuffer, off: Int): Unit =
    val inBounds = 0 <= off && off < buf.capacity()
    if !inBounds then throw new ArrayIndexOutOfBoundsException(off)

  /**
   * Checks that the `len` bytes starting at `off` lie within the capacity of `buf`.
   * A zero-length range is accepted regardless of `off`.
   *
   * @throws IllegalArgumentException if `len` is negative
   * @throws ArrayIndexOutOfBoundsException if the first or last index is out of bounds
   */
  def checkRange(buf: ByteBuffer, off: Int, len: Int): Unit =
    SafeUtils.checkLength(len)
    if len > 0 then
      // Only the two end points need checking.
      checkRange(buf, off)
      checkRange(buf, off + len - 1)

  /**
   * Returns `buf` itself when it is already little-endian, otherwise a little-endian
   * duplicate, leaving the byte order of the argument unchanged.
   */
  def inLittleEndianOrder(buf: ByteBuffer): ByteBuffer =
    if buf.order() != ByteOrder.LITTLE_ENDIAN then
      buf.duplicate().order(ByteOrder.LITTLE_ENDIAN)
    else buf
end ByteBufferUtils

View File

@ -0,0 +1,60 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.nio.ByteBuffer
/**
 * Contract for one-shot, seeded 64-bit hash functions.
 */
trait HashAlgo:
  /**
   * Hashes the bytes `buf[off:off+len]` with the given seed.
   *
   * @param buf the input data
   * @param off index of the first byte to hash
   * @param len how many bytes to hash
   * @param seed the seed to use
   * @return the 64-bit hash value
   */
  def hash(buf: Array[Byte], off: Int, len: Int, seed: Long): Long

  /**
   * Hashes a slice of a ByteBuffer. The buffer's ByteBuffer#position() and
   * ByteBuffer#limit() are left untouched.
   *
   * @param buf the input data
   * @param off index of the first byte to hash
   * @param len how many bytes to hash
   * @param seed the seed to use
   * @return the hash value
   */
  def hash(buf: ByteBuffer, off: Int, len: Int, seed: Long): Long

  /**
   * Hashes everything between the buffer's position and its limit, then advances
   * ByteBuffer#position() to the limit to mark those bytes as read.
   *
   * @param buf the input data
   * @param seed the seed to use
   * @return the hash value
   */
  def hash(buf: ByteBuffer, seed: Long): Long =
    val start = buf.position()
    val count = buf.remaining()
    val result = hash(buf, start, count, seed)
    buf.position(buf.limit())
    result

  /** The simple name of the implementing class. */
  override def toString(): String =
    getClass().getSimpleName()
end HashAlgo

View File

@ -0,0 +1,17 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/** Factory methods for the xxHash64 implementations in this package. */
object Hashing:
  /** Returns the shared one-shot xxHash64 implementation. */
  def xxhash64: HashAlgo =
    XXHash64VarHandle.INSTANCE

  /**
   * Creates a new streaming xxHash64.
   *
   * @param seed the seed to use
   */
  def newStreamingXXHash64(seed: Long): StreamingHashAlgo =
    StreamingXXHash64VarHandle(seed)
end Hashing

View File

@ -0,0 +1,27 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/** Argument validation helpers for byte-array ranges. */
object SafeUtils:
  /**
   * Checks that `off` is a valid index into `buf`.
   *
   * @throws ArrayIndexOutOfBoundsException if `off` is negative or not below `buf.length`
   */
  def checkRange(buf: Array[Byte], off: Int): Unit =
    val inBounds = 0 <= off && off < buf.length
    if !inBounds then throw new ArrayIndexOutOfBoundsException(off)

  /**
   * Checks that the `len` bytes starting at `off` lie inside `buf`.
   * A zero-length range is accepted regardless of `off`.
   *
   * @throws IllegalArgumentException if `len` is negative
   * @throws ArrayIndexOutOfBoundsException if the first or last index is out of bounds
   */
  def checkRange(buf: Array[Byte], off: Int, len: Int): Unit =
    checkLength(len)
    if len > 0 then
      // Only the two end points need checking.
      checkRange(buf, off)
      checkRange(buf, off + len - 1)

  /**
   * Rejects negative lengths.
   *
   * @throws IllegalArgumentException if `len` is negative
   */
  def checkLength(len: Int): Unit =
    if len < 0 then throw new IllegalArgumentException("lengths must be >= 0")
end SafeUtils

View File

@ -0,0 +1,55 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.io.Closeable
/**
 * Base class for hashes that are fed their input incrementally.
 * The implementation is based on lz4-java.
 * Copyright 2020 Linnaea Von Lavia and the lz4-java contributors.
 * Licensed under the Apache License.
 *
 * Instances of this class are **not** thread-safe.
 *
 * @param seed the seed this instance was created with
 */
abstract class StreamingHashAlgo(val seed: Long) extends Closeable:
  /**
   * The checksum of all input supplied so far.
   *
   * @return the checksum
   */
  def getValue: Long

  /**
   * Feeds buf[off:off+len] into the hash.
   *
   * @param buf the input data
   * @param off index of the first byte to consume
   * @param len how many bytes to consume
   */
  def update(buf: Array[Byte], off: Int, len: Int): Unit

  /**
   * Returns this instance to the state it had right after instantiation,
   * keeping the same seed.
   */
  def reset(): Unit

  /**
   * Releases any system resources associated with this instance.
   * Calling it is optional: the resources are released anyway when the
   * instance is reclaimed by GC. This base implementation does nothing.
   */
  override def close(): Unit = ()

  /** The simple class name followed by the seed, e.g. `Name(seed=0)`. */
  override def toString: String =
    s"${getClass().getSimpleName()}(seed=$seed)"
end StreamingHashAlgo

View File

@ -0,0 +1,168 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.lang.Long.rotateLeft
import SafeUtils.checkRange
import VarHandleUtils.*
import XXHashConstants.*
/**
 * Streaming xxHash64.
 *
 * The implementation is based on lz4-java.
 * Copyright 2020 Linnaea Von Lavia and the lz4-java contributors.
 * Licensed under the Apache License.
 *
 * Input is consumed in 32-byte stripes (four 8-byte lanes); bytes that do not yet
 * fill a stripe wait in `memory` until more input arrives or `getValue` is called.
 */
class StreamingXXHash64VarHandle(seed: Long) extends AbstractStreamingXXHash64Scala(seed):
  /** Folds one 8-byte little-endian `lane` into the accumulator `acc`. */
  private inline def stripeRound(acc: Long, lane: Long): Long =
    rotateLeft(acc + lane * PRIME64_2, 31) * PRIME64_1

  /** Mixes a finished accumulator `acc` into the combined hash `h`. */
  private inline def mergeAccumulator(h: Long, acc: Long): Long =
    (h ^ stripeRound(0L, acc)) * PRIME64_1 + PRIME64_4

  /**
   * Computes the hash of everything fed so far. The instance state is not modified,
   * so more input may be supplied afterwards.
   */
  override def getValue: Long =
    var result: Long =
      if totalLen >= 32 then
        // At least one full stripe was consumed: combine the four accumulators.
        val combined =
          rotateLeft(v1, 1) + rotateLeft(v2, 7) + rotateLeft(v3, 12) + rotateLeft(v4, 18)
        mergeAccumulator(
          mergeAccumulator(mergeAccumulator(mergeAccumulator(combined, v1), v2), v3),
          v4
        )
      else seed + PRIME64_5
    result += totalLen

    // Fold in the buffered tail: 8 bytes at a time, then 4, then byte by byte.
    var pos = 0
    while pos <= memSize - 8 do
      val lane = stripeRound(0L, readLongLE(memory, pos))
      result = rotateLeft(result ^ lane, 27) * PRIME64_1 + PRIME64_4
      pos += 8
    if pos <= memSize - 4 then
      val word = (readIntLE(memory, pos) & 0xffffffffL) * PRIME64_1
      result = rotateLeft(result ^ word, 23) * PRIME64_2 + PRIME64_3
      pos += 4
    while pos < memSize do
      val single = (memory(pos) & 0xff) * PRIME64_5
      result = rotateLeft(result ^ single, 11) * PRIME64_1
      pos += 1

    // Final avalanche.
    result = (result ^ (result >>> 33)) * PRIME64_2
    result = (result ^ (result >>> 29)) * PRIME64_3
    result ^ (result >>> 32)
  end getValue

  /**
   * Feeds buf[offset:offset+len] into the hash.
   *
   * @param buf the input data
   * @param offset index of the first byte to consume
   * @param len how many bytes to consume
   */
  override def update(buf: Array[Byte], offset: Int, len: Int): Unit =
    checkRange(buf, offset, len)
    totalLen += len
    if memSize + len < 32 then
      // Still less than one stripe in total: just buffer the input.
      System.arraycopy(buf, offset, memory, memSize, len)
      memSize += len
    else
      val end = offset + len
      var pos = offset
      if memSize > 0 then
        // Complete the partially filled stripe left by an earlier call and consume it.
        val missing = 32 - memSize
        System.arraycopy(buf, pos, memory, memSize, missing)
        v1 = stripeRound(v1, readLongLE(memory, 0))
        v2 = stripeRound(v2, readLongLE(memory, 8))
        v3 = stripeRound(v3, readLongLE(memory, 16))
        v4 = stripeRound(v4, readLongLE(memory, 24))
        pos += missing
        memSize = 0

      // Consume whole stripes straight from the caller's array, working on local copies.
      val lastStripeStart = end - 32
      var a1 = v1
      var a2 = v2
      var a3 = v3
      var a4 = v4
      while pos <= lastStripeStart do
        a1 = stripeRound(a1, readLongLE(buf, pos))
        a2 = stripeRound(a2, readLongLE(buf, pos + 8))
        a3 = stripeRound(a3, readLongLE(buf, pos + 16))
        a4 = stripeRound(a4, readLongLE(buf, pos + 24))
        pos += 32
      v1 = a1
      v2 = a2
      v3 = a3
      v4 = a4

      // Keep whatever is left (fewer than 32 bytes) for the next call.
      val leftover = end - pos
      if leftover > 0 then
        System.arraycopy(buf, pos, memory, 0, leftover)
        memSize = leftover
    end if
  end update
end StreamingXXHash64VarHandle

View File

@ -0,0 +1,41 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.lang.invoke.{ MethodHandles, VarHandle }
import java.nio.{ ByteBuffer, ByteOrder }
/**
 * Little-endian primitive reads from byte arrays and ByteBuffers, backed by
 * `VarHandle` views created with `ByteOrder.LITTLE_ENDIAN`.
 */
object VarHandleUtils:
  // View handles over byte arrays.
  private val LONG_HANDLE: VarHandle =
    MethodHandles.byteArrayViewVarHandle(classOf[Array[Long]], ByteOrder.LITTLE_ENDIAN)
  private val INT_HANDLE: VarHandle =
    MethodHandles.byteArrayViewVarHandle(classOf[Array[Int]], ByteOrder.LITTLE_ENDIAN)

  // View handles over ByteBuffers.
  private val BB_LONG_HANDLE: VarHandle =
    MethodHandles.byteBufferViewVarHandle(classOf[Array[Long]], ByteOrder.LITTLE_ENDIAN)
  private val BB_INT_HANDLE: VarHandle =
    MethodHandles.byteBufferViewVarHandle(classOf[Array[Int]], ByteOrder.LITTLE_ENDIAN)

  /** Reads the byte at index `off`. */
  inline def readByte(buf: Array[Byte], off: Int): Byte =
    buf(off)

  /** Reads four bytes starting at `off` as a little-endian `Int`. */
  inline def readIntLE(buf: Array[Byte], off: Int): Int =
    INT_HANDLE.get(buf, off).asInstanceOf[Int]

  /** Reads eight bytes starting at `off` as a little-endian `Long`. */
  inline def readLongLE(buf: Array[Byte], off: Int): Long =
    LONG_HANDLE.get(buf, off).asInstanceOf[Long]

  /** Reads the byte at absolute index `i`. */
  inline def readByte(buf: ByteBuffer, i: Int): Byte =
    buf.get(i)

  /**
   * Reads four bytes starting at absolute index `i` as a little-endian `Int`.
   * Asserts that `buf` has been put into little-endian order.
   */
  inline def readIntLE(buf: ByteBuffer, i: Int): Int =
    assert(buf.order() == ByteOrder.LITTLE_ENDIAN)
    BB_INT_HANDLE.get(buf, i).asInstanceOf[Int]

  /**
   * Reads eight bytes starting at absolute index `i` as a little-endian `Long`.
   * Asserts that `buf` has been put into little-endian order.
   */
  inline def readLongLE(buf: ByteBuffer, i: Int): Long =
    assert(buf.order() == ByteOrder.LITTLE_ENDIAN)
    BB_LONG_HANDLE.get(buf, i).asInstanceOf[Long]
end VarHandleUtils

View File

@ -0,0 +1,222 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.lang.Long.rotateLeft
import java.nio.ByteBuffer
import VarHandleUtils.*
import XXHashConstants.*
object XXHash64VarHandle:
  /** Shared instance; the class keeps no state between calls. */
  private[sbt] val INSTANCE = new XXHash64VarHandle()
end XXHash64VarHandle

/**
 * One-shot xxHash64 over byte arrays and ByteBuffers.
 *
 * The implementation is based on lz4-java.
 * Copyright 2020 Linnaea Von Lavia and the lz4-java contributors.
 * Licensed under the Apache License.
 *
 * Instances have no fields and every call works on local variables only, so an
 * instance (such as the shared `INSTANCE`) may be used from several threads at once.
 */
class XXHash64VarHandle extends HashAlgo:
  /** Folds one 8-byte little-endian `lane` into the accumulator `acc`. */
  private inline def stripeRound(acc: Long, lane: Long): Long =
    rotateLeft(acc + lane * PRIME64_2, 31) * PRIME64_1

  /** Mixes a finished accumulator `acc` into the combined hash `h`. */
  private inline def mergeAccumulator(h: Long, acc: Long): Long =
    (h ^ stripeRound(0L, acc)) * PRIME64_1 + PRIME64_4

  /** Combines the four stripe accumulators into a single 64-bit value. */
  private inline def converge(v1: Long, v2: Long, v3: Long, v4: Long): Long =
    val combined =
      rotateLeft(v1, 1) + rotateLeft(v2, 7) + rotateLeft(v3, 12) + rotateLeft(v4, 18)
    mergeAccumulator(
      mergeAccumulator(mergeAccumulator(mergeAccumulator(combined, v1), v2), v3),
      v4
    )

  /** Mixes an 8-byte tail chunk into `h`. */
  private inline def mixLong(h: Long, lane: Long): Long =
    rotateLeft(h ^ stripeRound(0L, lane), 27) * PRIME64_1 + PRIME64_4

  /** Mixes a 4-byte tail chunk (treated as unsigned) into `h`. */
  private inline def mixInt(h: Long, word: Int): Long =
    rotateLeft(h ^ ((word & 0xffffffffL) * PRIME64_1), 23) * PRIME64_2 + PRIME64_3

  /** Mixes a single tail byte (treated as unsigned) into `h`. */
  private inline def mixByte(h: Long, b: Byte): Long =
    rotateLeft(h ^ ((b & 0xff) * PRIME64_5), 11) * PRIME64_1

  /** Final avalanche step. */
  private inline def avalanche(h: Long): Long =
    val a = (h ^ (h >>> 33)) * PRIME64_2
    val b = (a ^ (a >>> 29)) * PRIME64_3
    b ^ (b >>> 32)

  /**
   * Computes the 64-bit hash of buf[offset:offset+len] using the seed.
   *
   * @param buf the input data
   * @param offset the start offset in buf
   * @param len the number of bytes to hash
   * @param seed the seed to use
   * @return the hash value
   * @throws IllegalArgumentException if `len` is negative
   * @throws ArrayIndexOutOfBoundsException if the range does not lie inside `buf`
   */
  override def hash(buf: Array[Byte], offset: Int, len: Int, seed: Long): Long =
    SafeUtils.checkRange(buf, offset, len)
    val end = offset + len
    var off = offset
    var h64: Long =
      if len >= 32 then
        // len >= 32 guarantees at least one stripe, so a plain while loop is enough.
        val limit = end - 32
        var v1 = seed + PRIME64_1 + PRIME64_2
        var v2 = seed + PRIME64_2
        var v3 = seed
        var v4 = seed - PRIME64_1
        while off <= limit do
          v1 = stripeRound(v1, readLongLE(buf, off))
          v2 = stripeRound(v2, readLongLE(buf, off + 8))
          v3 = stripeRound(v3, readLongLE(buf, off + 16))
          v4 = stripeRound(v4, readLongLE(buf, off + 24))
          off += 32
        converge(v1, v2, v3, v4)
      else seed + PRIME64_5
    h64 += len

    // Remaining bytes (fewer than 32): 8 at a time, then 4, then one by one.
    while off <= end - 8 do
      h64 = mixLong(h64, readLongLE(buf, off))
      off += 8
    if off <= end - 4 then
      h64 = mixInt(h64, readIntLE(buf, off))
      off += 4
    while off < end do
      h64 = mixByte(h64, readByte(buf, off))
      off += 1
    avalanche(h64)
  end hash

  /**
   * Computes the hash of the given slice of the ByteBuffer. The buffer's position,
   * limit and byte order are not modified.
   *
   * The range is validated against the buffer's capacity before anything else, so
   * array-backed and non-array-backed buffers reject the same arguments. Without
   * this, a heap buffer that is a slice of a larger array would silently hash
   * backing-array bytes lying outside the buffer.
   *
   * @param buffer the input data
   * @param offset the start offset in buffer
   * @param len the number of bytes to hash
   * @param seed the seed to use
   * @return the hash value
   * @throws IllegalArgumentException if `len` is negative
   * @throws ArrayIndexOutOfBoundsException if the range does not lie inside `buffer`
   */
  override def hash(buffer: ByteBuffer, offset: Int, len: Int, seed: Long): Long =
    ByteBufferUtils.checkRange(buffer, offset, len)
    if buffer.hasArray() then hash(buffer.array(), offset + buffer.arrayOffset(), len, seed)
    else
      val buf = ByteBufferUtils.inLittleEndianOrder(buffer)
      val end = offset + len
      var off = offset
      var h64: Long =
        if len >= 32 then
          val limit = end - 32
          var v1 = seed + PRIME64_1 + PRIME64_2
          var v2 = seed + PRIME64_2
          var v3 = seed
          var v4 = seed - PRIME64_1
          while off <= limit do
            v1 = stripeRound(v1, readLongLE(buf, off))
            v2 = stripeRound(v2, readLongLE(buf, off + 8))
            v3 = stripeRound(v3, readLongLE(buf, off + 16))
            v4 = stripeRound(v4, readLongLE(buf, off + 24))
            off += 32
          converge(v1, v2, v3, v4)
        else seed + PRIME64_5
      h64 += len

      while off <= end - 8 do
        h64 = mixLong(h64, readLongLE(buf, off))
        off += 8
      if off <= end - 4 then
        h64 = mixInt(h64, readIntLE(buf, off))
        off += 4
      while off < end do
        h64 = mixByte(h64, readByte(buf, off))
        off += 1
      avalanche(h64)
    end if
  end hash
end XXHash64VarHandle

View File

@ -0,0 +1,24 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/**
 * The xxHash prime constants, written as the two's-complement bit patterns of the
 * unsigned values; the signed decimal value of each is given alongside.
 */
object XXHashConstants:
  // 32-bit primes
  final val PRIME1 = 0x9e3779b1 // -1640531535
  final val PRIME2 = 0x85ebca77 // -2048144777
  final val PRIME3 = 0xc2b2ae3d // -1028477379
  final val PRIME4 = 0x27d4eb2f // 668265263
  final val PRIME5 = 0x165667b1 // 374761393

  // 64-bit primes
  final val PRIME64_1 = 0x9e3779b185ebca87L // -7046029288634856825, unsigned 11400714785074694791
  final val PRIME64_2 = 0xc2b2ae3d27d4eb4fL // -4417276706812531889, unsigned 14029467366897019727
  final val PRIME64_3 = 0x165667b19e3779f9L // 1609587929392839161
  final val PRIME64_4 = 0x85ebca77c2b2ae63L // -8796714831421723037, unsigned 9650029242287828579
  final val PRIME64_5 = 0x27d4eb2f165667c5L // 2870177450012600261
end XXHashConstants

View File

@ -0,0 +1,55 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.nio.ByteBuffer
import verify.BasicTestSuite
/**
 * Tests for the xxHash64 port.
 *
 * `ByteBuffer.allocate` returns an array-backed buffer, which the hash routes through
 * its byte-array code path; direct buffers are used in addition so that the
 * ByteBuffer-specific code path is exercised too. Inputs longer than 32 bytes cover
 * the stripe loop and every tail size.
 */
object XXHashTest extends BasicTestSuite:
  val hash64: HashAlgo = Hashing.xxhash64
  final val emptyHash = -1205034819632174695L
  final val zeroHash = -1642502924627794072L

  /** 109 bytes = three 32-byte stripes followed by an 8-, a 4- and a 1-byte tail. */
  private def sample: Array[Byte] =
    Array.tabulate[Byte](109)(i => (i * 31 + 7).toByte)

  /** Copies `bytes` into a direct (not array-backed) buffer positioned at 0. */
  private def direct(bytes: Array[Byte]): ByteBuffer =
    val buf = ByteBuffer.allocateDirect(bytes.length)
    buf.put(bytes)
    buf.rewind()
    buf

  test("Hash empty array"):
    val buf: Array[Byte] = Array[Byte](0)
    val r = hash64.hash(buf, 0, 0, 0)
    assert(r == emptyHash)

  test("Hash empty ByteBuffer"):
    val buf: ByteBuffer = ByteBuffer.allocate(0)
    val r = hash64.hash(buf, 0, 0, 0)
    assert(r == emptyHash)

  test("Hash empty direct ByteBuffer"):
    val buf = direct(Array.emptyByteArray)
    assert(!buf.hasArray())
    val r = hash64.hash(buf, 0, 0, 0)
    assert(r == emptyHash)

  test("Hash one byte array"):
    val buf: Array[Byte] = Array[Byte](0)
    val r = hash64.hash(buf, 0, 1, 0)
    assert(r == zeroHash)

  test("Hash one byte ByteBuffer"):
    val buf: ByteBuffer = ByteBuffer.allocate(1)
    buf.put(0: Byte)
    buf.rewind()
    val r = hash64.hash(buf, 0, 1, 0)
    assert(r == zeroHash)

  test("Hash one byte direct ByteBuffer"):
    val buf = direct(Array[Byte](0))
    val r = hash64.hash(buf, 0, 1, 0)
    assert(r == zeroHash)

  test("Array and direct ByteBuffer agree on multi-stripe input"):
    val data = sample
    val expected = hash64.hash(data, 0, data.length, 42L)
    val buf = direct(data)
    assert(hash64.hash(buf, 0, data.length, 42L) == expected)
    // The slice overload must leave the position alone.
    assert(buf.position() == 0)
    // The whole-buffer overload consumes the buffer.
    assert(hash64.hash(buf, 42L) == expected)
    assert(buf.position() == buf.limit())

  test("Streaming hash of empty input"):
    val hash = Hashing.newStreamingXXHash64(0)
    try assert(hash.getValue == emptyHash)
    finally hash.close()

  test("Streaming one byte array"):
    val hash = Hashing.newStreamingXXHash64(0)
    try
      val buf: Array[Byte] = Array[Byte](0)
      hash.update(buf, 0, 1)
      assert(hash.getValue == zeroHash)
    finally hash.close()

  test("Streaming a multi-stripe input in one update matches the one-shot hash"):
    val data = sample
    val expected = hash64.hash(data, 0, data.length, 42L)
    val hash = Hashing.newStreamingXXHash64(42L)
    try
      hash.update(data, 0, data.length)
      assert(hash.getValue == expected)
    finally hash.close()

  test("Streaming in small chunks matches the one-shot hash"):
    val data = sample
    val expected = hash64.hash(data, 0, data.length, 42L)
    val hash = Hashing.newStreamingXXHash64(42L)
    try
      // 7-byte chunks never line up with the 32-byte stripes, forcing the buffered path.
      for start <- 0 until data.length by 7 do
        hash.update(data, start, math.min(7, data.length - start))
      assert(hash.getValue == expected)
    finally hash.close()

  test("Streaming reset restores the initial state"):
    val data = sample
    val hash = Hashing.newStreamingXXHash64(0)
    try
      hash.update(data, 0, data.length)
      hash.reset()
      assert(hash.getValue == emptyHash)
    finally hash.close()
end XXHashTest