[2.0.x] Migrate FarmHash usage to xxhash64 (#9267)

Problem
Zero-Allocation-Hashing (ZAHa), the library we use for FarmHash, uses Unsafe.

Solution
This ports xxhash64 to Scala.
This commit is contained in:
eugene yokota 2026-05-30 19:14:53 -04:00 committed by Eugene Yokota
parent ae1192109d
commit 27f09caba5
32 changed files with 1488 additions and 36 deletions

View File

@ -56,6 +56,10 @@ jobs:
java: 17
distribution: zulu
jobtype: 12
# - os: ubuntu-latest
# java: 17
# distribution: temurin
# jobtype: 13
runs-on: ${{ matrix.os }}
timeout-minutes: 25
env:
@ -200,4 +204,9 @@ jobs:
if: ${{ matrix.jobtype == 12 }}
shell: bash
run: |
./sbt -v "scripted cache/*"
./sbt -v "scripted cache/*"
# - name: Hash Benchmark
# if: ${{ matrix.jobtype == 13 }}
# shell: bash
# run: |
# ./sbt -v "hashBenchmark/Jmh/run -i 5 -wi 3 -f1 -t1"

View File

@ -290,11 +290,18 @@ lazy val utilInterface = (project in file("internal") / "util-interface").settin
mimaSettings,
)
lazy val utilControl = (project in file("internal") / "util-control").settings(
utilCommonSettings,
name := "Util Control",
mimaSettings,
)
lazy val utilControl = (project in file("internal") / "util-control")
.settings(
utilCommonSettings,
name := "Util Control",
libraryDependencies ++= Seq(
scalacheck % Test,
scalaVerify % Test,
hedgehog % Test,
),
mimaSettings,
)
.configure(addSbtIOForTest)
lazy val utilPosition = (project in file("internal") / "util-position")
.settings(
@ -379,7 +386,10 @@ lazy val utilCache = project
contrabandSettings,
mimaSettings,
mimaBinaryIssueFilters ++= Seq(
exclude[ReversedMissingMethodProblem]("sbt.util.CacheImplicits.sbt$util*")
exclude[ReversedMissingMethodProblem]("sbt.util.CacheImplicits.sbt$util*"),
exclude[DirectMissingMethodProblem]("sbt.util.HashUtil.farmHash"),
exclude[DirectMissingMethodProblem]("sbt.util.HashUtil.farmHashStr"),
exclude[DirectMissingMethodProblem]("sbt.util.HashUtil.toFarmHashString"),
),
Test / fork := true,
)
@ -388,6 +398,18 @@ lazy val utilCache = project
addSbtCompilerInterface,
)
// Benchmark subproject located at internal/hash-benchmark.
// It depends on utilControl and utilCache, enables the JMH plugin, pulls in the blake3
// library, and is excluded from publishing (publish / skip := true).
lazy val hashBenchmark = (project in file("internal") / "hash-benchmark")
.dependsOn(utilControl, utilCache)
.enablePlugins(JmhPlugin)
.settings(
utilCommonSettings,
name := "Hash Benchmark",
// JVM options used only when running the Jmh configuration.
Jmh / run / javaOptions ++= Seq("-Xmx1G", "-Dfile.encoding=UTF8"),
libraryDependencies += blake3,
mimaSettings,
publish / skip := true,
)
// Builds on cache to provide caching for filesystem-related operations
lazy val utilTracking = (project in file("util-tracking"))
.dependsOn(utilCache)

View File

@ -0,0 +1,60 @@
package sbt.internal.util
import java.util.concurrent.TimeUnit
import java.nio.file.{ Files, Path as NioPath }
import sbt.io.IO
import sbt.io.syntax.*
import sbt.util.Digest
import scala.util.Using
import org.openjdk.jmh.annotations.*
import pt.kcry.blake3.{ Blake3 as Blake3Impl }
/**
 * Shared JMH harness for the file-hashing benchmarks.
 *
 * Construction creates a temporary directory holding `test.txt`, written as 1024 appends of a
 * 1024-byte zero-filled buffer. Subclasses plug in the digest under test through [[hash]].
 */
@State(Scope.Benchmark)
abstract class AbstractFileHashBenchmark:
  val tempDir = IO.createTemporaryDirectory
  val temp = tempDir / "test.txt"
  val buf: Array[Byte] = Array.fill[Byte](1024)(0.toByte)
  for i <- 0 until 1024 do IO.append(temp, buf)

  /** Hashes the file at `path` and renders the digest as a string. */
  def hash(path: NioPath): String

  /**
   * Measures the average time, in microseconds, of one [[hash]] call on the fixture file.
   *
   * The digest is returned instead of being dropped so that JMH consumes it and the
   * measured call cannot be treated as dead code.
   */
  @Benchmark
  @BenchmarkMode(Array(Mode.AverageTime))
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  def hashFile: String =
    hash(temp.toPath())

  /** Removes the fixture directory after the trial so repeated runs do not leak temp files. */
  @TearDown
  def deleteFixture(): Unit =
    IO.delete(tempDir)
end AbstractFileHashBenchmark
/** File benchmark that hashes through `Digest.xx64Hash`. */
class XXHash64FileHashBenchmark extends AbstractFileHashBenchmark:
  override def hash(path: NioPath): String =
    val digest = Digest.xx64Hash(path)
    digest.toString
/** File benchmark that hashes through `Digest.wy64Hash`. */
class WyHash64FileHashBenchmark extends AbstractFileHashBenchmark:
  override def hash(path: NioPath): String =
    val digest = Digest.wy64Hash(path)
    digest.toString
/** File benchmark that hashes through `Digest.imoxx64Hash`. */
class ImoXXHash64FileHashBenchmark extends AbstractFileHashBenchmark:
  override def hash(path: NioPath): String =
    val digest = Digest.imoxx64Hash(path)
    digest.toString
/** File benchmark that hashes through `Digest.imowy64Hash`. */
class ImoWyHash64FileHashBenchmark extends AbstractFileHashBenchmark:
  override def hash(path: NioPath): String =
    val digest = Digest.imowy64Hash(path)
    digest.toString
/** File benchmark that hashes through `Digest.sha1Hash`. */
class Sha1FileHashBenchmark extends AbstractFileHashBenchmark:
  override def hash(path: NioPath): String =
    val digest = Digest.sha1Hash(path)
    digest.toString
/** File benchmark that hashes through `Digest.sha256Hash`. */
class Sha256FileHashBenchmark extends AbstractFileHashBenchmark:
  override def hash(path: NioPath): String =
    val digest = Digest.sha256Hash(path)
    digest.toString
/**
 * File benchmark that streams the file through a Blake3 hasher and formats the result as
 * `blake3-<hex>/<file size>`.
 */
class Blake3FileHashBenchmark extends AbstractFileHashBenchmark:
  override def hash(path: NioPath): String =
    Using.resource(Files.newInputStream(path)) { stream =>
      val hasher = Blake3Impl.newHasher()
      hasher.update(stream)
      val hex = hasher.doneHex(64)
      // The stream is closed by Using.resource once this block returns.
      s"blake3-$hex/${Files.size(path)}"
    }

View File

@ -0,0 +1,65 @@
package sbt.internal.util
import java.util.concurrent.{ ThreadLocalRandom, TimeUnit }
import net.openhft.hashing.LongHashFunction
import org.openjdk.jmh.annotations.*
import pt.kcry.blake3.Blake3
import sbt.util.Digest
import sbt.internal.util.hashing.Hashing
import scala.util.hashing.MurmurHash3
/**
 * Shared JMH harness for the in-memory hashing benchmarks.
 *
 * Every instance owns a 2048-byte buffer filled with random bytes at construction time;
 * subclasses plug in the hash function under test through [[hash]].
 */
@State(Scope.Benchmark)
abstract class AbstractHashBenchmark:
  /** Hashes `buf` and renders the digest as a string. */
  def hash(buf: Array[Byte]): String

  val buf: Array[Byte] = new Array[Byte](2048)
  ThreadLocalRandom.current().nextBytes(buf)

  /**
   * Measures the average time, in microseconds, of one [[hash]] call on the fixture buffer.
   *
   * The digest is returned instead of being dropped: the hashed work is pure computation, so
   * a discarded result would let the JIT remove the call and make the measurement meaningless.
   */
  @Benchmark
  @BenchmarkMode(Array(Mode.AverageTime))
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  def hashByteArray: String =
    hash(buf)
end AbstractHashBenchmark
/** Byte-array benchmark for `Hashing.xxhash64` with seed 0; the digest is rendered as hex. */
class XXHash64HashBenchmark extends AbstractHashBenchmark:
  override def hash(buf: Array[Byte]): String =
    val digest = Hashing.xxhash64.hash(buf, 0, buf.length, 0)
    java.lang.Long.toHexString(digest)
/** Byte-array benchmark for `Hashing.wyhash64` with seed 0; the digest is rendered as hex. */
class WyHash64HashBenchmark extends AbstractHashBenchmark:
  override def hash(buf: Array[Byte]): String =
    val digest = Hashing.wyhash64.hash(buf, 0, buf.length, 0)
    java.lang.Long.toHexString(digest)
/** Byte-array benchmark for `LongHashFunction.farmNa()`; the digest is rendered as hex. */
class FarmHashHashBenchmark extends AbstractHashBenchmark:
  override def hash(buf: Array[Byte]): String =
    val farm = LongHashFunction.farmNa()
    java.lang.Long.toHexString(farm.hashBytes(buf))
/**
 * Byte-array benchmark for a single `MurmurHash3.bytesHash` call; the 32-bit result is
 * zero-extended to a `Long` before being rendered as hex.
 */
class MurmurHash32HashBenchmark extends AbstractHashBenchmark:
  override def hash(buf: Array[Byte]): String =
    val digest32 = MurmurHash3.bytesHash(buf, 0x85ebca6b)
    java.lang.Long.toHexString(digest32.toLong & 0xffffffffL)
/**
 * Byte-array benchmark that builds a 64-bit value from two `MurmurHash3.bytesHash` calls with
 * different seeds (first call in the upper 32 bits, second in the lower) and renders it as hex.
 */
class MurmurHash64HashBenchmark extends AbstractHashBenchmark:
  override def hash(buf: Array[Byte]): String =
    val upper = MurmurHash3.bytesHash(buf, 0x9747b28c).toLong << 32
    val lower = MurmurHash3.bytesHash(buf, 0x85ebca6b).toLong & 0xffffffffL
    java.lang.Long.toHexString(upper | lower)
/** Byte-array benchmark that hashes through `Digest.md5Hash`. */
class Md5HashBenchmark extends AbstractHashBenchmark:
  override def hash(buf: Array[Byte]): String =
    val digest = Digest.md5Hash(buf)
    digest.toString
/** Byte-array benchmark that hashes through `Digest.sha256Hash`. */
class Sha256HashBenchmark extends AbstractHashBenchmark:
  override def hash(buf: Array[Byte]): String =
    val digest = Digest.sha256Hash(buf)
    digest.toString
/** Byte-array benchmark that hashes through `Blake3.hex` with a length argument of 64. */
class Blake3HashBenchmark extends AbstractHashBenchmark:
  override def hash(buf: Array[Byte]): String =
    val requestedLength = 64
    Blake3.hex(buf, requestedLength)

View File

@ -0,0 +1,33 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import XXHashConstants.PRIME64_1
import XXHashConstants.PRIME64_2
/**
 * Mutable state for a streaming xxHash64 computation: the four accumulators `v1`..`v4`, the
 * running byte count `totalLen`, and a 32-byte `memory` array with its fill level `memSize`.
 *
 * The state is initialised by calling [[reset]] from the constructor.
 *
 * @param seed value the accumulators are derived from on every [[reset]]
 */
abstract class AbstractStreamingXXHash64Scala(seed: Long) extends StreamingHashAlgo(seed):
  protected val memory = new Array[Byte](32)
  protected var memSize: Int = 0
  protected var totalLen: Long = 0
  protected var v1: Long = 0
  protected var v2: Long = 0
  protected var v3: Long = 0
  protected var v4: Long = 0

  // Must run after every field initialiser above, otherwise they would overwrite its effect.
  reset()

  /** Re-derives the accumulators from the seed and clears the byte count and fill level. */
  override def reset(): Unit =
    memSize = 0
    totalLen = 0
    v1 = seed + PRIME64_1 + PRIME64_2
    v2 = seed + PRIME64_2
    v3 = seed
    v4 = seed - PRIME64_1
end AbstractStreamingXXHash64Scala

View File

@ -0,0 +1,29 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.nio.{ ByteBuffer, ByteOrder }
/** Range checks and byte-order helpers for `ByteBuffer` inputs. */
object ByteBufferUtils:
  /**
   * Verifies that `off` indexes a byte inside the buffer's capacity.
   *
   * @throws ArrayIndexOutOfBoundsException if `off` is negative or not below `buf.capacity()`
   */
  def checkRange(buf: ByteBuffer, off: Int): Unit =
    val inside = off >= 0 && off < buf.capacity()
    if !inside then throw new ArrayIndexOutOfBoundsException(off)

  /**
   * Verifies that `len` is non-negative and, when `len` is positive, that both the first and
   * the last byte of `[off, off + len)` lie inside the buffer's capacity. A zero `len`
   * performs no check on `off`.
   */
  def checkRange(buf: ByteBuffer, off: Int, len: Int): Unit =
    SafeUtils.checkLength(len)
    if len > 0 then
      checkRange(buf, off)
      checkRange(buf, off + len - 1)

  /**
   * Returns `buf` itself when it is already little-endian, otherwise a little-endian
   * duplicate; the order of the argument is never changed.
   */
  def inLittleEndianOrder(buf: ByteBuffer): ByteBuffer =
    if buf.order() != ByteOrder.LITTLE_ENDIAN then buf.duplicate().order(ByteOrder.LITTLE_ENDIAN)
    else buf
end ByteBufferUtils

View File

@ -0,0 +1,20 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.io.File
import java.nio.file.{ Path as NioPath }
/** Computes a 64-bit hash for a file given either as a `java.io.File` or as an NIO path. */
trait FileHash:
  /** Hashes the given file. */
  def hash(file: File): Long

  /** Hashes the file at the given path. */
  def hash(file: NioPath): Long

  /** Renders as the simple name of the implementing class. */
  override def toString(): String =
    val runtimeClass = getClass()
    runtimeClass.getSimpleName()
end FileHash

View File

@ -0,0 +1,75 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.io.{ File, RandomAccessFile }
import java.nio.ByteBuffer
import java.nio.file.{ Path as NioPath }
import scala.util.Using
/** Defaults and factory for [[FileSampleHash]]. */
object FileSampleHash:
  /** Default number of bytes read per sample (16 KiB). */
  final val defaultSampleBytes = 16 * 1024

  /** Default threshold (128 KiB) handed to [[FileSampleHash]] by [[apply]]. */
  final val defaultThresholdBytes = 128L * 1024L

  /** Misspelled original name of [[defaultThresholdBytes]]; kept so existing callers compile. */
  final val defaultThresoldBytes = defaultThresholdBytes

  /** Creates a sampling hash over `underlying` using the default sample and threshold sizes. */
  def apply(underlying: StreamingHashAlgo): FileSampleHash =
    new FileSampleHash(defaultSampleBytes, defaultThresholdBytes, underlying)
end FileSampleHash
/**
 * Sampling file hash based on Imohash
 * https://github.com/kalafut/imohash/blob/master/algorithm.md
 *
 * Files shorter than `thresholdBytes` (or any file when `sampleBytes < 1`) are hashed in full.
 * Larger files contribute three windows of `sampleBytes` bytes each: the start of the file,
 * the window beginning at the midpoint, and the end of the file. For non-empty files the
 * file length is then fed to the underlying hasher as 8 bytes.
 *
 * If the configured windows do not fit into the file (possible only when `thresholdBytes` is
 * small relative to `sampleBytes`), the whole file is hashed instead of failing on a short read.
 *
 * An instance reuses one read buffer and one underlying streaming hasher across calls, so it
 * must not be used by several threads at the same time.
 *
 * @param sampleBytes    size of each sampled window; must be non-negative
 * @param thresholdBytes files shorter than this are always hashed completely
 * @param underlying     streaming hasher that is reset and fed by every `hash` call
 */
class FileSampleHash(sampleBytes: Int, thresholdBytes: Long, underlying: StreamingHashAlgo)
    extends FileHash:
  require(sampleBytes >= 0)

  /** Scratch buffer used to shuttle file content into the underlying hasher. */
  val buffer: Array[Byte] = new Array[Byte](4096)

  override def hash(file: NioPath): Long =
    hash(file.toFile())

  override def hash(file: File): Long =
    Using.resource(new RandomAccessFile(file, "r")) { raf =>
      hash(raf, raf.length())
    }

  /** Hashes `input`, which must be positioned at offset 0 and be `fileLength` bytes long. */
  private def hash(input: RandomAccessFile, fileLength: Long): Long =
    underlying.reset()
    val midpoint = fileLength / 2
    // The middle window [midpoint, midpoint + sampleBytes) is the tightest constraint; when it
    // fits, the leading and trailing windows fit as well.
    val windowsFit = fileLength - midpoint >= sampleBytes
    if fileLength < thresholdBytes || sampleBytes < 1 || !windowsFit then
      hashBytes(input, fileLength)
    else
      hashBytes(input, sampleBytes)
      // skip to halfway point
      input.seek(midpoint)
      hashBytes(input, sampleBytes)
      input.seek(fileLength - sampleBytes)
      hashBytes(input, sampleBytes)
    // write file size
    if fileLength > 0 then
      val sizeBuf = ByteBuffer.allocate(java.lang.Long.BYTES)
      sizeBuf.putLong(fileLength)
      underlying.update(sizeBuf.array(), 0, sizeBuf.array().length)
    underlying.getValue
  end hash

  /**
   * Reads exactly `toHash` bytes from the current position of `input` and feeds them to the
   * underlying hasher, flushing whenever the scratch buffer is full.
   */
  private def hashBytes(input: RandomAccessFile, toHash: Long): Unit =
    var remaining: Long = toHash
    var filled = 0
    while remaining > 0 do
      val wanted = math.min(buffer.length - filled, remaining).toInt
      val bytesRead = input.read(buffer, filled, wanted)
      if bytesRead < 0 then sys.error("unexpected EOF")
      filled += bytesRead
      remaining -= bytesRead
      if filled >= buffer.length then
        underlying.update(buffer, 0, buffer.length)
        filled = 0
    if filled > 0 then underlying.update(buffer, 0, filled)
  end hashBytes
end FileSampleHash

View File

@ -0,0 +1,60 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.nio.ByteBuffer
/**
 * One-shot 64-bit hash function over byte arrays and byte buffers.
 */
trait HashAlgo:
  /**
   * Hashes the `len` bytes of `buf` starting at `off`.
   *
   * @param buf the input data
   * @param off index of the first byte to hash
   * @param len how many bytes to hash
   * @param seed the seed to use
   * @return the 64-bit hash value
   */
  def hash(buf: Array[Byte], off: Int, len: Int, seed: Long): Long

  /**
   * Hashes the `len` bytes of `buf` starting at `off`. The buffer's position and limit
   * are not modified.
   *
   * @param buf the input data
   * @param off index of the first byte to hash
   * @param len how many bytes to hash
   * @param seed the seed to use
   * @return the 64-bit hash value
   */
  def hash(buf: ByteBuffer, off: Int, len: Int, seed: Long): Long

  /**
   * Hashes the remaining bytes of `buf` (from its position up to its limit) and then
   * advances the position to the limit to reflect the bytes that were consumed.
   *
   * @param buf the input data
   * @param seed the seed to use
   * @return the 64-bit hash value
   */
  def hash(buf: ByteBuffer, seed: Long): Long =
    val start = buf.position()
    val digest = hash(buf, start, buf.remaining(), seed)
    buf.position(buf.limit())
    digest

  /** Renders as the simple name of the implementing class. */
  override def toString(): String =
    val runtimeClass = getClass()
    runtimeClass.getSimpleName()
end HashAlgo

View File

@ -0,0 +1,24 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/** Entry points for obtaining the hash implementations of this package. */
object Hashing:
  /** The shared one-shot xxHash64 instance. */
  def xxhash64: HashAlgo = XXHash64VarHandle.INSTANCE

  /** The shared one-shot WyHash64 instance. */
  def wyhash64: HashAlgo = WyHash64VarHandle.INSTANCE

  /** A fresh streaming xxHash64 hasher for `seed`. */
  def newStreamingXXHash64(seed: Long): StreamingHashAlgo =
    new StreamingXXHash64VarHandle(seed)

  /** A fresh streaming WyHash64 hasher for `seed`. */
  def newStreamingWyHash64(seed: Long): StreamingHashAlgo =
    new StreamingWyHash64VarHandle(seed)

  /** A sampling file hash with default settings, backed by streaming xxHash64. */
  def samplingFileHashXXHash64(seed: Long): FileHash =
    val streaming = newStreamingXXHash64(seed)
    FileSampleHash(streaming)

  /** A sampling file hash with default settings, backed by streaming WyHash64. */
  def samplingFileHashWyHash64(seed: Long): FileHash =
    val streaming = newStreamingWyHash64(seed)
    FileSampleHash(streaming)
end Hashing

View File

@ -0,0 +1,27 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/** Argument checks for byte-array inputs. */
object SafeUtils:
  /**
   * Verifies that `off` is a valid index into `buf`.
   *
   * @throws ArrayIndexOutOfBoundsException if `off` is negative or not below `buf.length`
   */
  def checkRange(buf: Array[Byte], off: Int): Unit =
    val inside = off >= 0 && off < buf.length
    if !inside then throw new ArrayIndexOutOfBoundsException(off)

  /**
   * Verifies that `len` is non-negative and, when `len` is positive, that both the first and
   * the last index of `[off, off + len)` are valid for `buf`. A zero `len` performs no check
   * on `off`.
   */
  def checkRange(buf: Array[Byte], off: Int, len: Int): Unit =
    checkLength(len)
    if len > 0 then
      checkRange(buf, off)
      checkRange(buf, off + len - 1)

  /**
   * Rejects negative lengths.
   *
   * @throws IllegalArgumentException if `len` is negative
   */
  def checkLength(len: Int): Unit =
    if len < 0 then throw new IllegalArgumentException("lengths must be >= 0")
end SafeUtils

View File

@ -0,0 +1,55 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.io.Closeable
/**
 * Incremental (streaming) hashing interface.
 * The implementation is based on lz4-java.
 * Copyright 2020 Linnaea Von Lavia and the lz4-java contributors.
 * Licensed under the Apache License.
 *
 * Instances of this class are **not** thread-safe.
 *
 * @param seed the seed this instance was created with
 */
abstract class StreamingHashAlgo(val seed: Long) extends Closeable:
  /**
   * The hash of everything fed in so far.
   *
   * @return the checksum
   */
  def getValue: Long

  /**
   * Feeds `buf[off:off+len]` into the hash.
   *
   * @param buf the input data
   * @param off index of the first byte to consume
   * @param len how many bytes to consume
   */
  def update(buf: Array[Byte], off: Int, len: Int): Unit

  /**
   * Puts this instance back into the state it had right after construction; the seed is
   * kept.
   */
  def reset(): Unit

  /**
   * Releases any system resources held by this instance. The default does nothing, and
   * calling it is optional because such resources are reclaimed together with the instance
   * by the garbage collector.
   */
  override def close(): Unit = ()

  /** Renders as `<simple class name>(seed=<seed>)`. */
  override def toString: String =
    s"${getClass().getSimpleName()}(seed=$seed)"
end StreamingHashAlgo

View File

@ -0,0 +1,120 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import WyHash64VarHandle.*
import WyHashConstants.*
import VarHandleUtils.*
/**
 * Streaming WyHash64: input is consumed in 48-byte rounds, and up to 48 trailing bytes are
 * kept in `memory` until more input arrives or [[getValue]] is called.
 *
 * [[getValue]] works on copies of the state, so it can be called repeatedly and further
 * [[update]] calls may follow it.
 *
 * Instances are not thread-safe.
 *
 * @param seed seed from which the three lanes are initialised on every [[reset]]
 */
class StreamingWyHash64VarHandle(seed: Long) extends StreamingHashAlgo(seed):
  protected var a: Long = 0
  protected var b: Long = 0
  protected val state: Array[Long] = new Array[Long](3)
  protected var v0: Long = 0
  protected var v1: Long = 0
  protected var v2: Long = 0
  protected var totalLen: Long = 0L
  protected val memory = new Array[Byte](48)
  protected var memoryLen: Int = 0
  reset()

  /** Re-initialises all three lanes from the seed and forgets any buffered input. */
  override def reset(): Unit =
    val s: Long = initSeed(seed)
    this.v0 = s
    this.v1 = s
    this.v2 = s
    this.totalLen = 0
    this.memoryLen = 0

  /** Finishes the hash over everything fed in so far without modifying this instance. */
  def getValue: Long =
    var _a: Long = this.a
    var _b: Long = this.b
    var v0: Long = this.v0
    val v1: Long = this.v1
    val v2: Long = this.v2
    var input = this.memory
    var inputLen = this.memoryLen
    if this.totalLen <= 16 then
      // Short input: everything is still sitting in `memory`.
      if inputLen >= 4 then
        val end = inputLen - 4
        val quarter = (inputLen >> 3) << 2
        _a = (readIntLE(input, 0).toLong << 32) |
          (readIntLE(input, quarter) & 0xffffffffL)
        // Widen to Long before shifting: an Int shifted by 32 is left unchanged on the JVM,
        // which would leave the upper half empty and disagree with the one-shot hash.
        _b = (readIntLE(input, end).toLong << 32) |
          (readIntLE(input, end - quarter) & 0xffffffffL)
      else if inputLen > 0 then
        _a = ((input(0) & 0xffL) << 16) |
          ((input(inputLen >> 1) & 0xffL) << 8) |
          (input(inputLen - 1) & 0xffL)
        _b = 0
      else
        _a = 0
        _b = 0
      end if
    else
      if inputLen < 16 then
        // The final 16 bytes straddle the previously consumed data, which `update` keeps at
        // the tail of `memory`; stitch both parts together in a scratch array.
        val rem = 16 - inputLen
        val scratch = new Array[Byte](16)
        System.arraycopy(memory, 48 - rem, scratch, 0, rem)
        System.arraycopy(memory, 0, scratch, rem, inputLen)
        input = scratch
        inputLen = 16
      if this.totalLen >= 48 then v0 ^= v1 ^ v2
      var i = 0
      while i + 16 < inputLen do
        v0 = mix(readLongLE(input, i) ^ PRIME64_1, readLongLE(input, i + 8) ^ v0)
        i += 16
      _a = readLongLE(input, inputLen - 16)
      _b = readLongLE(input, inputLen - 8)
    end if
    finishHash(_a, _b, v0, this.totalLen)
  end getValue

  /**
   * Feeds `buf[off:off+len]` into the hash.
   *
   * @throws IllegalArgumentException if `len` is negative
   * @throws ArrayIndexOutOfBoundsException if the range does not lie inside `buf`
   */
  def update(buf: Array[Byte], off: Int, len: Int): Unit =
    // Validate before touching any state so that a bad call cannot corrupt the running hash.
    SafeUtils.checkRange(buf, off, len)
    this.totalLen += len
    if len <= 48 - this.memoryLen then
      // Everything fits into the pending block; nothing to mix yet.
      System.arraycopy(buf, off, this.memory, this.memoryLen, len)
      this.memoryLen += len
    else
      var i: Int = 0
      if this.memoryLen > 0 then
        // Top up the pending block and consume it.
        i = 48 - this.memoryLen
        System.arraycopy(buf, off, this.memory, this.memoryLen, i)
        round(this.memory, 0)
        this.memoryLen = 0
      end if
      // Strictly less-than: the final (possibly full) block always stays buffered.
      while i + 48 < len do
        round(buf, off + i)
        i += 48
      val remaining = len - i
      if remaining < 16 && i >= 48 then
        // getValue needs the last 16 bytes of the stream; keep the bytes that precede the
        // remainder at the tail of `memory`.
        val rem = 16 - remaining
        System.arraycopy(buf, off + i - rem, this.memory, 48 - rem, rem)
      System.arraycopy(buf, off + i, this.memory, 0, remaining)
      this.memoryLen = remaining
    end if
  end update

  /** Mixes the 48 bytes of `buf` starting at `p` into the three lanes. */
  private def round(buf: Array[Byte], p: Int): Unit =
    this.v0 = mix(readLongLE(buf, p) ^ PRIME64_1, readLongLE(buf, p + 8) ^ this.v0)
    this.v1 = mix(readLongLE(buf, p + 16) ^ PRIME64_2, readLongLE(buf, p + 24) ^ this.v1)
    this.v2 = mix(readLongLE(buf, p + 32) ^ PRIME64_3, readLongLE(buf, p + 40) ^ this.v2)
end StreamingWyHash64VarHandle

View File

@ -0,0 +1,168 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.lang.Long.rotateLeft
import SafeUtils.checkRange
import VarHandleUtils.*
import XXHashConstants.*
/**
 * The implementation is based on lz4-java.
 * Copyright 2020 Linnaea Von Lavia and the lz4-java contributors.
 * Licensed under the Apache License.
 *
 * Streaming xxHash64: input is consumed in 32-byte stripes spread over four accumulators,
 * and fewer than 32 trailing bytes are kept in `memory` until more input arrives or
 * [[getValue]] is called.
 */
class StreamingXXHash64VarHandle(seed: Long) extends AbstractStreamingXXHash64Scala(seed):

  /** Accumulator step: folds the 8-byte `lane` into `acc`. */
  private def round(acc: Long, lane: Long): Long =
    rotateLeft(acc + lane * PRIME64_2, 31) * PRIME64_1

  /** Folds the finished accumulator `acc` into the running `digest`. */
  private def mergeAccumulator(digest: Long, acc: Long): Long =
    (digest ^ round(0L, acc)) * PRIME64_1 + PRIME64_4

  /** Finishes the hash over everything fed in so far without modifying this instance. */
  override def getValue: Long =
    var digest: Long =
      if totalLen >= 32 then
        val combined =
          rotateLeft(v1, 1) + rotateLeft(v2, 7) + rotateLeft(v3, 12) + rotateLeft(v4, 18)
        val m1 = mergeAccumulator(combined, v1)
        val m2 = mergeAccumulator(m1, v2)
        val m3 = mergeAccumulator(m2, v3)
        mergeAccumulator(m3, v4)
      else seed + PRIME64_5
    digest += totalLen

    // Buffered tail: whole 8-byte words, then at most one 4-byte word, then single bytes.
    var pos: Int = 0
    while pos + 8 <= memSize do
      digest ^= round(0L, readLongLE(memory, pos))
      digest = rotateLeft(digest, 27) * PRIME64_1 + PRIME64_4
      pos += 8
    if pos + 4 <= memSize then
      digest ^= (readIntLE(memory, pos) & 0xffffffffL) * PRIME64_1
      digest = rotateLeft(digest, 23) * PRIME64_2 + PRIME64_3
      pos += 4
    while pos < memSize do
      digest ^= (memory(pos) & 0xff) * PRIME64_5
      digest = rotateLeft(digest, 11) * PRIME64_1
      pos += 1

    // Final bit mixing.
    digest ^= digest >>> 33
    digest *= PRIME64_2
    digest ^= digest >>> 29
    digest *= PRIME64_3
    digest ^= digest >>> 32
    digest
  end getValue

  /** Feeds `buf[offset:offset+len]` into the hash after validating the range. */
  override def update(buf: Array[Byte], offset: Int, len: Int): Unit =
    checkRange(buf, offset, len)
    totalLen += len
    if memSize + len < 32 then
      // Not enough for a full stripe yet: just buffer the bytes.
      System.arraycopy(buf, offset, memory, memSize, len)
      memSize += len
    else
      val end: Int = offset + len
      var pos: Int = offset
      if memSize > 0 then
        // Complete the stripe left over from earlier calls and consume it.
        val missing = 32 - memSize
        System.arraycopy(buf, pos, memory, memSize, missing)
        v1 = round(v1, readLongLE(memory, 0))
        v2 = round(v2, readLongLE(memory, 8))
        v3 = round(v3, readLongLE(memory, 16))
        v4 = round(v4, readLongLE(memory, 24))
        pos += missing
        memSize = 0

      // Consume every full stripe directly from the caller's array, working on local copies
      // of the accumulators.
      val lastStripeStart: Int = end - 32
      var acc1: Long = v1
      var acc2: Long = v2
      var acc3: Long = v3
      var acc4: Long = v4
      while pos <= lastStripeStart do
        acc1 = round(acc1, readLongLE(buf, pos))
        acc2 = round(acc2, readLongLE(buf, pos + 8))
        acc3 = round(acc3, readLongLE(buf, pos + 16))
        acc4 = round(acc4, readLongLE(buf, pos + 24))
        pos += 32
      v1 = acc1
      v2 = acc2
      v3 = acc3
      v4 = acc4

      // Keep whatever is left (fewer than 32 bytes) for the next call.
      if pos < end then
        val leftover = end - pos
        System.arraycopy(buf, pos, memory, 0, leftover)
        memSize = leftover
    end if
  end update
end StreamingXXHash64VarHandle

View File

@ -0,0 +1,41 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.lang.invoke.{ MethodHandles, VarHandle }
import java.nio.{ ByteBuffer, ByteOrder }
/**
 * Little-endian reads of `Int` and `Long` values from byte arrays and byte buffers,
 * implemented with `VarHandle` views.
 */
object VarHandleUtils:
  private val LONG_HANDLE: VarHandle =
    MethodHandles.byteArrayViewVarHandle(classOf[Array[Long]], ByteOrder.LITTLE_ENDIAN)

  private val INT_HANDLE: VarHandle =
    MethodHandles.byteArrayViewVarHandle(classOf[Array[Int]], ByteOrder.LITTLE_ENDIAN)

  private val BB_LONG_HANDLE: VarHandle =
    MethodHandles.byteBufferViewVarHandle(classOf[Array[Long]], ByteOrder.LITTLE_ENDIAN)

  private val BB_INT_HANDLE: VarHandle =
    MethodHandles.byteBufferViewVarHandle(classOf[Array[Int]], ByteOrder.LITTLE_ENDIAN)

  /** The byte at index `off` of `buf`. */
  inline def readByte(buf: Array[Byte], off: Int): Byte =
    buf(off)

  /** The little-endian `Int` stored in the 4 bytes of `buf` starting at `off`. */
  inline def readIntLE(buf: Array[Byte], off: Int): Int =
    INT_HANDLE.get(buf, off).asInstanceOf[Int]

  /** The little-endian `Long` stored in the 8 bytes of `buf` starting at `off`. */
  inline def readLongLE(buf: Array[Byte], off: Int): Long =
    LONG_HANDLE.get(buf, off).asInstanceOf[Long]

  /** The byte at absolute index `i` of `buf`. */
  inline def readByte(buf: ByteBuffer, i: Int): Byte =
    buf.get(i)

  /**
   * The little-endian `Int` stored at absolute index `i` of `buf`; asserts that the buffer
   * itself is in little-endian order.
   */
  inline def readIntLE(buf: ByteBuffer, i: Int): Int =
    assert(buf.order() == ByteOrder.LITTLE_ENDIAN)
    BB_INT_HANDLE.get(buf, i).asInstanceOf[Int]

  /**
   * The little-endian `Long` stored at absolute index `i` of `buf`; asserts that the buffer
   * itself is in little-endian order.
   */
  inline def readLongLE(buf: ByteBuffer, i: Int): Long =
    assert(buf.order() == ByteOrder.LITTLE_ENDIAN)
    BB_LONG_HANDLE.get(buf, i).asInstanceOf[Long]
end VarHandleUtils

View File

@ -0,0 +1,162 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.nio.ByteBuffer
import WyHashConstants.*
import VarHandleUtils.*
/** Shared instance and arithmetic building blocks of the WyHash64 implementations. */
object WyHash64VarHandle:
  private[hashing] val INSTANCE = new WyHash64VarHandle()

  /** Derives the initial lane value from a user-supplied seed. */
  private[hashing] inline def initSeed(seed: Long): Long =
    val mixed = mix(seed ^ PRIME64_0, PRIME64_1)
    seed ^ mixed

  /** XOR of the low and high 64 bits of the unsigned 128-bit product `a * b`. */
  private[hashing] def mix(a: Long, b: Long): Long =
    (a * b) ^ unsignedMultiplyHigh(a, b)

  /**
   * High 64 bits of the unsigned 128-bit product `a * b`, obtained from the signed
   * `Math.multiplyHigh` plus a correction term for each negative operand.
   */
  private[hashing] inline def unsignedMultiplyHigh(a: Long, b: Long): Long =
    val signedHigh = Math.multiplyHigh(a, b)
    val correctionForA = (a >> 63) & b
    val correctionForB = (b >> 63) & a
    signedHigh + correctionForA + correctionForB

  /** Packs the first, middle and last byte of the `k`-byte range at `off` into 24 bits. */
  private[hashing] inline def wyr3(buf: Array[Byte], off: Int, k: Int): Long =
    val first = (buf(off) & 0xffL) << 16
    val middle = (buf(off + (k >> 1)) & 0xffL) << 8
    val last = buf(off + k - 1) & 0xffL
    first | middle | last

  /** Same as the array variant, reading through absolute `ByteBuffer` gets. */
  private[hashing] inline def wyr3(buf: ByteBuffer, off: Int, k: Int): Long =
    val first = (buf.get(off) & 0xffL) << 16
    val middle = (buf.get(off + (k >> 1)) & 0xffL) << 8
    val last = buf.get(off + k - 1) & 0xffL
    first | middle | last

  /** Final step combining the two tail words, the lane state (`seed`) and the input length. */
  private[hashing] inline def finishHash(a: Long, b: Long, seed: Long, len: Long): Long =
    val left = a ^ PRIME64_1
    val right = b ^ seed
    val productLow = left * right
    val productHigh = unsignedMultiplyHigh(left, right)
    mix(productLow ^ PRIME64_0 ^ len, productHigh ^ PRIME64_1)
end WyHash64VarHandle
/**
 * Wyhash matching Zig 0.15 std.hash.Wyhash.
 *
 * Inputs of at most 16 bytes are folded directly into the two tail words; longer inputs are
 * consumed in 48-byte rounds over three lanes, then in 16-byte steps, and finished with the
 * last 16 bytes of the input.
 */
class WyHash64VarHandle extends HashAlgo:
  import WyHash64VarHandle.*

  override def hash(buf: Array[Byte], offset: Int, len: Int, seed: Long): Long =
    SafeUtils.checkRange(buf, offset, len)
    var state: Long = initSeed(seed)
    var a: Long = 0L
    var b: Long = 0L
    if len > 16 then
      var left = len
      var pos = offset
      var lane0 = state
      var lane1 = state
      var lane2 = state
      while left > 48 do
        lane0 = mix(readLongLE(buf, pos) ^ PRIME64_1, readLongLE(buf, pos + 8) ^ lane0)
        lane1 = mix(readLongLE(buf, pos + 16) ^ PRIME64_2, readLongLE(buf, pos + 24) ^ lane1)
        lane2 = mix(readLongLE(buf, pos + 32) ^ PRIME64_3, readLongLE(buf, pos + 40) ^ lane2)
        pos += 48
        left -= 48
      // When no 48-byte round ran, lane1 == lane2 and this XOR leaves lane0 unchanged.
      lane0 ^= lane1 ^ lane2
      while left > 16 do
        lane0 = mix(readLongLE(buf, pos) ^ PRIME64_1, readLongLE(buf, pos + 8) ^ lane0)
        left -= 16
        pos += 16
      // The tail words always come from the final 16 bytes of the input.
      a = readLongLE(buf, offset + len - 16)
      b = readLongLE(buf, offset + len - 8)
      state = lane0
    else if len >= 4 then
      val quarter = (len >> 3) << 2
      val lastWord = offset + len - 4
      a = (readIntLE(buf, offset).toLong << 32) |
        (readIntLE(buf, offset + quarter) & 0xffffffffL)
      b = (readIntLE(buf, lastWord).toLong << 32) |
        (readIntLE(buf, lastWord - quarter) & 0xffffffffL)
    else if len > 0 then
      a = wyr3(buf, offset, len)
    end if
    finishHash(a, b, state, len)
  end hash

  override def hash(buffer: ByteBuffer, offset: Int, len: Int, seed: Long): Long =
    if buffer.hasArray() then
      // Array-backed buffers take the array path, shifted by the backing-array offset.
      hash(buffer.array(), offset + buffer.arrayOffset(), len, seed)
    else
      ByteBufferUtils.checkRange(buffer, offset, len)
      val view = ByteBufferUtils.inLittleEndianOrder(buffer)
      var state: Long = initSeed(seed)
      var a: Long = 0L
      var b: Long = 0L
      if len > 16 then
        var left = len
        var pos = offset
        var lane0 = state
        var lane1 = state
        var lane2 = state
        while left > 48 do
          lane0 = mix(readLongLE(view, pos) ^ PRIME64_1, readLongLE(view, pos + 8) ^ lane0)
          lane1 = mix(readLongLE(view, pos + 16) ^ PRIME64_2, readLongLE(view, pos + 24) ^ lane1)
          lane2 = mix(readLongLE(view, pos + 32) ^ PRIME64_3, readLongLE(view, pos + 40) ^ lane2)
          pos += 48
          left -= 48
        // When no 48-byte round ran, lane1 == lane2 and this XOR leaves lane0 unchanged.
        lane0 ^= lane1 ^ lane2
        while left > 16 do
          lane0 = mix(readLongLE(view, pos) ^ PRIME64_1, readLongLE(view, pos + 8) ^ lane0)
          left -= 16
          pos += 16
        // The tail words always come from the final 16 bytes of the input.
        a = readLongLE(view, offset + len - 16)
        b = readLongLE(view, offset + len - 8)
        state = lane0
      else if len >= 4 then
        val quarter = (len >> 3) << 2
        val lastWord = offset + len - 4
        a = (readIntLE(view, offset).toLong << 32) |
          (readIntLE(view, offset + quarter) & 0xffffffffL)
        b = (readIntLE(view, lastWord).toLong << 32) |
          (readIntLE(view, lastWord - quarter) & 0xffffffffL)
      else if len > 0 then
        a = wyr3(view, offset, len)
      end if
      finishHash(a, b, state, len)
    end if
  end hash
end WyHash64VarHandle

View File

@ -0,0 +1,17 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/** The four 64-bit constants used by the WyHash64 implementations. */
object WyHashConstants:
  final val PRIME64_0 = 0xa0761d64_78bd642fL
  final val PRIME64_1 = 0xe7037ed1_a0b428dbL
  final val PRIME64_2 = 0x8ebc6af0_9c88c6e3L
  final val PRIME64_3 = 0x589965cc_75374cc3L
end WyHashConstants

View File

@ -0,0 +1,222 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.lang.Long.rotateLeft
import java.nio.ByteBuffer
import VarHandleUtils.*
import XXHashConstants.*
object XXHash64VarHandle:
  /** Shared instance handed out to the rest of sbt. */
  private[sbt] val INSTANCE = new XXHash64VarHandle()
end XXHash64VarHandle

/**
 * The implementation is based on lz4-java.
 * Copyright 2020 Linnaea Von Lavia and the lz4-java contributors.
 * Licensed under the Apache License.
 *
 * One-shot xxHash64. Inputs of 32 bytes or more are first consumed in 32-byte stripes spread
 * over four accumulators; the remaining bytes are folded in as 8-byte words, at most one
 * 4-byte word and single bytes, followed by a final bit-mixing step.
 *
 * The class declares no fields; every call works on local variables only.
 */
class XXHash64VarHandle extends HashAlgo:

  /** Accumulator step: folds the 8-byte `lane` into `acc`. */
  private def round(acc: Long, lane: Long): Long =
    rotateLeft(acc + lane * PRIME64_2, 31) * PRIME64_1

  /** Folds the finished accumulator `acc` into the running `digest`. */
  private def mergeAccumulator(digest: Long, acc: Long): Long =
    (digest ^ round(0L, acc)) * PRIME64_1 + PRIME64_4

  /** Combines the four accumulators into the starting digest of the finishing phase. */
  private def mergeAll(acc1: Long, acc2: Long, acc3: Long, acc4: Long): Long =
    val combined =
      rotateLeft(acc1, 1) + rotateLeft(acc2, 7) + rotateLeft(acc3, 12) + rotateLeft(acc4, 18)
    val m1 = mergeAccumulator(combined, acc1)
    val m2 = mergeAccumulator(m1, acc2)
    val m3 = mergeAccumulator(m2, acc3)
    mergeAccumulator(m3, acc4)

  /** Final bit mixing applied to the digest. */
  private def avalanche(value: Long): Long =
    var h = value
    h ^= h >>> 33
    h *= PRIME64_2
    h ^= h >>> 29
    h *= PRIME64_3
    h ^= h >>> 32
    h

  override def hash(buf: Array[Byte], offset: Int, len: Int, seed: Long): Long =
    SafeUtils.checkRange(buf, offset, len)
    val end: Int = offset + len
    var pos: Int = offset
    var digest: Long = 0L
    if len >= 32 then
      val lastStripeStart = end - 32
      var acc1: Long = seed + PRIME64_1 + PRIME64_2
      var acc2: Long = seed + PRIME64_2
      var acc3: Long = seed
      var acc4: Long = seed - PRIME64_1
      // With len >= 32 the condition holds on entry, so at least one stripe is consumed.
      while pos <= lastStripeStart do
        acc1 = round(acc1, readLongLE(buf, pos))
        acc2 = round(acc2, readLongLE(buf, pos + 8))
        acc3 = round(acc3, readLongLE(buf, pos + 16))
        acc4 = round(acc4, readLongLE(buf, pos + 24))
        pos += 32
      digest = mergeAll(acc1, acc2, acc3, acc4)
    else digest = seed + PRIME64_5
    digest += len

    while pos + 8 <= end do
      digest ^= round(0L, readLongLE(buf, pos))
      digest = rotateLeft(digest, 27) * PRIME64_1 + PRIME64_4
      pos += 8
    if pos + 4 <= end then
      digest ^= (readIntLE(buf, pos) & 0xffffffffL) * PRIME64_1
      digest = rotateLeft(digest, 23) * PRIME64_2 + PRIME64_3
      pos += 4
    while pos < end do
      digest ^= (readByte(buf, pos) & 0xff) * PRIME64_5
      digest = rotateLeft(digest, 11) * PRIME64_1
      pos += 1
    avalanche(digest)
  end hash

  override def hash(buffer: ByteBuffer, offset: Int, len: Int, seed: Long): Long =
    if buffer.hasArray() then
      // Array-backed buffers take the array path, shifted by the backing-array offset.
      hash(buffer.array(), offset + buffer.arrayOffset(), len, seed)
    else
      ByteBufferUtils.checkRange(buffer, offset, len)
      val view = ByteBufferUtils.inLittleEndianOrder(buffer)
      val end: Int = offset + len
      var pos: Int = offset
      var digest: Long = 0L
      if len >= 32 then
        val lastStripeStart: Int = end - 32
        var acc1: Long = seed + PRIME64_1 + PRIME64_2
        var acc2: Long = seed + PRIME64_2
        var acc3: Long = seed
        var acc4: Long = seed - PRIME64_1
        // With len >= 32 the condition holds on entry, so at least one stripe is consumed.
        while pos <= lastStripeStart do
          acc1 = round(acc1, readLongLE(view, pos))
          acc2 = round(acc2, readLongLE(view, pos + 8))
          acc3 = round(acc3, readLongLE(view, pos + 16))
          acc4 = round(acc4, readLongLE(view, pos + 24))
          pos += 32
        digest = mergeAll(acc1, acc2, acc3, acc4)
      else digest = seed + PRIME64_5
      digest += len

      while pos + 8 <= end do
        digest ^= round(0L, readLongLE(view, pos))
        digest = rotateLeft(digest, 27) * PRIME64_1 + PRIME64_4
        pos += 8
      if pos + 4 <= end then
        digest ^= (readIntLE(view, pos) & 0xffffffffL) * PRIME64_1
        digest = rotateLeft(digest, 23) * PRIME64_2 + PRIME64_3
        pos += 4
      while pos < end do
        digest ^= (readByte(view, pos) & 0xff) * PRIME64_5
        digest = rotateLeft(digest, 11) * PRIME64_1
        pos += 1
      avalanche(digest)
    end if
  end hash
end XXHash64VarHandle

View File

@ -0,0 +1,24 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/**
 * Prime multipliers shared by the xxHash code in this package.
 *
 * The values are written as hexadecimal bit patterns. The JVM has no unsigned integer
 * types, so every pattern whose top bit is set is a negative `Int`/`Long`; the trailing
 * comment gives the unsigned decimal reading of each constant.
 */
object XXHashConstants:
  // 32-bit primes
  final val PRIME1 = 0x9e3779b1 // 2654435761
  final val PRIME2 = 0x85ebca77 // 2246822519
  final val PRIME3 = 0xc2b2ae3d // 3266489917
  final val PRIME4 = 0x27d4eb2f // 668265263
  final val PRIME5 = 0x165667b1 // 374761393

  // 64-bit primes
  final val PRIME64_1 = 0x9e3779b185ebca87L // 11400714785074694791
  final val PRIME64_2 = 0xc2b2ae3d27d4eb4fL // 14029467366897019727
  final val PRIME64_3 = 0x165667b19e3779f9L // 1609587929392839161
  final val PRIME64_4 = 0x85ebca77c2b2ae63L // 9650029242287828579
  final val PRIME64_5 = 0x27d4eb2f165667c5L // 2870177450012600261
end XXHashConstants

View File

@ -0,0 +1,56 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import java.nio.ByteBuffer
import verify.BasicTestSuite
/**
 * Shared checks for a 64-bit hash implementation, exercised through both its one-shot
 * ([[HashAlgo]]) and its streaming ([[StreamingHashAlgo]]) entry points.
 *
 * Concrete suites supply the implementations under test and the two expected values.
 */
abstract class AbstractHashTest extends BasicTestSuite:
  /** One-shot hasher under test. */
  def hash64: HashAlgo

  /** Creates a fresh streaming hasher; every test in this class passes 0. */
  def newStreaming(seed: Int): StreamingHashAlgo

  /** Expected value when no bytes are hashed. */
  def emptyHash: Long

  /** Expected value when a single zero byte is hashed. */
  def zeroHash: Long

  // Runs `check` against a new streaming hasher and closes the hasher afterwards,
  // whether or not the check succeeded.
  private def withStreaming(check: StreamingHashAlgo => Unit): Unit =
    val streaming = newStreaming(0)
    try check(streaming)
    finally streaming.close()

  test("Hash empty array"):
    // The array holds one byte, but the requested range (0, 0) covers none of it.
    val bytes = Array[Byte](0)
    val actual = hash64.hash(bytes, 0, 0, 0)
    assert(actual == emptyHash)

  test("Hash empty ByteBuffer"):
    val empty = ByteBuffer.allocate(0)
    val actual = hash64.hash(empty, 0, 0, 0)
    assert(actual == emptyHash)

  test("Hash one byte array"):
    val bytes = Array[Byte](0)
    val actual = hash64.hash(bytes, 0, 1, 0)
    assert(actual == zeroHash)

  test("Hash one byte ByteBuffer"):
    val single = ByteBuffer.allocate(1)
    single.put(0: Byte)
    // put() advanced the position; move back to the start before hashing
    single.rewind()
    val actual = hash64.hash(single, 0, 1, 0)
    assert(actual == zeroHash)

  test("Streaming hash empty ByteBuffer"):
    // No update() call at all: the value must equal the one-shot empty hash.
    withStreaming: streaming =>
      assert(streaming.getValue == emptyHash)

  test("Streaming one byte array"):
    withStreaming: streaming =>
      val bytes = Array[Byte](0)
      streaming.update(bytes, 0, 1)
      assert(streaming.getValue == zeroHash)
end AbstractHashTest

View File

@ -0,0 +1,44 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
import verify.BasicTestSuite
import sbt.io.IO
import sbt.io.syntax.*
/**
 * Checks `Hashing.samplingFileHashXXHash64` against known values for an empty file,
 * a four-character file and a 1 MiB file of zero bytes.
 */
object FileSampleHashTest extends BasicTestSuite:
  val emptyHash = -1205034819632174695L
  val testHash = 2563739794714397383L

  // Creates the hasher, lets `populate` fill <tmpdir>/test.txt, and returns that file's hash.
  // The temporary directory is managed by IO.withTemporaryDirectory.
  private def hashOfTempFile(populate: java.io.File => Unit): Long =
    val sampler = Hashing.samplingFileHashXXHash64(0)
    IO.withTemporaryDirectory: dir =>
      val sample = dir / "test.txt"
      populate(sample)
      sampler.hash(sample)

  test("Hash empty file"):
    val actual = hashOfTempFile(file => IO.touch(file))
    assert(actual == emptyHash)

  test("Hash small file"):
    val actual = hashOfTempFile(file => IO.write(file, "test"))
    assert(actual == testHash)

  test("Hash medium file (1MB)"):
    val actual = hashOfTempFile: file =>
      // 1024 appends of a 1024-byte all-zero chunk
      val chunk = new Array[Byte](1024)
      (0 until 1024).foreach(_ => IO.append(file, chunk))
    assert(actual == -5176567862428962592L)
end FileSampleHashTest

View File

@ -0,0 +1,18 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/** Runs the shared [[AbstractHashTest]] checks against the WyHash64 implementations. */
object WyHashTest extends AbstractHashTest:
  // Expected values: no input bytes, and a single zero byte.
  override val emptyHash: Long = 290873116282709081L
  override val zeroHash: Long = -295637713410278011L

  override val hash64: HashAlgo = Hashing.wyhash64

  override def newStreaming(seed: Int): StreamingHashAlgo = Hashing.newStreamingWyHash64(seed)
end WyHashTest

View File

@ -0,0 +1,18 @@
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*
*/
package sbt.internal.util.hashing
/** Runs the shared [[AbstractHashTest]] checks against the XXHash64 implementations. */
object XXHashTest extends AbstractHashTest:
  // Expected values: no input bytes, and a single zero byte.
  override val emptyHash: Long = -1205034819632174695L
  override val zeroHash: Long = -1642502924627794072L

  override val hash64: HashAlgo = Hashing.xxhash64

  override def newStreaming(seed: Int): StreamingHashAlgo = Hashing.newStreamingXXHash64(seed)
end XXHashTest

View File

@ -30,7 +30,6 @@ import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import net.openhft.hashing.LongHashFunction;
import org.scalasbt.ipcsocket.UnixDomainServerSocket;
import org.scalasbt.ipcsocket.Win32NamedPipeServerSocket;
import org.scalasbt.ipcsocket.Win32NamedPipeSocket;
@ -306,7 +305,9 @@ public class BootServerSocket implements AutoCloseable {
public static String socketLocation(final Path base)
throws UnsupportedEncodingException, IOException {
final Path target = base.resolve("project").resolve("target");
long hash = LongHashFunction.farmNa().hashBytes(target.toString().getBytes("UTF-8"));
long hash =
((long) target.toString().hashCode() << 32)
| (target.toString().length() * 31 & 0xffffffffL);
if (isWindows) {
return "sbt-load" + hash;
} else {

View File

@ -112,6 +112,7 @@ object Dependencies {
val scalaCollectionCompat = "org.scala-lang.modules" %% "scala-collection-compat" % "2.14.0"
val caffeine = "com.github.ben-manes.caffeine" % "caffeine" % "2.8.5"
val blake3 = "pt.kcry" %% "blake3" % "3.1.2"
// Added to the utilControl project in Test scope (see build.sbt).
val hedgehog = "qa.hedgehog" %% "hedgehog-sbt" % "0.13.0"
val disruptor = "com.lmax" % "disruptor" % "3.4.2"

View File

@ -13,5 +13,6 @@ addSbtPlugin("org.scalameta" % "sbt-native-image" % "0.3.4")
addDependencyTreePlugin
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.14.5")
addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.11.7")
// JMH plugin; presumably what provides the `Jmh` configuration behind `hashBenchmark/Jmh/run`.
addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.8")
// libraryDependencies += "org.scala-sbt" %% "scripted-plugin" % sbtVersion.value

View File

@ -7,7 +7,7 @@ import xsbti.{ BasicVirtualFileRef, VirtualFile }
case class StringVirtualFile1(path: String, content: String)
extends BasicVirtualFileRef(path)
with VirtualFile:
override def contentHash: Long = HashUtil.farmHash(content.getBytes("UTF-8"))
override def contentHash: Long = HashUtil.xxhash64(content.getBytes("UTF-8"))
override def sizeBytes: Long = content.getBytes("UTF-8").size
override def contentHashStr: String =
import Digest.*

View File

@ -2,6 +2,8 @@ package sbt.util
import sjsonnew.IsoString
import sbt.io.Hash
import sbt.internal.util.hashing.Hashing
import scala.util.Using
import xsbti.HashedVirtualFileRef
import java.io.{ BufferedInputStream, InputStream }
import java.nio.ByteBuffer
@ -17,6 +19,10 @@ object Digest:
private[sbt] val Sha256 = "sha256"
private[sbt] val Sha384 = "sha384"
private[sbt] val Sha512 = "sha512"
private[sbt] val Imoxx64 = "imoxx64"
private[sbt] val Imowy64 = "imowy64"
private[sbt] val Xx64 = "xx64"
private[sbt] val Wy64 = "wy64"
extension (d: Digest)
def contentHashStr: String =
@ -43,11 +49,24 @@ object Digest:
apply(ref.contentHashStr() + "/" + ref.sizeBytes.toString)
def apply(algo: String, path: Path): Digest =
val input = Files.newInputStream(path)
try
apply(algo, hashBytes(algo, input), Files.size(path))
finally
input.close()
algo match
case Imoxx64 =>
val hash64 = Hashing.samplingFileHashXXHash64(0)
val h = hash64.hash(path)
apply(algo, longsToBytes(Array(h)), Files.size(path))
case Imowy64 =>
val hash64 = Hashing.samplingFileHashWyHash64(0)
val h = hash64.hash(path)
apply(algo, longsToBytes(Array(h)), Files.size(path))
case Xx64 | Wy64 =>
Using.resource(Files.newInputStream(path)) { input =>
val h = hashBytesInternal(algo, input)
apply(algo, longsToBytes(Array(h)), Files.size(path))
}
case _ =>
Using.resource(Files.newInputStream(path)) { input =>
apply(algo, hashBytes(algo, input), Files.size(path))
}
// used to wrap a Long value as a fake Digest, which will
// later be hashed using sha256 anyway.
@ -56,6 +75,9 @@ object Digest:
lazy val zero: Digest = dummy(0L)
private[sbt] def sha1Hash(path: Path): Digest =
apply(Sha1, path)
def sha256Hash(path: Path): Digest = apply(Sha256, path)
def sha256Hash(bytes: Array[Byte]): Digest =
@ -68,6 +90,17 @@ object Digest:
def sha256Hash(digests: Digest*): Digest =
sha256Hash(digests.toSeq.map(_.toBytes).flatten.toArray[Byte])
/** Digest of the file at `path` using the "imoxx64" algorithm (sampling XXHash64 file hash). */
def imoxx64Hash(path: Path): Digest = apply(Imoxx64, path)
/** Digest of the file at `path` using the "imowy64" algorithm (sampling WyHash64 file hash). */
def imowy64Hash(path: Path): Digest = apply(Imowy64, path)
/** Digest of the file at `path` using the "xx64" algorithm (streaming XXHash64 over all bytes). */
def xx64Hash(path: Path): Digest = apply(Xx64, path)
/** Digest of the file at `path` using the "wy64" algorithm (streaming WyHash64 over all bytes). */
def wy64Hash(path: Path): Digest = apply(Wy64, path)
private[sbt] def md5Hash(bytes: Array[Byte]): Digest =
apply(Md5, hashBytes(Md5, bytes), bytes.length)
// first check the file size, then the hash
def sameDigest(path: Path, digest: Digest): Boolean =
if Files.size(path) != digest.sizeBytes then false
@ -92,6 +125,24 @@ object Digest:
digest.digest
finally bis.close()
/**
 * Streams `input` through one of the in-house 64-bit streaming hashers and returns the hash.
 *
 * @param algo  `Xx64` or `Wy64`; any other value is rejected with an IllegalArgumentException
 * @param input stream to consume; it is wrapped in a BufferedInputStream that is closed
 *              (together with `input`) before this method returns
 * @return the value reported by the streaming hasher once every byte has been fed to it
 */
private def hashBytesInternal(algo: String, input: InputStream): Long =
  val BufferSize = 8192
  Using.resource(BufferedInputStream(input)) { bis =>
    val digest = algo match
      case Xx64  => Hashing.newStreamingXXHash64(0)
      case Wy64  => Hashing.newStreamingWyHash64(0)
      case other =>
        throw IllegalArgumentException(s"unsupported streaming hash algorithm: $other")
    // Release the hasher even when reading or updating fails part-way through.
    try
      val buf = new Array[Byte](BufferSize)
      // Read through the buffered wrapper, not the raw stream it wraps.
      var readBytes = bis.read(buf)
      while readBytes >= 0 do
        digest.update(buf, 0, readBytes)
        readBytes = bis.read(buf)
      digest.getValue
    finally digest.close()
  }
private def validateString(s: String): Unit =
parse(s)
()
@ -102,6 +153,14 @@ object Digest:
case head :: rest :: Nil =>
val subtokens = head :: rest.split("/").toList
subtokens match
case (a @ Xx64) :: value :: sizeBytes :: Nil =>
(a, value, sizeBytes.toLong, parseHex(value, 64))
case (a @ Wy64) :: value :: sizeBytes :: Nil =>
(a, value, sizeBytes.toLong, parseHex(value, 64))
case (a @ Imoxx64) :: value :: sizeBytes :: Nil =>
(a, value, sizeBytes.toLong, parseHex(value, 64))
case (a @ Imowy64) :: value :: sizeBytes :: Nil =>
(a, value, sizeBytes.toLong, parseHex(value, 64))
case (a @ Murmur3) :: value :: sizeBytes :: Nil =>
(a, value, sizeBytes.toLong, parseHex(value, 128))
case (a @ Md5) :: value :: sizeBytes :: Nil =>

View File

@ -1,24 +1,16 @@
package sbt.util
import java.nio.file.{ Files, Path }
import net.openhft.hashing.LongHashFunction
import java.nio.file.{ Path as NioPath }
import sbt.internal.util.hashing.Hashing
object HashUtil:
private[sbt] def farmHash(bytes: Array[Byte]): Long =
LongHashFunction.farmNa().hashBytes(bytes)
/** Hashes the whole of `bytes` with `Hashing.xxhash64`, passing 0 as the final (seed) argument. */
private[sbt] def xxhash64(bytes: Array[Byte]): Long =
  val algo = Hashing.xxhash64
  algo.hash(bytes, 0, bytes.length, 0)
private[sbt] def farmHash(path: Path): Long =
import sbt.io.Hash
// allocating many byte arrays for large files may lead to OOME
// but it is more efficient for small files
val largeFileLimit = 10 * 1024 * 1024
/** Hashes the file at `path` with the sampling WyHash64 file hasher created with argument 0. */
private[sbt] def imohash64(path: NioPath): Long =
  Hashing.samplingFileHashWyHash64(0).hash(path)
if Files.size(path) < largeFileLimit then farmHash(Files.readAllBytes(path))
else farmHash(Hash(path.toFile))
private[sbt] def farmHashStr(path: Path): String =
"farm64-" + farmHash(path).toHexString
private[sbt] def toFarmHashString(digest: Long): String =
s"farm64-${digest.toHexString}"
/**
 * Renders [[imohash64]] of `path` as a prefixed hex string.
 *
 * The prefix is "imowy64-" because `imohash64` uses the sampling WyHash64 hasher; this
 * matches the label `Digest` uses for that algorithm. The hex part is not zero-padded.
 */
private[sbt] def imohash64Str(path: NioPath): String =
  "imowy64-" + imohash64(path).toHexString
end HashUtil

View File

@ -24,11 +24,11 @@ object StringStrings:
given Conversion[HashedVirtualFileRef, StringString] =
(x: HashedVirtualFileRef) => StringString(x.id, x.contentHashStr)
given Conversion[File, StringString] =
(x: File) => StringString(x.toString(), HashUtil.farmHashStr(x.toPath()))
(x: File) => StringString(x.toString(), HashUtil.imohash64Str(x.toPath()))
given Conversion[Path, StringString] =
(x: Path) => StringString(x.toString(), HashUtil.farmHashStr(x))
(x: Path) => StringString(x.toString(), HashUtil.imohash64Str(x))
given Conversion[VirtualFile, StringString] =
(x: VirtualFile) => StringString(x.id, s"farm64-${x.contentHash.toHexString}")
(x: VirtualFile) => StringString(x.id, s"xx64-${x.contentHash.toHexString}")
given HashWriter[StringString] = new HashWriter[StringString]:
def write[J](obj: StringString, builder: Builder[J]): Unit =

View File

@ -42,6 +42,34 @@ object DigestTest extends verify.BasicTestSuite:
testEmptyFile("sha512", expected)
}
// Empty-file digests for the four 64-bit algorithms: one test per entry, named after the
// algorithm. Each expected string has the form "<algo>-<16 hex digits>/<size in bytes>".
Seq(
  "imoxx64" -> "imoxx64-ef46db3751d8e999/0",
  "imowy64" -> "imowy64-0409638ee2bde459/0",
  "xx64" -> "xx64-ef46db3751d8e999/0",
  "wy64" -> "wy64-0409638ee2bde459/0",
).foreach { (algo, expectedText) =>
  test(algo) {
    testEmptyFile(algo, Digest(expectedText))
  }
}
test("digest composition") {
val dummy1 = Digest.dummy(0L)
val dummy2 = Digest.dummy(0L)

View File

@ -11,6 +11,7 @@ object HasherTest extends BasicTestSuite:
final val blankContentHash = -7286425919675154353L
val blankContentHashStr = "farm64-9ae16a3b2f90404f"
final val blankATxtHash = 1166939303L
final val blankATxtXX64 = -541480681L
test("The IntJsonFormat should convert an Int to an int hash") {
import BasicJsonProtocol.given
@ -36,7 +37,7 @@ object HasherTest extends BasicTestSuite:
import PathHashWriters.given
val x = StringVirtualFile1("a.txt", "")
val actual = Hasher.hashUnsafe(x)
assert(actual == blankATxtHash)
assert(actual == blankATxtXX64)
}
test("tuple") {