diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4b281bfd9..28e333964 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,6 +56,10 @@ jobs: java: 17 distribution: zulu jobtype: 12 + # - os: ubuntu-latest + # java: 17 + # distribution: temurin + # jobtype: 13 runs-on: ${{ matrix.os }} timeout-minutes: 25 env: @@ -202,4 +206,9 @@ jobs: if: ${{ matrix.jobtype == 12 }} shell: bash run: | - ./sbt -v "scripted cache/*" \ No newline at end of file + ./sbt -v "scripted cache/*" + # - name: Hash Benchmark + # if: ${{ matrix.jobtype == 13 }} + # shell: bash + # run: | + # ./sbt -v "hashBenchmark/Jmh/run -i 5 -wi 3 -f1 -t1" diff --git a/build.sbt b/build.sbt index 3da599f28..08c6a883d 100644 --- a/build.sbt +++ b/build.sbt @@ -292,11 +292,18 @@ lazy val utilInterface = (project in file("internal") / "util-interface").settin mimaSettings, ) -lazy val utilControl = (project in file("internal") / "util-control").settings( - utilCommonSettings, - name := "Util Control", - mimaSettings, -) +lazy val utilControl = (project in file("internal") / "util-control") + .settings( + utilCommonSettings, + name := "Util Control", + libraryDependencies ++= Seq( + scalacheck % Test, + scalaVerify % Test, + hedgehog % Test, + ), + mimaSettings, + ) + .configure(addSbtIOForTest) lazy val utilPosition = (project in file("internal") / "util-position") .settings( @@ -369,7 +376,7 @@ lazy val utilCache = project // we generate JsonCodec only for actionresult.contra JsonCodecPlugin, ) - .dependsOn(utilLogging) + .dependsOn(utilLogging, utilControl) .settings( testedBaseSettings, name := "Util Cache", @@ -383,6 +390,9 @@ lazy val utilCache = project contrabandSettings, mimaSettings, mimaBinaryIssueFilters ++= Seq( + exclude[DirectMissingMethodProblem]("sbt.util.HashUtil.farmHash"), + exclude[DirectMissingMethodProblem]("sbt.util.HashUtil.farmHashStr"), + exclude[DirectMissingMethodProblem]("sbt.util.HashUtil.toFarmHashString"), ), Test / fork := true, ) @@ 
-391,6 +401,18 @@ lazy val utilCache = project addSbtCompilerInterface, ) +lazy val hashBenchmark = (project in file("internal") / "hash-benchmark") + .dependsOn(utilControl, utilCache) + .enablePlugins(JmhPlugin) + .settings( + utilCommonSettings, + name := "Hash Benchmark", + Jmh / run / javaOptions ++= Seq("-Xmx1G", "-Dfile.encoding=UTF8"), + libraryDependencies += blake3, + mimaSettings, + publish / skip := true, + ) + // Builds on cache to provide caching for filesystem-related operations lazy val utilTracking = (project in file("util-tracking")) .dependsOn(utilCache) diff --git a/internal/hash-benchmark/src/main/scala/sbt/internal/util/FileHashBenchmark.scala b/internal/hash-benchmark/src/main/scala/sbt/internal/util/FileHashBenchmark.scala new file mode 100644 index 000000000..42a5a6cce --- /dev/null +++ b/internal/hash-benchmark/src/main/scala/sbt/internal/util/FileHashBenchmark.scala @@ -0,0 +1,60 @@ +package sbt.internal.util + +import java.util.concurrent.TimeUnit + +import java.nio.file.{ Files, Path as NioPath } +import sbt.io.IO +import sbt.io.syntax.* +import sbt.util.Digest +import scala.util.Using +import org.openjdk.jmh.annotations.* +import pt.kcry.blake3.{ Blake3 as Blake3Impl } + +@State(Scope.Benchmark) +abstract class AbstractFileHashBenchmark: + val tempDir = IO.createTemporaryDirectory /* NOTE(review): nothing in this class deletes the directory afterwards */ + val temp = tempDir / "test.txt" + val buf: Array[Byte] = Array.fill[Byte](1024)(0.toByte) + for i <- 0 until 1024 do IO.append(temp, buf) /* builds a 1 MiB input file: 1024 appends of a 1 KiB zero-filled chunk */ + + def hash(path: NioPath): String /* algorithm under test, supplied by each concrete benchmark class */ + + @Benchmark + @BenchmarkMode(Array(Mode.AverageTime)) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + def hashFile: String = + hash(temp.toPath()) /* returned so JMH consumes the digest instead of the benchmark discarding it */ +end AbstractFileHashBenchmark + +class XXHash64FileHashBenchmark extends AbstractFileHashBenchmark: + override def hash(path: NioPath): String = + Digest.xx64Hash(path).toString + +class WyHash64FileHashBenchmark extends AbstractFileHashBenchmark: + override def hash(path: NioPath): String = + Digest.wy64Hash(path).toString + +class
ImoXXHash64FileHashBenchmark extends AbstractFileHashBenchmark: + override def hash(path: NioPath): String = + Digest.imoxx64Hash(path).toString + +class ImoWyHash64FileHashBenchmark extends AbstractFileHashBenchmark: + override def hash(path: NioPath): String = + Digest.imowy64Hash(path).toString + +class Sha1FileHashBenchmark extends AbstractFileHashBenchmark: + override def hash(path: NioPath): String = + Digest.sha1Hash(path).toString + +class Sha256FileHashBenchmark extends AbstractFileHashBenchmark: + override def hash(path: NioPath): String = + Digest.sha256Hash(path).toString + +class Blake3FileHashBenchmark extends AbstractFileHashBenchmark: + override def hash(path: NioPath): String = + Using.resource(Files.newInputStream(path)) { input => + val digest = Blake3Impl.newHasher() + digest.update(input) + val h = digest.doneHex(64) + s"blake3-$h/${Files.size(path)}" + } diff --git a/internal/hash-benchmark/src/main/scala/sbt/internal/util/HashBenchmark.scala b/internal/hash-benchmark/src/main/scala/sbt/internal/util/HashBenchmark.scala new file mode 100644 index 000000000..5f19269f2 --- /dev/null +++ b/internal/hash-benchmark/src/main/scala/sbt/internal/util/HashBenchmark.scala @@ -0,0 +1,65 @@ +package sbt.internal.util + +import java.util.concurrent.{ ThreadLocalRandom, TimeUnit } +import net.openhft.hashing.LongHashFunction +import org.openjdk.jmh.annotations.* +import pt.kcry.blake3.Blake3 +import sbt.util.Digest +import sbt.internal.util.hashing.Hashing +import scala.util.hashing.MurmurHash3 + +@State(Scope.Benchmark) +abstract class AbstractHashBenchmark: + def hash(buf: Array[Byte]): String /* algorithm under test, supplied by each concrete benchmark class */ + + val buf: Array[Byte] = new Array[Byte](2048) + ThreadLocalRandom.current().nextBytes(buf) /* 2 KiB payload, randomised once when the state object is constructed */ + + @Benchmark + @BenchmarkMode(Array(Mode.AverageTime)) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + def hashByteArray: String = + hash(buf) /* returned so JMH consumes the digest; a discarded result lets the JIT drop the hashing as dead code */ +end AbstractHashBenchmark + +class XXHash64HashBenchmark extends AbstractHashBenchmark: + override def hash(buf: Array[Byte]): String =
+ val h = Hashing.xxhash64 + val hash = h.hash(buf, 0, buf.size, 0) + java.lang.Long.toHexString(hash) + +class WyHash64HashBenchmark extends AbstractHashBenchmark: + override def hash(buf: Array[Byte]): String = + val h = Hashing.wyhash64 + val hash = h.hash(buf, 0, buf.size, 0) + java.lang.Long.toHexString(hash) + +class FarmHashHashBenchmark extends AbstractHashBenchmark: + override def hash(buf: Array[Byte]): String = + val hash = LongHashFunction.farmNa().hashBytes(buf) + java.lang.Long.toHexString(hash) + +class MurmurHash32HashBenchmark extends AbstractHashBenchmark: + override def hash(buf: Array[Byte]): String = + val lo = MurmurHash3.bytesHash(buf, 0x85ebca6b) + val hash = lo.toLong & 0xffffffffL + java.lang.Long.toHexString(hash) + +class MurmurHash64HashBenchmark extends AbstractHashBenchmark: + override def hash(buf: Array[Byte]): String = + val hi = MurmurHash3.bytesHash(buf, 0x9747b28c) + val lo = MurmurHash3.bytesHash(buf, 0x85ebca6b) + val hash = (hi.toLong << 32) | (lo.toLong & 0xffffffffL) + java.lang.Long.toHexString(hash) + +class Md5HashBenchmark extends AbstractHashBenchmark: + override def hash(buf: Array[Byte]): String = + Digest.md5Hash(buf).toString + +class Sha256HashBenchmark extends AbstractHashBenchmark: + override def hash(buf: Array[Byte]): String = + Digest.sha256Hash(buf).toString + +class Blake3HashBenchmark extends AbstractHashBenchmark: + override def hash(buf: Array[Byte]): String = + Blake3.hex(buf, 64) diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/AbstractStreamingXXHash64Scala.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/AbstractStreamingXXHash64Scala.scala new file mode 100644 index 000000000..d75c8f0a6 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/AbstractStreamingXXHash64Scala.scala @@ -0,0 +1,33 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. 
+ * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import XXHashConstants.PRIME64_1 +import XXHashConstants.PRIME64_2 + +abstract class AbstractStreamingXXHash64Scala(seed: Long) extends StreamingHashAlgo(seed): + protected var memSize: Int = 0 + protected var v1: Long = 0 + protected var v2: Long = 0 + protected var v3: Long = 0 + protected var v4: Long = 0 + protected var totalLen: Long = 0 + protected val memory = new Array[Byte](32) + reset() + + override def reset(): Unit = + v1 = seed + PRIME64_1 + PRIME64_2 + v2 = seed + PRIME64_2 + v3 = seed + 0 + v4 = seed - PRIME64_1 + totalLen = 0 + memSize = 0 + +end AbstractStreamingXXHash64Scala diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/ByteBufferUtils.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/ByteBufferUtils.scala new file mode 100644 index 000000000..dacb5a733 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/ByteBufferUtils.scala @@ -0,0 +1,29 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. 
+ * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.nio.{ ByteBuffer, ByteOrder } + +object ByteBufferUtils: + def checkRange(buf: ByteBuffer, off: Int): Unit = + if off < 0 || off >= buf.capacity() then throw new ArrayIndexOutOfBoundsException(off) + else () + + def checkRange(buf: ByteBuffer, off: Int, len: Int): Unit = + SafeUtils.checkLength(len) + if len > 0 then + checkRange(buf, off) + checkRange(buf, off + len - 1) + else () + + def inLittleEndianOrder(buf: ByteBuffer): ByteBuffer = + if buf.order() == ByteOrder.LITTLE_ENDIAN then buf + else buf.duplicate().order(ByteOrder.LITTLE_ENDIAN) +end ByteBufferUtils diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/FileHash.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/FileHash.scala new file mode 100644 index 000000000..c72fecd99 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/FileHash.scala @@ -0,0 +1,20 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.io.File +import java.nio.file.{ Path as NioPath } + +trait FileHash: + def hash(file: File): Long + def hash(file: NioPath): Long + override def toString(): String = + getClass().getSimpleName() +end FileHash diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/FileSampleHash.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/FileSampleHash.scala new file mode 100644 index 000000000..46ccd23be --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/FileSampleHash.scala @@ -0,0 +1,75 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. 
+ * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.io.{ File, RandomAccessFile } +import java.nio.ByteBuffer +import java.nio.file.{ Path as NioPath } +import scala.util.Using + +object FileSampleHash: + final val defaultSampleBytes = 16 * 1024 /* 16 KiB per sample */ + final val defaultThresoldBytes = 128L * 1024L /* 128 KiB; NOTE(review): "Thresold" looks like a misspelling of "Threshold" - the name is public, so check callers before renaming */ + + def apply(underlying: StreamingHashAlgo): FileSampleHash = + new FileSampleHash(defaultSampleBytes, defaultThresoldBytes, underlying) +end FileSampleHash + +/** + * Based on Imohash https://github.com/kalafut/imohash/blob/master/algorithm.md + */ +class FileSampleHash(sampleBytes: Int, thresholdBytes: Long, underlying: StreamingHashAlgo) + extends FileHash: + require(sampleBytes >= 0) + + val buffer: Array[Byte] = new Array[Byte](4096) /* scratch buffer reused by every hashBytes call on this instance */ + + override def hash(file: NioPath): Long = + hash(file.toFile()) + + override def hash(file: File): Long = + Using.resource(new RandomAccessFile(file, "r")): raf => + hash(raf, raf.length()) + + private def hash(input: RandomAccessFile, fileLength: Long): Long = + underlying.reset() /* clear any state left in the streaming hasher by a previous call */ + if fileLength < thresholdBytes || sampleBytes < 1 then hashBytes(input, fileLength) /* below the threshold, or sampling disabled: hash every byte */ + else + hashBytes(input, sampleBytes) /* first sample, read from the current file position */ + // skip to halfway point + input.seek(fileLength / 2) + hashBytes(input, sampleBytes) + input.seek(fileLength - sampleBytes) /* final sample: the last sampleBytes bytes of the file */ + hashBytes(input, sampleBytes) + + // write file size + if fileLength > 0 then + val sizeBuf = ByteBuffer.allocate(java.lang.Long.BYTES) + sizeBuf.putLong(fileLength) + underlying.update(sizeBuf.array(), 0, sizeBuf.array().size) /* feed the 8 length bytes into the hash */ + + underlying.getValue + end hash + + private def hashBytes(input: RandomAccessFile, toHash: Long): Unit = + var remaining: Long = toHash + var pos = 0 + while remaining > 0 do + val toread = math.min(buffer.size - pos, remaining).toInt /* limited by the free space left in buffer */ + val bytesRead = input.read(buffer, pos, toread) + if bytesRead < 0 then sys.error("unexpected EOF") /* the file ended before toHash bytes were read */ + pos += bytesRead + remaining -= bytesRead + if pos
>= buffer.length then + underlying.update(buffer, 0, buffer.length) + pos = 0 + if pos > 0 then underlying.update(buffer, 0, pos) + end hashBytes +end FileSampleHash diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/HashAlgo.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/HashAlgo.scala new file mode 100644 index 000000000..c3fb0c3f4 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/HashAlgo.scala @@ -0,0 +1,60 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.nio.ByteBuffer + +/** + * Hash algorithm interface + */ +trait HashAlgo: + + /** + * Computes the 64-bits hash of buf[off:off+len] using the seed. + * + * @param buf the input data + * @param off the start offset in buf + * @param len the number of bytes to hash + * @param seed the seed to use + * @return the hash value + */ + def hash(buf: Array[Byte], off: Int, len: Int, seed: Long): Long + + /** + * Computes the hash of the given slice of the ByteBuffer. + * ByteBuffer#position() position and ByteBuffer#limit() limit + * are not modified. + * + * @param buf the input data + * @param off the start offset in buf + * @param len the number of bytes to hash + * @param seed the seed to use + * @return the hash value + */ + def hash(buf: ByteBuffer, off: Int, len: Int, seed: Long): Long + + /** + * Computes the hash of the given ByteBuffer. The + * ByteBuffer#position() position is moved in order to reflect bytes + * which have been read. 
+ * + * @param buf the input data + * @param seed the seed to use + * @return the hash value + */ + def hash(buf: ByteBuffer, seed: Long): Long = + val r = hash(buf, buf.position(), buf.remaining(), seed) + buf.position(buf.limit()) + r + + override def toString(): String = + getClass().getSimpleName() + +end HashAlgo diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/Hashing.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/Hashing.scala new file mode 100644 index 000000000..781e21948 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/Hashing.scala @@ -0,0 +1,24 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +object Hashing: + def xxhash64: HashAlgo = XXHash64VarHandle.INSTANCE + def wyhash64: HashAlgo = WyHash64VarHandle.INSTANCE + + def newStreamingXXHash64(seed: Long): StreamingHashAlgo = + new StreamingXXHash64VarHandle(seed) + def newStreamingWyHash64(seed: Long): StreamingHashAlgo = + new StreamingWyHash64VarHandle(seed) + def samplingFileHashXXHash64(seed: Long): FileHash = + FileSampleHash(newStreamingXXHash64(seed)) + def samplingFileHashWyHash64(seed: Long): FileHash = + FileSampleHash(newStreamingWyHash64(seed)) +end Hashing diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/SafeUtils.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/SafeUtils.scala new file mode 100644 index 000000000..f407cd5bb --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/SafeUtils.scala @@ -0,0 +1,27 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. 
+ * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +object SafeUtils: + def checkRange(buf: Array[Byte], off: Int): Unit = + if off < 0 || off >= buf.length then throw new ArrayIndexOutOfBoundsException(off) + else () + + def checkRange(buf: Array[Byte], off: Int, len: Int): Unit = + checkLength(len) + if len > 0 then + checkRange(buf, off) + checkRange(buf, off + len - 1) + else () + + def checkLength(len: Int): Unit = + if len < 0 then throw new IllegalArgumentException("lengths must be >= 0") + else () +end SafeUtils diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingHashAlgo.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingHashAlgo.scala new file mode 100644 index 000000000..892817d64 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingHashAlgo.scala @@ -0,0 +1,55 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.io.Closeable + +/** + * Streaming interface for hashing. + * The implementation is based on lz4-java. + * Copyright 2020 Linnaea Von Lavia and the lz4-java contributors. + * Licensed under the Apache License. + * + * Instances of this class are **not** thread-safe. + */ +abstract class StreamingHashAlgo(val seed: Long) extends Closeable: + /** + * Returns the value of the checksum. + * + * @return the checksum + */ + def getValue: Long + + /** + * Updates the value of the hash with buf[off:off+len]. + * + * @param buf the input data + * @param off the start offset in buf + * @param len the number of bytes to hash + */ + def update(buf: Array[Byte], off: Int, len: Int): Unit + + /** + * Resets this instance to the state it had right after instantiation. 
The + * seed remains unchanged. + */ + def reset(): Unit + + /** + * Releases any system resources associated with this instance. + * It is not mandatory to call this method after using this instance + * because the system resources are released anyway when this instance + * is reclaimed by GC. + */ + override def close(): Unit = () + + override def toString: String = + getClass().getSimpleName() + "(seed=" + seed + ")" +end StreamingHashAlgo diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingWyHash64VarHandle.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingWyHash64VarHandle.scala new file mode 100644 index 000000000..8130c5ce7 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingWyHash64VarHandle.scala @@ -0,0 +1,120 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import WyHash64VarHandle.* +import WyHashConstants.* +import VarHandleUtils.* + +class StreamingWyHash64VarHandle(seed: Long) extends StreamingHashAlgo(seed): + protected var a: Long = 0 + protected var b: Long = 0 + protected val state: Array[Long] = new Array[Long](3) + protected var v0: Long = 0 + protected var v1: Long = 0 + protected var v2: Long = 0 + protected var totalLen: Long = 0L + protected val memory = new Array[Byte](48) + protected var memoryLen: Int = 0 + reset() + + override def reset(): Unit = + val s: Long = initSeed(seed) + this.v0 = s + this.v1 = s + this.v2 = s + this.totalLen = 0 + this.memoryLen = 0 + + def getValue: Long = + var _a: Long = this.a + var _b: Long = this.b + var v0: Long = this.v0 + var v1: Long = this.v1 + var v2: Long = this.v2 + + var input = this.memory + var inputLen = this.memoryLen + + if this.totalLen <= 16 then + if inputLen >= 4 then + val end = inputLen - 4 + val quarter = 
(inputLen >> 3) << 2 + _a = (readIntLE(input, 0).toLong << 32) + | (readIntLE(input, quarter) & 0xffffffffL) + _b = (readIntLE(input, end).toLong << 32) /* widen to Long before shifting: an Int shifted left by 32 is left unchanged on the JVM */ + | (readIntLE(input, end - quarter) & 0xffffffffL) + else if inputLen > 0 then + _a = ((input(0) & 0xffL) << 16) | ((input(inputLen >> 1) & 0xffL) << 8) + | (input(inputLen - 1) & 0xffL) /* 1 to 3 bytes: pack the first, middle and last byte */ + _b = 0 + else + _a = 0 + _b = 0 + end if + else + var scratch: Array[Byte] = null + if inputLen < 16 then /* rebuild the last 16 input bytes: rem bytes kept at the end of memory, then the buffered bytes */ + val rem = 16 - inputLen + scratch = new Array[Byte](16) + System.arraycopy(memory, 48 - rem, scratch, 0, rem) + System.arraycopy(memory, 0, scratch, rem, inputLen) + input = scratch + inputLen = 16 + + if this.totalLen >= 48 then v0 ^= v1 ^ v2 /* fold the three lanes into v0 */ + + var i = 0 + while i + 16 < inputLen do + v0 = mix(readLongLE(input, i) ^ PRIME64_1, readLongLE(input, i + 8) ^ v0) + i += 16 + + _a = readLongLE(input, inputLen - 16) /* a and b are always the final 16 bytes of the input */ + _b = readLongLE(input, inputLen - 8) + end if + + finishHash(_a, _b, v0, this.totalLen) + end getValue + + def update(buf: Array[Byte], off: Int, len: Int): Unit = + this.totalLen += len + + if len <= 48 - this.memoryLen then /* the new bytes still fit in the 48-byte buffer: just append them */ + System.arraycopy(buf, off, this.memory, this.memoryLen, len) + this.memoryLen += len + else + var i: Int = 0 + if this.memoryLen > 0 then /* top up the buffered block to 48 bytes and consume it first */ + i = 48 - this.memoryLen + System.arraycopy(buf, off, this.memory, this.memoryLen, i) + round(this.memory, 0) + this.memoryLen = 0 + end if + + while i + 48 < len do /* strict '<' leaves between 1 and 48 bytes buffered for getValue */ + round(buf, off + i) + i += 48 + + val remaining = len - i + if remaining < 16 && i >= 48 then /* keep the bytes preceding a short tail at the end of memory so getValue can read a full 16-byte window */ + val rem = 16 - remaining + System.arraycopy(buf, off + i - rem, this.memory, 48 - rem, rem) + + System.arraycopy(buf, off + i, this.memory, 0, remaining) + this.memoryLen = remaining + end if + end update + + private def round(buf: Array[Byte], p: Int): Unit = + this.v0 = mix(readLongLE(buf, p) ^ PRIME64_1, readLongLE(buf, p + 8) ^ this.v0) + this.v1 = mix(readLongLE(buf, p + 16) ^ PRIME64_2, readLongLE(buf, p + 24) ^ this.v1) + this.v2 = mix(readLongLE(buf, p + 32) ^ PRIME64_3, readLongLE(buf,
p + 40) ^ this.v2) + +end StreamingWyHash64VarHandle diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingXXHash64VarHandle.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingXXHash64VarHandle.scala new file mode 100644 index 000000000..fe35fe2cb --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/StreamingXXHash64VarHandle.scala @@ -0,0 +1,168 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.lang.Long.rotateLeft +import SafeUtils.checkRange +import VarHandleUtils.* +import XXHashConstants.* + +/** + * The implementation is based on lz4-java. + * Copyright 2020 Linnaea Von Lavia and the lz4-java contributors. + * Licensed under the Apache License. + * + * Streaming xxhash. + */ +class StreamingXXHash64VarHandle(seed: Long) extends AbstractStreamingXXHash64Scala(seed): + + override def getValue: Long = + var h64: Long = 0L + if totalLen >= 32 then + var v1: Long = this.v1 + var v2: Long = this.v2 + var v3: Long = this.v3 + var v4: Long = this.v4 + + h64 = rotateLeft(v1, 1) + rotateLeft(v2, 7) + rotateLeft(v3, 12) + rotateLeft(v4, 18); + + v1 *= PRIME64_2 + v1 = rotateLeft(v1, 31) + v1 *= PRIME64_1; h64 ^= v1 + h64 = h64 * PRIME64_1 + PRIME64_4 + + v2 *= PRIME64_2 + v2 = rotateLeft(v2, 31) + v2 *= PRIME64_1 + h64 ^= v2 + h64 = h64 * PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2 + v3 = rotateLeft(v3, 31) + v3 *= PRIME64_1 + h64 ^= v3 + h64 = h64 * PRIME64_1 + PRIME64_4 + + v4 *= PRIME64_2 + v4 = rotateLeft(v4, 31) + v4 *= PRIME64_1 + h64 ^= v4 + h64 = h64 * PRIME64_1 + PRIME64_4 + else h64 = seed + PRIME64_5 + + h64 += totalLen + + var off: Int = 0 + while off <= memSize - 8 do + var k1: Long = readLongLE(memory, off) + k1 *= PRIME64_2 + k1 = rotateLeft(k1, 31) + k1 *= PRIME64_1 + h64 ^= k1 
+ h64 = rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4 + off += 8 + + if off <= memSize - 4 then + h64 ^= (readIntLE(memory, off) & 0xffffffffL) * PRIME64_1 + h64 = rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3 + off += 4 + else () + + while off < memSize do + h64 ^= (memory(off) & 0xff) * PRIME64_5 + h64 = rotateLeft(h64, 11) * PRIME64_1 + off += 1 + + h64 ^= h64 >>> 33 + h64 *= PRIME64_2 + h64 ^= h64 >>> 29 + h64 *= PRIME64_3 + h64 ^= h64 >>> 32 + + h64 + end getValue + + override def update(buf: Array[Byte], offset: Int, len: Int): Unit = + var off = offset + checkRange(buf, off, len) + + totalLen += len + + if memSize + len < 32 then // fill in tmp buffer + System.arraycopy(buf, off, memory, memSize, len) + memSize += len + else + val end: Int = off + len + + if memSize > 0 then // data left from previous update + System.arraycopy(buf, off, memory, memSize, 32 - memSize) + + v1 += readLongLE(memory, 0) * PRIME64_2 + v1 = rotateLeft(v1, 31) + v1 *= PRIME64_1 + + v2 += readLongLE(memory, 8) * PRIME64_2 + v2 = rotateLeft(v2, 31) + v2 *= PRIME64_1 + + v3 += readLongLE(memory, 16) * PRIME64_2 + v3 = rotateLeft(v3, 31) + v3 *= PRIME64_1 + + v4 += readLongLE(memory, 24) * PRIME64_2 + v4 = rotateLeft(v4, 31) + v4 *= PRIME64_1 + + off += 32 - memSize + memSize = 0 + else () + + { + val limit: Int = end - 32 + var v1: Long = this.v1 + var v2: Long = this.v2 + var v3: Long = this.v3 + var v4: Long = this.v4 + + while off <= limit do + v1 += readLongLE(buf, off) * PRIME64_2 + v1 = rotateLeft(v1, 31) + v1 *= PRIME64_1 + off += 8 + + v2 += readLongLE(buf, off) * PRIME64_2 + v2 = rotateLeft(v2, 31) + v2 *= PRIME64_1 + off += 8 + + v3 += readLongLE(buf, off) * PRIME64_2 + v3 = rotateLeft(v3, 31) + v3 *= PRIME64_1 + off += 8 + + v4 += readLongLE(buf, off) * PRIME64_2 + v4 = rotateLeft(v4, 31) + v4 *= PRIME64_1 + off += 8 + + this.v1 = v1 + this.v2 = v2 + this.v3 = v3 + this.v4 = v4 + } + + if off < end then + System.arraycopy(buf, off, memory, 0, end - off) + memSize = end - off + 
else () + end if + end update + +end StreamingXXHash64VarHandle diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/VarHandleUtils.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/VarHandleUtils.scala new file mode 100644 index 000000000..bcdbb5847 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/VarHandleUtils.scala @@ -0,0 +1,41 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.lang.invoke.{ MethodHandles, VarHandle } +import java.nio.{ ByteBuffer, ByteOrder } + +object VarHandleUtils: + private def getArrayClass(c: Class[?]): Class[?] = + java.lang.reflect.Array.newInstance(c, 0).getClass + private val LONG_HANDLE: VarHandle = + MethodHandles.byteArrayViewVarHandle(getArrayClass(classOf[Long]), ByteOrder.LITTLE_ENDIAN) + private val INT_HANDLE: VarHandle = + MethodHandles.byteArrayViewVarHandle(getArrayClass(classOf[Int]), ByteOrder.LITTLE_ENDIAN) + private val BB_LONG_HANDLE: VarHandle = + MethodHandles.byteBufferViewVarHandle(getArrayClass(classOf[Long]), ByteOrder.LITTLE_ENDIAN) + private val BB_INT_HANDLE: VarHandle = + MethodHandles.byteBufferViewVarHandle(getArrayClass(classOf[Int]), ByteOrder.LITTLE_ENDIAN) + + inline def readByte(buf: Array[Byte], off: Int): Byte = + buf(off) + inline def readIntLE(buf: Array[Byte], off: Int): Int = + INT_HANDLE.get(buf, off).asInstanceOf[Int] + inline def readLongLE(buf: Array[Byte], off: Int): Long = + LONG_HANDLE.get(buf, off).asInstanceOf[Long] + inline def readByte(buf: ByteBuffer, i: Int): Byte = + buf.get(i) + inline def readIntLE(buf: ByteBuffer, i: Int): Int = + assert(buf.order() == ByteOrder.LITTLE_ENDIAN) + BB_INT_HANDLE.get(buf, i).asInstanceOf[Int] + inline def readLongLE(buf: ByteBuffer, i: Int): Long = + assert(buf.order() == 
ByteOrder.LITTLE_ENDIAN) + BB_LONG_HANDLE.get(buf, i).asInstanceOf[Long] +end VarHandleUtils diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHash64VarHandle.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHash64VarHandle.scala new file mode 100644 index 000000000..86413f3a8 --- /dev/null +++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHash64VarHandle.scala @@ -0,0 +1,162 @@ +/* + * sbt + * Copyright 2023, Scala center + * Copyright 2011 - 2022, Lightbend, Inc. + * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + * + */ + +package sbt.internal.util.hashing + +import java.nio.ByteBuffer +import WyHashConstants.* +import VarHandleUtils.* + +object WyHash64VarHandle: + private[hashing] val INSTANCE = new WyHash64VarHandle() + + private[hashing] inline def initSeed(seed: Long): Long = + seed ^ mix(seed ^ PRIME64_0, PRIME64_1) /* starting state derived from the caller's seed */ + + private[hashing] def mix(a: Long, b: Long): Long = + val low = a * b /* low 64 bits of the 128-bit product */ + val high = unsignedMultiplyHigh(a, b) /* high 64 bits of the same product */ + low ^ high /* fold both halves into one 64-bit value */ + + private[hashing] inline def unsignedMultiplyHigh(a: Long, b: Long): Long = + Math.multiplyHigh(a, b) + ((a >> 63) & b) + ((b >> 63) & a) /* Math.multiplyHigh is signed; each masked term adds the other operand back when one operand is negative */ + + private[hashing] inline def wyr3(buf: Array[Byte], off: Int, k: Int): Long = + ((buf(off) & 0xffL) << 16) + | ((buf(off + (k >> 1)) & 0xffL) << 8) + | (buf(off + k - 1) & 0xffL) /* packs the first, middle and last of k bytes into 24 bits */ + + private[hashing] inline def wyr3(buf: ByteBuffer, off: Int, k: Int): Long = + ((buf.get(off) & 0xffL) << 16) + | ((buf.get(off + (k >> 1)) & 0xffL) << 8) + | (buf.get(off + k - 1) & 0xffL) /* same packing as the array overload, using absolute ByteBuffer reads */ + + private[hashing] inline def finishHash(a: Long, b: Long, seed: Long, len: Long): Long = + val _a = a ^ PRIME64_1 + val _b = b ^ seed + val low = _a * _b + val high = unsignedMultiplyHigh(_a, _b) + mix(low ^ PRIME64_0 ^ len, high ^ PRIME64_1) /* second mixing round, which also folds in the input length */ + +end WyHash64VarHandle + +/** + * Wyhash matching Zig 0.15 std.hash.Wyhash.
+ */
+class WyHash64VarHandle extends HashAlgo:
+  import WyHash64VarHandle.*
+
+  // Hashes `len` bytes of `buf` starting at `offset`. The seed is first passed through
+  // initSeed and the result is produced by finishHash (both come from the companion
+  // import above and are not shown here).
+  override def hash(buf: Array[Byte], offset: Int, len: Int, seed: Long): Long =
+    SafeUtils.checkRange(buf, offset, len)
+
+    var off = offset
+    var s: Long = initSeed(seed)
+    val secret1 = PRIME64_1
+    val secret2 = PRIME64_2
+    val secret3 = PRIME64_3
+    var a: Long = 0L
+    var b: Long = 0L
+
+    if len <= 16 then
+      if len >= 4 then
+        // 4..16 bytes: a and b are each built from two 32-bit little-endian reads taken
+        // from the start and the end of the range. (len >> 3) << 2 is 0 for len < 8 and
+        // 4 for len 8..15, so the reads may overlap for short inputs.
+        a = (readIntLE(buf, off).toLong << 32)
+          | (readIntLE(buf, off + ((len >> 3) << 2)) & 0xffffffffL)
+        b = (readIntLE(buf, off + len - 4).toLong << 32)
+          | (readIntLE(buf, off + len - 4 - ((len >> 3) << 2)) & 0xffffffffL)
+      else if len > 0 then
+        // 1..3 bytes are delegated to wyr3.
+        a = wyr3(buf, off, len)
+        b = 0
+      else
+        a = 0
+        b = 0
+    else
+      var i = len
+      var p = off
+      var see0 = s
+      var see1 = s
+      var see2 = s
+
+      // While more than 48 bytes remain, three independent lanes consume 16 bytes each.
+      while i > 48 do
+        see0 = mix(readLongLE(buf, p) ^ secret1, readLongLE(buf, p + 8) ^ see0)
+        see1 = mix(readLongLE(buf, p + 16) ^ secret2, readLongLE(buf, p + 24) ^ see1)
+        see2 = mix(readLongLE(buf, p + 32) ^ secret3, readLongLE(buf, p + 40) ^ see2)
+        p += 48
+        i -= 48
+      end while
+
+      // Merge the lanes; if the loop above never ran, see1 == see2 and see0 is unchanged.
+      see0 ^= see1 ^ see2
+      // Fold the remaining full 16-byte blocks into see0.
+      while i > 16 do
+        see0 = mix(readLongLE(buf, p) ^ secret1, readLongLE(buf, p + 8) ^ see0)
+        i -= 16
+        p += 16
+      end while
+
+      // a and b always come from the last 16 bytes of the input, which may overlap
+      // bytes already consumed by the loops above.
+      a = readLongLE(buf, off + len - 16)
+      b = readLongLE(buf, off + len - 8)
+      s = see0
+    end if
+    finishHash(a, b, s, len)
+  end hash
+
+  // ByteBuffer variant of the same algorithm. Array-backed buffers delegate to the array
+  // overload (shifting the offset by arrayOffset); other buffers are range-checked and
+  // read through the buffer returned by ByteBufferUtils.inLittleEndianOrder.
+  // NOTE(review): on the array-backed path the range check is done by the array overload
+  // against the whole backing array, not against this buffer — confirm that is intended.
+  override def hash(buffer: ByteBuffer, offset: Int, len: Int, seed: Long): Long =
+    if buffer.hasArray() then hash(buffer.array(), offset + buffer.arrayOffset(), len, seed)
+    else
+      var off = offset
+      ByteBufferUtils.checkRange(buffer, off, len)
+      val buf = ByteBufferUtils.inLittleEndianOrder(buffer)
+      var s: Long = initSeed(seed)
+      val secret1 = PRIME64_1
+      val secret2 = PRIME64_2
+      val secret3 = PRIME64_3
+      var a: Long = 0L
+      var b: Long = 0L
+
+      // The remainder mirrors the Array[Byte] overload statement for statement.
+      if len <= 16 then
+        if len >= 4 then
+          a = (readIntLE(buf, off).toLong << 32)
+            | (readIntLE(buf, off + ((len >> 3) << 2)) & 0xffffffffL)
+          b = (readIntLE(buf, off + len - 4).toLong << 32)
+            | (readIntLE(buf, off + len - 4 - ((len >> 3) << 2)) & 0xffffffffL)
+        else if len > 0 then
+          a = wyr3(buf, off, len)
+          b = 0
+        else
+          a = 0
+          b = 0
+      else
+        var i = len
+        var p = off
+        var see0 = s
+        var see1 = s
+        var see2 = s
+
+        while i > 48 do
+          see0 = mix(readLongLE(buf, p) ^ secret1, readLongLE(buf, p + 8) ^ see0)
+          see1 = mix(readLongLE(buf, p + 16) ^ secret2, readLongLE(buf, p + 24) ^ see1)
+          see2 = mix(readLongLE(buf, p + 32) ^ secret3, readLongLE(buf, p + 40) ^ see2)
+          p += 48
+          i -= 48
+        end while
+
+        see0 ^= see1 ^ see2
+        while i > 16 do
+          see0 = mix(readLongLE(buf, p) ^ secret1, readLongLE(buf, p + 8) ^ see0)
+          i -= 16
+          p += 16
+        end while
+
+        a = readLongLE(buf, off + len - 16)
+        b = readLongLE(buf, off + len - 8)
+        s = see0
+      end if
+      finishHash(a, b, s, len)
+    end if
+  end hash
+
+end WyHash64VarHandle
diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHashConstants.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHashConstants.scala
new file mode 100644
index 000000000..3a10f8dbc
--- /dev/null
+++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/WyHashConstants.scala
@@ -0,0 +1,17 @@
+/*
+ * sbt
+ * Copyright 2023, Scala center
+ * Copyright 2011 - 2022, Lightbend, Inc.
+ * Copyright 2008 - 2010, Mark Harrah
+ * Licensed under Apache License 2.0 (see LICENSE)
+ *
+ */
+
+package sbt.internal.util.hashing
+
+// 64-bit constants for the WyHash implementation; presumably the PRIME64_1..3 that
+// WyHash64VarHandle uses as its per-lane secrets (its imports are not shown here).
+// NOTE(review): the values look like the default secret `_wyp[0..3]` of the reference
+// wyhash implementation — confirm against upstream.
+object WyHashConstants:
+  final val PRIME64_0 = 0xa0761d6478bd642fL
+  final val PRIME64_1 = 0xe7037ed1a0b428dbL
+  final val PRIME64_2 = 0x8ebc6af09c88c6e3L
+  final val PRIME64_3 = 0x589965cc75374cc3L
+end WyHashConstants
diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHash64VarHandle.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHash64VarHandle.scala
new file mode 100644
index 000000000..8418a8c4b
--- /dev/null
+++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHash64VarHandle.scala
@@ -0,0 +1,222 @@
+/*
+ * sbt
+ * Copyright 2023, Scala center
+ * Copyright 2011 - 2022, Lightbend, Inc.
+ * Copyright 2008 - 2010, Mark Harrah
+ * Licensed under Apache License 2.0 (see LICENSE)
+ *
+ */
+
+package sbt.internal.util.hashing
+
+import java.lang.Long.rotateLeft
+import java.nio.ByteBuffer
+import VarHandleUtils.*
+import XXHashConstants.*
+
+// Companion holding a single shared hasher instance.
+// NOTE(review): the class defines no fields in this file, so sharing one instance looks
+// safe even though its Scaladoc says instances are not thread-safe — confirm.
+object XXHash64VarHandle:
+  private[sbt] val INSTANCE = new XXHash64VarHandle()
+end XXHash64VarHandle
+
+/**
+ * The implementation is based on lz4-java.
+ * Copyright 2020 Linnaea Von Lavia and the lz4-java contributors.
+ * Licensed under the Apache License.
+ *
+ * Instances of this class are **not** thread-safe.
+ */
+class XXHash64VarHandle extends HashAlgo:
+  // Computes the 64-bit hash of `len` bytes of `buf` starting at `offset`, mixed with `seed`.
+  override def hash(buf: Array[Byte], offset: Int, len: Int, seed: Long): Long =
+    SafeUtils.checkRange(buf, offset, len)
+
+    var off = offset
+    val end: Int = off + len
+    var h64: Long = 0L
+
+    if len >= 32 then
+      // Inputs of 32+ bytes: four accumulators each consume 8 bytes per round,
+      // i.e. the input is processed in 32-byte stripes.
+      val limit = end - 32
+      var v1: Long = seed + PRIME64_1 + PRIME64_2
+      var v2: Long = seed + PRIME64_2
+      var v3: Long = seed + 0
+      var v4: Long = seed - PRIME64_1
+      // do-while form: the body runs once before `off <= limit` is tested, which is
+      // safe here because len >= 32 guarantees one full stripe.
+      while
+        v1 += readLongLE(buf, off) * PRIME64_2
+        v1 = rotateLeft(v1, 31)
+        v1 *= PRIME64_1
+        off += 8
+
+        v2 += readLongLE(buf, off) * PRIME64_2
+        v2 = rotateLeft(v2, 31)
+        v2 *= PRIME64_1
+        off += 8
+
+        v3 += readLongLE(buf, off) * PRIME64_2
+        v3 = rotateLeft(v3, 31)
+        v3 *= PRIME64_1
+        off += 8
+
+        v4 += readLongLE(buf, off) * PRIME64_2
+        v4 = rotateLeft(v4, 31)
+        v4 = v4 * PRIME64_1
+        off += 8
+        off <= limit
+      do ()
+
+      // Combine the four accumulators, then merge each one into h64 in turn.
+      h64 = rotateLeft(v1, 1) + rotateLeft(v2, 7) + rotateLeft(v3, 12) + rotateLeft(v4, 18)
+
+      v1 *= PRIME64_2
+      v1 = rotateLeft(v1, 31)
+      v1 *= PRIME64_1
+      h64 ^= v1
+      h64 = h64 * PRIME64_1 + PRIME64_4
+
+      v2 *= PRIME64_2
+      v2 = rotateLeft(v2, 31)
+      v2 *= PRIME64_1
+      h64 ^= v2
+      h64 = h64 * PRIME64_1 + PRIME64_4
+
+      v3 *= PRIME64_2
+      v3 = rotateLeft(v3, 31)
+      v3 *= PRIME64_1
+      h64 ^= v3
+      h64 = h64 * PRIME64_1 + PRIME64_4
+
+      v4 *= PRIME64_2
+      v4 = rotateLeft(v4, 31)
+      v4 *= PRIME64_1
+      h64 ^= v4
+      h64 = h64 * PRIME64_1 + PRIME64_4
+    else h64 = seed + PRIME64_5
+
+    h64 += len
+
+    // Tail: remaining 8-byte words, then at most one 4-byte word, then single bytes.
+    while off <= end - 8 do
+      var k1: Long = readLongLE(buf, off)
+      k1 *= PRIME64_2
+      k1 = rotateLeft(k1, 31)
+      k1 *= PRIME64_1
+      h64 ^= k1
+      h64 = rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4
+      off += 8
+
+    if off <= end - 4 then
+      // The mask makes the 32-bit read unsigned before it is widened to Long.
+      h64 ^= (readIntLE(buf, off) & 0xffffffffL) * PRIME64_1
+      h64 = rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3
+      off += 4
+    else ()
+
+    while off < end do
+      h64 ^= (readByte(buf, off) & 0xff) * PRIME64_5
+      h64 = rotateLeft(h64, 11) * PRIME64_1
+      off += 1
+
+    // Final xor-shift / multiply mixing of the accumulated value.
+    h64 ^= (h64 >>> 33)
+    h64 *= PRIME64_2
+    h64 ^= (h64 >>> 29)
+    h64 *= PRIME64_3
+    h64 ^= (h64 >>> 32)
+
+    h64
+  end hash
+
+  // ByteBuffer variant of the same algorithm. Array-backed buffers delegate to the array
+  // overload (shifting the offset by arrayOffset); other buffers are range-checked and
+  // read through the buffer returned by ByteBufferUtils.inLittleEndianOrder.
+  override def hash(buffer: ByteBuffer, offset: Int, len: Int, seed: Long): Long =
+    if buffer.hasArray() then hash(buffer.array(), offset + buffer.arrayOffset(), len, seed)
+    else
+      var off = offset
+      ByteBufferUtils.checkRange(buffer, off, len)
+      val buf = ByteBufferUtils.inLittleEndianOrder(buffer)
+
+      val end: Int = off + len
+      var h64: Long = 0L
+
+      // The remainder mirrors the Array[Byte] overload step for step.
+      if len >= 32 then
+        val limit: Int = end - 32
+        var v1: Long = seed + PRIME64_1 + PRIME64_2
+        var v2: Long = seed + PRIME64_2
+        var v3: Long = seed + 0
+        var v4: Long = seed - PRIME64_1
+        while
+          v1 = v1 + readLongLE(buf, off) * PRIME64_2
+          v1 = rotateLeft(v1, 31)
+          v1 = v1 * PRIME64_1
+          off = off + 8
+
+          v2 += readLongLE(buf, off) * PRIME64_2
+          v2 = rotateLeft(v2, 31)
+          v2 *= PRIME64_1
+          off = off + 8
+
+          v3 += readLongLE(buf, off) * PRIME64_2
+          v3 = rotateLeft(v3, 31)
+          v3 *= PRIME64_1
+          off = off + 8
+
+          v4 += readLongLE(buf, off) * PRIME64_2
+          v4 = rotateLeft(v4, 31)
+          v4 *= PRIME64_1
+          off = off + 8
+
+          off <= limit
+        do ()
+
+        h64 = rotateLeft(v1, 1) + rotateLeft(v2, 7) + rotateLeft(v3, 12) + rotateLeft(v4, 18)
+
+        v1 *= PRIME64_2
+        v1 = rotateLeft(v1, 31)
+        v1 *= PRIME64_1
+        h64 ^= v1
+        h64 = h64 * PRIME64_1 + PRIME64_4
+
+        v2 *= PRIME64_2
+        v2 = rotateLeft(v2, 31)
+        v2 *= PRIME64_1
+        h64 ^= v2
+        h64 = h64 * PRIME64_1 + PRIME64_4
+
+        v3 *= PRIME64_2
+        v3 = rotateLeft(v3, 31)
+        v3 *= PRIME64_1
+        h64 ^= v3
+        h64 = h64 * PRIME64_1 + PRIME64_4
+
+        v4 *= PRIME64_2
+        v4 = rotateLeft(v4, 31)
+        v4 *= PRIME64_1
+        h64 ^= v4
+        h64 = h64 * PRIME64_1 + PRIME64_4
+      else h64 = seed + PRIME64_5
+
+      h64 += len
+
+      while off <= end - 8 do
+        var k1: Long = readLongLE(buf, off)
+        k1 *= PRIME64_2
+        k1 = rotateLeft(k1, 31)
+        k1 *= PRIME64_1
+        h64 ^= k1
+        h64 = rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4
+        off = off + 8
+
+      if off <= end - 4 then
+        h64 ^= (readIntLE(buf, off) & 0xffffffffL) * PRIME64_1
+        h64 = rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3
+        off = off + 4
+      else ()
+
+      while off < end do
+        h64 ^= (readByte(buf, off) & 0xff) * PRIME64_5
+        h64 = rotateLeft(h64, 11) * PRIME64_1
+        off += 1
+
+      h64 ^= h64 >>> 33
+      h64 *= PRIME64_2
+      h64 ^= h64 >>> 29
+      h64 *= PRIME64_3
+      h64 ^= h64 >>> 32
+
+      h64
+    end if
+  end hash
+
+end XXHash64VarHandle
diff --git a/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHashConstants.scala b/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHashConstants.scala
new file mode 100644
index 000000000..c5da28109
--- /dev/null
+++ b/internal/util-control/src/main/scala/sbt/internal/util/hashing/XXHashConstants.scala
@@ -0,0 +1,24 @@
+/*
+ * sbt
+ * Copyright 2023, Scala center
+ * Copyright 2011 - 2022, Lightbend, Inc.
+ * Copyright 2008 - 2010, Mark Harrah
+ * Licensed under Apache License 2.0 (see LICENSE)
+ *
+ */
+
+package sbt.internal.util.hashing
+
+// Multiplicative constants for the XXHash implementations: PRIME1..5 are Int-sized,
+// PRIME64_1..5 are Long-sized. Where a literal is negative, the trailing comment gives
+// the same bit pattern as an unsigned decimal.
+object XXHashConstants:
+  final val PRIME1 = -1640531535
+  final val PRIME2 = -2048144777
+  final val PRIME3 = -1028477379
+  final val PRIME4 = 668265263
+  final val PRIME5 = 374761393
+
+  final val PRIME64_1 = -7046029288634856825L // 11400714785074694791
+  final val PRIME64_2 = -4417276706812531889L // 14029467366897019727
+  final val PRIME64_3 = 1609587929392839161L
+  final val PRIME64_4 = -8796714831421723037L // 9650029242287828579
+  final val PRIME64_5 = 2870177450012600261L
+end XXHashConstants
diff --git a/internal/util-control/src/test/scala/sbt/internal/util/AbstractHashTest.scala b/internal/util-control/src/test/scala/sbt/internal/util/AbstractHashTest.scala
new file mode 100644
index 000000000..bd2e93ed3
--- /dev/null
+++ b/internal/util-control/src/test/scala/sbt/internal/util/AbstractHashTest.scala
@@ -0,0 +1,56 @@
+/*
+ * sbt
+ * Copyright 2023, Scala center
+ * Copyright 2011 - 2022, Lightbend, Inc.
+ * Copyright 2008 - 2010, Mark Harrah
+ * Licensed under Apache License 2.0 (see LICENSE)
+ *
+ */
+
+package sbt.internal.util.hashing
+
+import java.nio.ByteBuffer
+import verify.BasicTestSuite
+
+// Shared test cases for a 64-bit hash algorithm, run against both its one-shot
+// (HashAlgo) and streaming (StreamingHashAlgo) forms. Concrete suites supply the
+// algorithm under test and the two expected values; every case uses seed 0.
+abstract class AbstractHashTest extends BasicTestSuite:
+  // one-shot hasher under test
+  def hash64: HashAlgo
+  // creates a fresh streaming hasher for the given seed
+  def newStreaming(seed: Int): StreamingHashAlgo
+  // expected hash of zero bytes with seed 0
+  def emptyHash: Long
+  // expected hash of a single 0x00 byte with seed 0
+  def zeroHash: Long
+
+  test("Hash empty array"):
+    // the array holds one byte, but len = 0 so nothing is hashed
+    val buf: Array[Byte] = Array[Byte](0)
+    val r = hash64.hash(buf, 0, 0, 0)
+    assert(r == emptyHash)
+
+  test("Hash empty ByteBuffer"):
+    val buf: ByteBuffer = ByteBuffer.allocate(0)
+    val r = hash64.hash(buf, 0, 0, 0)
+    assert(r == emptyHash)
+
+  // ByteBuffer.allocate returns array-backed buffers, which the implementations route
+  // to the Array[Byte] overload. Direct buffers are typically not array-backed, so the
+  // two direct-buffer cases cover the separate ByteBuffer code path as well.
+  test("Hash empty direct ByteBuffer"):
+    val buf: ByteBuffer = ByteBuffer.allocateDirect(0)
+    val r = hash64.hash(buf, 0, 0, 0)
+    assert(r == emptyHash)
+
+  test("Hash one byte array"):
+    val buf: Array[Byte] = Array[Byte](0)
+    val r = hash64.hash(buf, 0, 1, 0)
+    assert(r == zeroHash)
+
+  test("Hash one byte ByteBuffer"):
+    val buf: ByteBuffer = ByteBuffer.allocate(1)
+    buf.put(0: Byte)
+    buf.rewind()
+    val r = hash64.hash(buf, 0, 1, 0)
+    assert(r == zeroHash)
+
+  test("Hash one byte direct ByteBuffer"):
+    val buf: ByteBuffer = ByteBuffer.allocateDirect(1)
+    buf.put(0: Byte)
+    buf.rewind()
+    val r = hash64.hash(buf, 0, 1, 0)
+    assert(r == zeroHash)
+
+  test("Streaming hash empty ByteBuffer"):
+    // no update call: the streaming hasher must report the empty-input hash
+    val hash = newStreaming(0)
+    try
+      assert(hash.getValue == emptyHash)
+    finally hash.close()
+
+  test("Streaming one byte array"):
+    val hash = newStreaming(0)
+    try
+      val buf: Array[Byte] = Array[Byte](0)
+      hash.update(buf, 0, 1)
+      assert(hash.getValue == zeroHash)
+    finally hash.close()
+end AbstractHashTest
diff --git a/internal/util-control/src/test/scala/sbt/internal/util/FileSampleHashTest.scala b/internal/util-control/src/test/scala/sbt/internal/util/FileSampleHashTest.scala
new file mode 100644
index 000000000..92fe09e0a
--- /dev/null
+++ b/internal/util-control/src/test/scala/sbt/internal/util/FileSampleHashTest.scala
@@ -0,0 +1,44 @@
+/*
+ * sbt
+ * Copyright 2023, Scala center
+ * Copyright 2011 - 2022, Lightbend, Inc.
+ * Copyright 2008 - 2010, Mark Harrah
+ * Licensed under Apache License 2.0 (see LICENSE)
+ *
+ */
+
+package sbt.internal.util.hashing
+
+import verify.BasicTestSuite
+import sbt.io.IO
+import sbt.io.syntax.*
+
+// Checks Hashing.samplingFileHashXXHash64 (seed 0) against fixed expected values for
+// an empty file, a four-character file and a 1 MiB file of zero bytes.
+object FileSampleHashTest extends BasicTestSuite:
+  val emptyHash = -1205034819632174695L
+  val testHash = 2563739794714397383L
+
+  // Creates test.txt inside a fresh temporary directory, lets `populate` fill it and
+  // returns the hash computed for it.
+  private def sampledHash(populate: java.io.File => Unit) =
+    val hasher = Hashing.samplingFileHashXXHash64(0)
+    IO.withTemporaryDirectory: dir =>
+      val target = dir / "test.txt"
+      populate(target)
+      hasher.hash(target)
+
+  test("Hash empty file"):
+    val h = sampledHash(file => IO.touch(file))
+    assert(h == emptyHash)
+
+  test("Hash small file"):
+    val h = sampledHash(file => IO.write(file, "test"))
+    assert(h == testHash)
+
+  test("Hash medium file (1MB)"):
+    // 1024 appends of a 1024-byte zero block
+    val block: Array[Byte] = Array.fill[Byte](1024)(0.toByte)
+    val h = sampledHash(file => (0 until 1024).foreach(_ => IO.append(file, block)))
+    assert(h == -5176567862428962592L)
+end FileSampleHashTest
diff --git a/internal/util-control/src/test/scala/sbt/internal/util/WyHashTest.scala b/internal/util-control/src/test/scala/sbt/internal/util/WyHashTest.scala
new file mode 100644
index 000000000..b744c7784
--- /dev/null
+++ b/internal/util-control/src/test/scala/sbt/internal/util/WyHashTest.scala
@@ -0,0 +1,18 @@
+/*
+ * sbt
+ * Copyright 2023, Scala center
+ * Copyright 2011 - 2022, Lightbend, Inc.
+ * Copyright 2008 - 2010, Mark Harrah
+ * Licensed under Apache License 2.0 (see LICENSE)
+ *
+ */
+
+package sbt.internal.util.hashing
+
+// Runs the shared AbstractHashTest cases against the WyHash64 hashers.
+object WyHashTest extends AbstractHashTest:
+  override val hash64: HashAlgo = Hashing.wyhash64
+  override def newStreaming(seed: Int): StreamingHashAlgo =
+    Hashing.newStreamingWyHash64(seed)
+  // expected values for zero bytes and for a single 0x00 byte, seed 0
+  override val emptyHash = 290873116282709081L
+  override val zeroHash = -295637713410278011L
+end WyHashTest
diff --git a/internal/util-control/src/test/scala/sbt/internal/util/XXHashTest.scala b/internal/util-control/src/test/scala/sbt/internal/util/XXHashTest.scala
new file mode 100644
index 000000000..e56d7994b
--- /dev/null
+++ b/internal/util-control/src/test/scala/sbt/internal/util/XXHashTest.scala
@@ -0,0 +1,18 @@
+/*
+ * sbt
+ * Copyright 2023, Scala center
+ * Copyright 2011 - 2022, Lightbend, Inc.
+ * Copyright 2008 - 2010, Mark Harrah
+ * Licensed under Apache License 2.0 (see LICENSE)
+ *
+ */
+
+package sbt.internal.util.hashing
+
+// Runs the shared AbstractHashTest cases against the XXHash64 hashers.
+object XXHashTest extends AbstractHashTest:
+  override val hash64: HashAlgo = Hashing.xxhash64
+  override def newStreaming(seed: Int): StreamingHashAlgo =
+    Hashing.newStreamingXXHash64(seed)
+  // expected values for zero bytes and for a single 0x00 byte, seed 0
+  override val emptyHash = -1205034819632174695L
+  override val zeroHash = -1642502924627794072L
+end XXHashTest
diff --git a/main-command/src/main/java/sbt/internal/BootServerSocket.java b/main-command/src/main/java/sbt/internal/BootServerSocket.java
index 744b5116e..ace29c12a 100644
--- a/main-command/src/main/java/sbt/internal/BootServerSocket.java
+++ b/main-command/src/main/java/sbt/internal/BootServerSocket.java
@@ -28,7 +28,6 @@ import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.RejectedExecutionException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
-import net.openhft.hashing.LongHashFunction;
 import org.scalasbt.ipcsocket.UnixDomainServerSocket;
 import org.scalasbt.ipcsocket.Win32NamedPipeServerSocket;
 import org.scalasbt.ipcsocket.Win32SecurityLevel;
@@ -303,7 +302,9 @@ public class BootServerSocket implements AutoCloseable {
   public static String socketLocation(final Path base)
       throws UnsupportedEncodingException, IOException {
     final Path target = base.resolve("project").resolve("target");
-    long hash = LongHashFunction.farmNa().hashBytes(target.toString().getBytes("UTF-8"));
+    long hash =
+        ((long) target.toString().hashCode() << 32)
+            | (target.toString().length() * 31 & 0xffffffffL);
     if (isWindows) {
       return "sbt-load" + hash;
     } else {
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index b5d164557..08b629b73 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -108,6 +108,7 @@ object Dependencies {
   val scalaCollectionCompat = "org.scala-lang.modules" %% "scala-collection-compat" % "2.14.0"
   val caffeine = "com.github.ben-manes.caffeine" % "caffeine" % "2.8.5"
+  val blake3 = "pt.kcry" %% "blake3" % "3.1.2"
   val hedgehog = "qa.hedgehog" %% "hedgehog-sbt" % "0.13.0"
   val disruptor = "com.lmax" % "disruptor" % "3.4.2"
diff --git a/project/plugins.sbt b/project/plugins.sbt
index a6a70315a..43bfde102 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -13,5 +13,6 @@ addSbtPlugin("org.scalameta" % "sbt-native-image" % "0.4.0")
 addDependencyTreePlugin
 addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.14.5")
 addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.11.7")
+addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.8")
 // libraryDependencies += "org.scala-sbt" %% "scripted-plugin" % sbtVersion.value
diff --git a/util-cache/src/main/scala/sbt/internal/util/StringVirtualFile1.scala b/util-cache/src/main/scala/sbt/internal/util/StringVirtualFile1.scala
index 9aebe0f4c..e235f0a3b 100644
--- a/util-cache/src/main/scala/sbt/internal/util/StringVirtualFile1.scala
+++ b/util-cache/src/main/scala/sbt/internal/util/StringVirtualFile1.scala
@@ -7,7 +7,7 @@ import xsbti.{ BasicVirtualFileRef, VirtualFile }
 case class StringVirtualFile1(path: String, content: String)
     extends BasicVirtualFileRef(path)
     with VirtualFile:
-  override def contentHash: Long = HashUtil.farmHash(content.getBytes("UTF-8"))
+  override def contentHash: Long = HashUtil.xxhash64(content.getBytes("UTF-8"))
   override def sizeBytes: Long = content.getBytes("UTF-8").size
   override def contentHashStr: String =
     import Digest.*
diff --git a/util-cache/src/main/scala/sbt/util/Digest.scala b/util-cache/src/main/scala/sbt/util/Digest.scala
index 2e95324f1..ac662a3f1 100644
--- a/util-cache/src/main/scala/sbt/util/Digest.scala
+++ b/util-cache/src/main/scala/sbt/util/Digest.scala
@@ -2,6 +2,7 @@ package sbt.util
 import sjsonnew.IsoString
 import sbt.io.Hash
+import sbt.internal.util.hashing.Hashing
 import xsbti.HashedVirtualFileRef
 import java.io.{ BufferedInputStream, InputStream }
 import java.nio.ByteBuffer
@@ -18,6 +19,10 @@ object Digest:
   private[sbt] val Sha256 = "sha256"
   private[sbt] val Sha384 = "sha384"
   private[sbt] val Sha512 = "sha512"
+  private[sbt] val Imoxx64 = "imoxx64"
+  private[sbt] val Imowy64 = "imowy64"
+  private[sbt] val Xx64 = "xx64"
+  private[sbt] val Wy64 = "wy64"
   extension (d: Digest)
     def contentHashStr: String =
@@ -44,9 +49,24 @@ object Digest:
     apply(ref.contentHashStr() + "/" + ref.sizeBytes.toString)
+  // Hashes the file at `path` with `algo`. The imo* algorithms hand the path to the
+  // Hashing.samplingFileHash* hashers; xx64/wy64 and all remaining algorithms consume
+  // the file through an input stream.
   def apply(algo: String, path: Path): Digest =
-    Using.resource(Files.newInputStream(path)) { input =>
-      apply(algo, hashBytes(algo, input), Files.size(path))
-    }
+    algo match
+      case Imoxx64 =>
+        val hash64 = Hashing.samplingFileHashXXHash64(0)
+        val h = hash64.hash(path)
+        apply(algo, longsToBytes(Array(h)), Files.size(path))
+      case Imowy64 =>
+        val hash64 = Hashing.samplingFileHashWyHash64(0)
+        val h = hash64.hash(path)
+        apply(algo, longsToBytes(Array(h)), Files.size(path))
+      case Xx64 | Wy64 =>
+        Using.resource(Files.newInputStream(path)) { input =>
+          val h = hashBytesInternal(algo, input)
+          apply(algo, longsToBytes(Array(h)), Files.size(path))
+        }
+      case _ =>
+        Using.resource(Files.newInputStream(path)) { input =>
+          apply(algo, hashBytes(algo, input), Files.size(path))
+        }
   // used to wrap a Long value as a fake Digest, which will
   // later be hashed using sha256 anyway.
@@ -55,6 +75,9 @@ object Digest:
   lazy val zero: Digest = dummy(0L)
+  private[sbt] def sha1Hash(path: Path): Digest =
+    apply(Sha1, path)
+
   def sha256Hash(path: Path): Digest = apply(Sha256, path)
   def sha256Hash(bytes: Array[Byte]): Digest =
@@ -67,6 +90,17 @@ object Digest:
   def sha256Hash(digests: Digest*): Digest =
     sha256Hash(digests.toSeq.map(_.toBytes).flatten.toArray[Byte])
+  def imoxx64Hash(path: Path): Digest = apply(Imoxx64, path)
+
+  def imowy64Hash(path: Path): Digest = apply(Imowy64, path)
+
+  def xx64Hash(path: Path): Digest = apply(Xx64, path)
+
+  def wy64Hash(path: Path): Digest = apply(Wy64, path)
+
+  private[sbt] def md5Hash(bytes: Array[Byte]): Digest =
+    apply(Md5, hashBytes(Md5, bytes), bytes.length)
+
   // first check the file size, then the hash
   def sameDigest(path: Path, digest: Digest): Boolean =
     if Files.size(path) != digest.sizeBytes then false
@@ -90,6 +124,24 @@ object Digest:
       digest.digest
     }
+  // Streams `input` through one of our own 64-bit hashers (Xx64 or Wy64, seed 0) and
+  // returns the resulting value. Any other `algo` is rejected with an
+  // IllegalArgumentException. The bytes are read through the buffered wrapper, and the
+  // streaming hasher is closed even when reading or updating fails.
+  private def hashBytesInternal(algo: String, input: InputStream): Long =
+    val BufferSize = 8192
+    Using.resource(BufferedInputStream(input)) { bis =>
+      val digest = algo match
+        case Xx64 => Hashing.newStreamingXXHash64(0)
+        case Wy64 => Hashing.newStreamingWyHash64(0)
+        case other =>
+          throw IllegalArgumentException(s"unsupported streaming hash algorithm: $other")
+      try
+        val buf = new Array[Byte](BufferSize)
+        // do-while form: read returns a negative count at end of stream
+        while
+          val readBytes = bis.read(buf)
+          if readBytes >= 0 then digest.update(buf, 0, readBytes)
+          readBytes >= 0
+        do ()
+        digest.getValue
+      finally digest.close()
+    }
+
   private def validateString(s: String): Unit =
     parse(s)
     ()
@@ -100,6 +152,14 @@ object Digest:
       case head :: rest :: Nil =>
         val subtokens = head :: rest.split("/").toList
         subtokens match
+          case (a @ Xx64) :: value :: sizeBytes :: Nil =>
+            (a, value, sizeBytes.toLong, parseHex(value, 64))
+          case (a @ Wy64) :: value :: sizeBytes :: Nil =>
+            (a, value, sizeBytes.toLong, parseHex(value, 64))
+          case (a @ Imoxx64) :: value :: sizeBytes :: Nil =>
+            (a, value, sizeBytes.toLong, parseHex(value, 64))
+          case (a @ Imowy64) :: value :: sizeBytes :: Nil =>
+            (a, value, sizeBytes.toLong, parseHex(value, 64))
           case (a @ Murmur3) :: value :: sizeBytes :: Nil =>
             (a, value, sizeBytes.toLong, parseHex(value, 128))
           case (a @ Md5) :: value :: sizeBytes :: Nil =>
diff --git a/util-cache/src/main/scala/sbt/util/HashUtil.scala b/util-cache/src/main/scala/sbt/util/HashUtil.scala
index 3a5f976ea..ff26e9eba 100644
--- a/util-cache/src/main/scala/sbt/util/HashUtil.scala
+++ b/util-cache/src/main/scala/sbt/util/HashUtil.scala
@@ -1,24 +1,16 @@
 package sbt.util
-import java.nio.file.{ Files, Path }
-import net.openhft.hashing.LongHashFunction
+import java.nio.file.{ Path as NioPath }
+import sbt.internal.util.hashing.Hashing
 object HashUtil:
-  private[sbt] def farmHash(bytes: Array[Byte]): Long =
-    LongHashFunction.farmNa().hashBytes(bytes)
+  // Hash of the whole byte array via Hashing.xxhash64, seed 0.
+  private[sbt] def xxhash64(bytes: Array[Byte]): Long =
+    Hashing.xxhash64.hash(bytes, 0, bytes.size, 0)
-  private[sbt] def farmHash(path: Path): Long =
-    import sbt.io.Hash
-    // allocating many byte arrays for large files may lead to OOME
-    // but it is more efficient for small files
-    val largeFileLimit = 10 * 1024 * 1024
+  // Hash of the file at `path` via Hashing.samplingFileHashWyHash64, seed 0.
+  private[sbt] def imohash64(path: NioPath): Long =
+    val hash64 = Hashing.samplingFileHashWyHash64(0)
+    hash64.hash(path)
-    if Files.size(path) < largeFileLimit then farmHash(Files.readAllBytes(path))
-    else farmHash(Hash(path.toFile))
-
-  private[sbt] def farmHashStr(path: Path): String =
-    "farm64-" + farmHash(path).toHexString
-
-  private[sbt] def toFarmHashString(digest: Long): String =
-    s"farm64-${digest.toHexString}"
+  // NOTE(review): the "imoxx64-" tag suggests XXHash, but imohash64 uses the WyHash
+  // sampler, while Digest pairs "imoxx64" with samplingFileHashXXHash64 — confirm
+  // which algorithm is intended before relying on the tag.
+  private[sbt] def imohash64Str(path: NioPath): String =
+    "imoxx64-" + imohash64(path).toHexString
 end HashUtil
diff --git a/util-cache/src/main/scala/sbt/util/PathHashWriters.scala b/util-cache/src/main/scala/sbt/util/PathHashWriters.scala
index f9549db6c..a86f8474c 100644
--- a/util-cache/src/main/scala/sbt/util/PathHashWriters.scala
+++ b/util-cache/src/main/scala/sbt/util/PathHashWriters.scala
@@ -24,11 +24,11 @@ object StringStrings:
   given Conversion[HashedVirtualFileRef, StringString] =
     (x: HashedVirtualFileRef) => StringString(x.id, x.contentHashStr)
+  // Files and paths are paired with HashUtil.imohash64Str of their location on disk;
+  // virtual files are paired with their contentHash rendered as "xx64-<hex>".
   given Conversion[File, StringString] =
-    (x: File) => StringString(x.toString(), HashUtil.farmHashStr(x.toPath()))
+    (x: File) => StringString(x.toString(), HashUtil.imohash64Str(x.toPath()))
   given Conversion[Path, StringString] =
-    (x: Path) => StringString(x.toString(), HashUtil.farmHashStr(x))
+    (x: Path) => StringString(x.toString(), HashUtil.imohash64Str(x))
   given Conversion[VirtualFile, StringString] =
-    (x: VirtualFile) => StringString(x.id, s"farm64-${x.contentHash.toHexString}")
+    (x: VirtualFile) => StringString(x.id, s"xx64-${x.contentHash.toHexString}")
   given HashWriter[StringString] = new HashWriter[StringString]:
     def write[J](obj: StringString, builder: Builder[J]): Unit =
diff --git a/util-cache/src/test/scala/sbt/util/DigestTest.scala b/util-cache/src/test/scala/sbt/util/DigestTest.scala
index 15c5de7d2..57b9a2c7b 100644
--- a/util-cache/src/test/scala/sbt/util/DigestTest.scala
+++ b/util-cache/src/test/scala/sbt/util/DigestTest.scala
@@ -42,6 +42,34 @@ object DigestTest extends verify.BasicTestSuite:
     testEmptyFile("sha512", expected)
   }
+  // Expected digests of an empty file (size 0). For empty input each imo* variant is
+  // expected to yield the same hex value as its streaming counterpart (xx64 / wy64).
+  test("imoxx64") {
+    val expected = Digest(
+      "imoxx64-ef46db3751d8e999/0"
+    )
+    testEmptyFile("imoxx64", expected)
+  }
+
+  test("imowy64") {
+    val expected = Digest(
+      "imowy64-0409638ee2bde459/0"
+    )
+    testEmptyFile("imowy64", expected)
+  }
+
+  test("xx64") {
+    val expected = Digest(
+      "xx64-ef46db3751d8e999/0"
+    )
+    testEmptyFile("xx64", expected)
+  }
+
+  test("wy64") {
+    val expected = Digest(
+      "wy64-0409638ee2bde459/0"
+    )
+    testEmptyFile("wy64", expected)
+  }
+
   test("digest composition") {
     val dummy1 = Digest.dummy(0L)
     val dummy2 = Digest.dummy(0L)
diff --git a/util-cache/src/test/scala/sbt/util/HasherTest.scala b/util-cache/src/test/scala/sbt/util/HasherTest.scala
index ad881a43d..67528c06f 100644
--- a/util-cache/src/test/scala/sbt/util/HasherTest.scala
+++ b/util-cache/src/test/scala/sbt/util/HasherTest.scala
@@ -11,6 +11,7 @@ object HasherTest extends BasicTestSuite:
   final val blankContentHash = -7286425919675154353L
   val blankContentHashStr = "farm64-9ae16a3b2f90404f"
   final val blankATxtHash = 1166939303L
+  // expected Hasher.hashUnsafe result for StringVirtualFile1("a.txt", "")
+  final val blankATxtXX64 = -541480681L
   test("The IntJsonFormat should convert an Int to an int hash") {
     import BasicJsonProtocol.given
@@ -36,7 +37,7 @@ object HasherTest extends BasicTestSuite:
     import PathHashWriters.given
     val x = StringVirtualFile1("a.txt", "")
     val actual = Hasher.hashUnsafe(x)
-    assert(actual == blankATxtHash)
+    assert(actual == blankATxtXX64)
   }
   test("tuple") {