mirror of https://github.com/sbt/sbt.git
Port imohash to Scala
This commit is contained in:
parent
c40beae1ff
commit
47c73b701d
24
build.sbt
24
build.sbt
|
|
@ -292,16 +292,18 @@ lazy val utilInterface = (project in file("internal") / "util-interface").settin
|
|||
mimaSettings,
|
||||
)
|
||||
|
||||
lazy val utilControl = (project in file("internal") / "util-control").settings(
|
||||
utilCommonSettings,
|
||||
name := "Util Control",
|
||||
libraryDependencies ++= Seq(
|
||||
scalacheck % Test,
|
||||
scalaVerify % Test,
|
||||
hedgehog % Test,
|
||||
),
|
||||
mimaSettings,
|
||||
)
|
||||
// Build definition of the "Util Control" module located at internal/util-control.
lazy val utilControl = (project in file("internal") / "util-control")
|
||||
.settings(
|
||||
utilCommonSettings,
|
||||
name := "Util Control",
|
||||
libraryDependencies ++= Seq(
|
||||
scalacheck % Test,
|
||||
scalaVerify % Test,
|
||||
hedgehog % Test,
|
||||
),
|
||||
mimaSettings,
|
||||
)
|
||||
// NOTE(review): presumably makes sbt IO available to this module's tests only
// (the helper is defined elsewhere in the build) — confirm.
.configure(addSbtIOForTest)
|
||||
|
||||
lazy val utilPosition = (project in file("internal") / "util-position")
|
||||
.settings(
|
||||
|
|
@ -374,7 +376,7 @@ lazy val utilCache = project
|
|||
// we generate JsonCodec only for actionresult.contra
|
||||
JsonCodecPlugin,
|
||||
)
|
||||
.dependsOn(utilLogging)
|
||||
.dependsOn(utilLogging, utilControl)
|
||||
.settings(
|
||||
testedBaseSettings,
|
||||
name := "Util Cache",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* sbt
|
||||
* Copyright 2023, Scala center
|
||||
* Copyright 2011 - 2022, Lightbend, Inc.
|
||||
* Copyright 2008 - 2010, Mark Harrah
|
||||
* Licensed under Apache License 2.0 (see LICENSE)
|
||||
*
|
||||
*/
|
||||
|
||||
package sbt.internal.util.hashing
|
||||
|
||||
import java.io.File
|
||||
import java.nio.file.{ Path as NioPath }
|
||||
|
||||
/**
 * Maps a file to a 64-bit hash value.
 *
 * The file can be given either as a `java.io.File` or as a `java.nio.file.Path`.
 */
trait FileHash:
  /** Returns the hash value of `file`. */
  def hash(file: File): Long

  /** Returns the hash value of the file denoted by the NIO path `file`. */
  def hash(file: NioPath): Long

  /** Renders as the simple name of the implementing class. */
  override def toString(): String =
    val implementation = this.getClass
    implementation.getSimpleName
end FileHash
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* sbt
|
||||
* Copyright 2023, Scala center
|
||||
* Copyright 2011 - 2022, Lightbend, Inc.
|
||||
* Copyright 2008 - 2010, Mark Harrah
|
||||
* Licensed under Apache License 2.0 (see LICENSE)
|
||||
*
|
||||
*/
|
||||
|
||||
package sbt.internal.util.hashing
|
||||
|
||||
import java.io.{ File, RandomAccessFile }
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.file.{ Path as NioPath }
|
||||
import scala.util.Using
|
||||
|
||||
/** Default parameters and factory for [[FileSampleHash]]. */
object FileSampleHash:
  /** Default `sampleBytes` constructor argument: 16 KiB. */
  final val defaultSampleBytes = 16 * 1024

  /** Default `thresholdBytes` constructor argument: 128 KiB. */
  final val defaultThresholdBytes = 128L * 1024L

  /** Misspelled alias of [[defaultThresholdBytes]], kept so existing references still compile. */
  final val defaultThresoldBytes = defaultThresholdBytes

  /**
   * Creates a [[FileSampleHash]] with the default sample size and threshold.
   *
   * @param underlying streaming hash algorithm handed to the new instance
   */
  def apply(underlying: StreamingHashAlgo): FileSampleHash =
    new FileSampleHash(defaultSampleBytes, defaultThresholdBytes, underlying)
end FileSampleHash
|
||||
|
||||
/**
 * File hash that samples large files instead of reading them completely.
 * Based on Imohash https://github.com/kalafut/imohash/blob/master/algorithm.md
 *
 * Files shorter than `thresholdBytes` (and every file when `sampleBytes` is 0) are fed to
 * `underlying` in full. Otherwise three windows of `sampleBytes` bytes are fed: the start of
 * the file, the window beginning at `fileLength / 2`, and the last `sampleBytes` bytes.
 * For non-empty files the file length is fed afterwards as 8 bytes.
 *
 * One read buffer and the `underlying` hasher are reused by every call and are not guarded
 * by any lock here, so concurrent `hash` calls on the same instance would interfere.
 *
 * @param sampleBytes size in bytes of each sample window; must not be negative
 * @param thresholdBytes files shorter than this many bytes are hashed in full
 * @param underlying streaming hash that receives the bytes and produces the result
 */
class FileSampleHash(sampleBytes: Int, thresholdBytes: Long, underlying: StreamingHashAlgo)
    extends FileHash:
  require(sampleBytes >= 0)

  /** Scratch buffer reused by every read. */
  val buffer: Array[Byte] = new Array[Byte](4096)

  /** Hashes the file denoted by the NIO path by delegating to the `File` overload. */
  override def hash(file: NioPath): Long =
    hash(file.toFile())

  /** Opens `file` read-only, hashes it and closes it again. */
  override def hash(file: File): Long =
    Using.resource(new RandomAccessFile(file, "r")): raf =>
      hash(raf, raf.length())

  /**
   * Feeds either the whole file or the three sample windows, then the file length,
   * into `underlying` and returns its value.
   */
  private def hash(input: RandomAccessFile, fileLength: Long): Long =
    underlying.reset()
    // The middle window starts at fileLength / 2 and needs sampleBytes bytes after it.
    // When it does not fit (possible with a small custom threshold) sampling would hit
    // EOF or seek to a negative offset, so the whole file is hashed instead. This also
    // guarantees fileLength >= sampleBytes for the first and last windows.
    val windowsFit = fileLength - fileLength / 2 >= sampleBytes
    if fileLength < thresholdBytes || sampleBytes < 1 || !windowsFit then
      hashBytes(input, fileLength)
    else
      hashBytes(input, sampleBytes)
      // skip to halfway point
      input.seek(fileLength / 2)
      hashBytes(input, sampleBytes)
      // last window ends exactly at the end of the file
      input.seek(fileLength - sampleBytes)
      hashBytes(input, sampleBytes)

    // write file size
    if fileLength > 0 then
      val sizeBytes = ByteBuffer.allocate(java.lang.Long.BYTES).putLong(fileLength).array()
      underlying.update(sizeBytes, 0, sizeBytes.length)

    underlying.getValue
  end hash

  /**
   * Reads `toHash` bytes from the current position of `input` and feeds them to
   * `underlying`, flushing the scratch buffer whenever it is full and once more at the end.
   * Fails with a runtime error when the file ends before `toHash` bytes were read.
   */
  private def hashBytes(input: RandomAccessFile, toHash: Long): Unit =
    var remaining: Long = toHash
    var pos = 0
    while remaining > 0 do
      val toRead = math.min((buffer.length - pos).toLong, remaining).toInt
      val bytesRead = input.read(buffer, pos, toRead)
      if bytesRead < 0 then sys.error("unexpected EOF")
      pos += bytesRead
      remaining -= bytesRead
      if pos >= buffer.length then
        underlying.update(buffer, 0, buffer.length)
        pos = 0
    // flush the partially filled tail
    if pos > 0 then underlying.update(buffer, 0, pos)
  end hashBytes
end FileSampleHash
|
||||
|
|
@ -17,4 +17,8 @@ object Hashing:
|
|||
new StreamingXXHash64VarHandle(seed)
|
||||
/** Creates a new `StreamingWyHash64VarHandle` initialised with `seed`. */
def newStreamingWyHash64(seed: Long): StreamingHashAlgo =
  val algo: StreamingHashAlgo = new StreamingWyHash64VarHandle(seed)
  algo
|
||||
/** Creates a [[FileSampleHash]] on top of `newStreamingXXHash64(seed)`. */
def samplingFileHashXXHash64(seed: Long): FileHash =
  val streaming = newStreamingXXHash64(seed)
  FileSampleHash(streaming)
|
||||
/** Creates a [[FileSampleHash]] on top of `newStreamingWyHash64(seed)`. */
def samplingFileHashWyHash64(seed: Long): FileHash =
  val streaming = newStreamingWyHash64(seed)
  FileSampleHash(streaming)
|
||||
end Hashing
|
||||
|
|
|
|||
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* sbt
|
||||
* Copyright 2023, Scala center
|
||||
* Copyright 2011 - 2022, Lightbend, Inc.
|
||||
* Copyright 2008 - 2010, Mark Harrah
|
||||
* Licensed under Apache License 2.0 (see LICENSE)
|
||||
*
|
||||
*/
|
||||
|
||||
package sbt.internal.util.hashing
|
||||
|
||||
import verify.BasicTestSuite
|
||||
import sbt.io.IO
|
||||
import sbt.io.syntax.*
|
||||
|
||||
/** Pins the values produced by the XXHash64-backed sampling file hash (seed 0). */
object FileSampleHashTest extends BasicTestSuite:
  val emptyHash = -1205034819632174695L
  val testHash = 2563739794714397383L

  // Creates "test.txt" in a fresh temporary directory, lets `populate` fill it,
  // and returns its sampling XXHash64 value.
  private def hashTempFile(populate: java.io.File => Unit): Long =
    val hasher = Hashing.samplingFileHashXXHash64(0)
    IO.withTemporaryDirectory: dir =>
      val target = dir / "test.txt"
      populate(target)
      hasher.hash(target)

  test("Hash empty file"):
    val h = hashTempFile(file => IO.touch(file))
    assert(h == emptyHash)

  test("Hash small file"):
    val h = hashTempFile(file => IO.write(file, "test"))
    assert(h == testHash)

  test("Hash medium file (1MB)"):
    // 1024 appends of a zero-filled 1 KiB chunk
    val chunk: Array[Byte] = new Array[Byte](1024)
    val h = hashTempFile(file => (1 to 1024).foreach(_ => IO.append(file, chunk)))
    assert(h == -5176567862428962592L)
end FileSampleHashTest
|
||||
|
|
@ -2,6 +2,7 @@ package sbt.util
|
|||
|
||||
import sjsonnew.IsoString
|
||||
import sbt.io.Hash
|
||||
import sbt.internal.util.hashing.Hashing
|
||||
import xsbti.HashedVirtualFileRef
|
||||
import java.io.{ BufferedInputStream, InputStream }
|
||||
import java.nio.ByteBuffer
|
||||
|
|
@ -18,6 +19,10 @@ object Digest:
|
|||
private[sbt] val Sha256 = "sha256"
|
||||
private[sbt] val Sha384 = "sha384"
|
||||
private[sbt] val Sha512 = "sha512"
|
||||
// Algorithm names whose digest value is parsed as 64 bits of hex. In apply(algo, path) the
// "imo"-prefixed names select the sampling file hashers and the plain names hash the
// full input stream.
private[sbt] val Imoxx64 = "imoxx64"
|
||||
private[sbt] val Imowy64 = "imowy64"
|
||||
private[sbt] val Xx64 = "xx64"
|
||||
private[sbt] val Wy64 = "wy64"
|
||||
|
||||
extension (d: Digest)
|
||||
def contentHashStr: String =
|
||||
|
|
@ -44,9 +49,24 @@ object Digest:
|
|||
apply(ref.contentHashStr() + "/" + ref.sizeBytes.toString)
|
||||
|
||||
/**
 * Computes the digest of the file at `path` with the algorithm named `algo`.
 *
 * The 64-bit algorithms are computed by the hashers in `Hashing`; any other
 * name is passed on to `hashBytes`.
 */
def apply(algo: String, path: Path): Digest =
  // wraps a 64-bit hash value, reading the file size after the hash was computed
  def fromLong(value: Long): Digest =
    apply(algo, longsToBytes(Array(value)), Files.size(path))

  algo match
    case Imoxx64 =>
      fromLong(Hashing.samplingFileHashXXHash64(0).hash(path))
    case Imowy64 =>
      fromLong(Hashing.samplingFileHashWyHash64(0).hash(path))
    case Xx64 | Wy64 =>
      Using.resource(Files.newInputStream(path)) { stream =>
        fromLong(hashBytesInternal(algo, stream))
      }
    case _ =>
      Using.resource(Files.newInputStream(path)) { stream =>
        apply(algo, hashBytes(algo, stream), Files.size(path))
      }
|
||||
|
||||
// used to wrap a Long value as a fake Digest, which will
|
||||
// later be hashed using sha256 anyway.
|
||||
|
|
@ -67,6 +87,10 @@ object Digest:
|
|||
def sha256Hash(digests: Digest*): Digest =
|
||||
sha256Hash(digests.toSeq.map(_.toBytes).flatten.toArray[Byte])
|
||||
|
||||
/** Digest of the file at `path` computed with the `Imoxx64` algorithm. */
def imoxx64Hash(path: Path): Digest = apply(Imoxx64, path)
|
||||
|
||||
/** Digest of the file at `path` computed with the `Xx64` algorithm. */
def xx64Hash(path: Path): Digest = apply(Xx64, path)
|
||||
|
||||
private[sbt] def md5Hash(bytes: Array[Byte]): Digest =
|
||||
apply(Md5, hashBytes(Md5, bytes), bytes.length)
|
||||
|
||||
|
|
@ -93,6 +117,24 @@ object Digest:
|
|||
digest.digest
|
||||
}
|
||||
|
||||
// using our own hashing algorithms
/**
 * Hashes everything readable from `input` with the streaming algorithm named `algo`
 * (`Xx64` or `Wy64`, seed 0) and returns the resulting value.
 *
 * The stream is wrapped in a `BufferedInputStream` and closed when hashing finishes.
 * Throws `IllegalArgumentException` for any other algorithm name.
 */
private def hashBytesInternal(algo: String, input: InputStream): Long =
  val BufferSize = 8192
  Using.resource(BufferedInputStream(input)) { bis =>
    val digest = algo match
      case Xx64 => Hashing.newStreamingXXHash64(0)
      case Wy64 => Hashing.newStreamingWyHash64(0)
      case other =>
        throw IllegalArgumentException(s"unsupported streaming hash algorithm: $other")
    // release the hasher even when reading fails
    try
      val buf = new Array[Byte](BufferSize)
      // read through the buffered wrapper, not the raw stream
      var readBytes = bis.read(buf)
      while readBytes >= 0 do
        digest.update(buf, 0, readBytes)
        readBytes = bis.read(buf)
      digest.getValue
    finally digest.close()
  }
|
||||
|
||||
private def validateString(s: String): Unit =
|
||||
parse(s)
|
||||
()
|
||||
|
|
@ -103,6 +145,14 @@ object Digest:
|
|||
case head :: rest :: Nil =>
|
||||
val subtokens = head :: rest.split("/").toList
|
||||
subtokens match
|
||||
case (a @ Xx64) :: value :: sizeBytes :: Nil =>
|
||||
(a, value, sizeBytes.toLong, parseHex(value, 64))
|
||||
case (a @ Wy64) :: value :: sizeBytes :: Nil =>
|
||||
(a, value, sizeBytes.toLong, parseHex(value, 64))
|
||||
case (a @ Imoxx64) :: value :: sizeBytes :: Nil =>
|
||||
(a, value, sizeBytes.toLong, parseHex(value, 64))
|
||||
case (a @ Imowy64) :: value :: sizeBytes :: Nil =>
|
||||
(a, value, sizeBytes.toLong, parseHex(value, 64))
|
||||
case (a @ Murmur3) :: value :: sizeBytes :: Nil =>
|
||||
(a, value, sizeBytes.toLong, parseHex(value, 128))
|
||||
case (a @ Md5) :: value :: sizeBytes :: Nil =>
|
||||
|
|
|
|||
|
|
@ -42,6 +42,34 @@ object DigestTest extends verify.BasicTestSuite:
|
|||
testEmptyFile("sha512", expected)
|
||||
}
|
||||
|
||||
test("imoxx64") {
  // digest string layout: <algorithm>-<hex value>/<size in bytes>
  testEmptyFile("imoxx64", Digest("imoxx64-ef46db3751d8e999/0"))
}
|
||||
|
||||
test("imowy64") {
  // digest string layout: <algorithm>-<hex value>/<size in bytes>
  testEmptyFile("imowy64", Digest("imowy64-0409638ee2bde459/0"))
}
|
||||
|
||||
test("xx64") {
  // digest string layout: <algorithm>-<hex value>/<size in bytes>
  testEmptyFile("xx64", Digest("xx64-ef46db3751d8e999/0"))
}
|
||||
|
||||
test("wy64") {
  // digest string layout: <algorithm>-<hex value>/<size in bytes>
  testEmptyFile("wy64", Digest("wy64-0409638ee2bde459/0"))
}
|
||||
|
||||
test("digest composition") {
|
||||
val dummy1 = Digest.dummy(0L)
|
||||
val dummy2 = Digest.dummy(0L)
|
||||
|
|
|
|||
Loading…
Reference in New Issue