Implement `managedChecksums` in ivy

Managed checksums tells ivy to skip verifying the checksums of jar files
and just persist the downloaded checksum files in the cache.

The user that enables that option will take care of verifying they are
correct.

This is a big deal because:

1. Ivy takes *a lot of time* checking checksums for big jars, and does
   it sequentially. The user (sbt) can do better by running these checks
   in parallel, speeding up the whole resolution process!
2. The fact that the sha files are not present in the cache means that
   build tools cannot check if a checksum is correct without preprocessing
   the jar.

Note that the user is responsible for keeping the cache consistent. If
the checksum is not correct, the user should report it and overwrite the
checksum file with the valid digest.
This commit is contained in:
jvican 2017-05-26 12:10:29 +02:00
parent 4c8036c7cb
commit 7bf60557d9
No known key found for this signature in database
GPG Key ID: 42DAFA0F112E8050
6 changed files with 159 additions and 47 deletions

View File

@ -141,18 +141,24 @@ private[sbt] object ConvertResolver {
def apply(r: Resolver, settings: IvySettings, log: Logger): DependencyResolver =
apply(r, settings, UpdateOptions(), log)
private[librarymanagement] val ManagedChecksums = "managedChecksums"
/** Converts the given sbt resolver into an Ivy resolver. */
def apply(
r: Resolver,
settings: IvySettings,
updateOptions: UpdateOptions,
log: Logger
): DependencyResolver =
): DependencyResolver = {
// Pass in to the resolver converter the update options via ivy settings
settings.setVariable(ManagedChecksums, updateOptions.managedChecksums.toString)
(updateOptions.resolverConverter orElse defaultConvert)((r, settings, log))
}
/** The default implementation of converter. */
lazy val defaultConvert: ResolverConverter = {
case (r, settings, log) =>
val managedChecksums = settings.getVariable(ManagedChecksums).toBoolean
r match {
case repo: MavenRepository => {
val pattern = Collections.singletonList(
@ -162,6 +168,7 @@ private[sbt] object ConvertResolver {
extends IBiblioResolver
with ChecksumFriendlyURLResolver
with DescriptorRequired {
override val managedChecksumsEnabled: Boolean = managedChecksums
override def getResource(resource: Resource, dest: File): Long = get(resource, dest)
def setPatterns(): Unit = {
// done this way for access to protected methods.
@ -178,6 +185,7 @@ private[sbt] object ConvertResolver {
}
case repo: SshRepository => {
val resolver = new SshResolver with DescriptorRequired {
override val managedChecksumsEnabled: Boolean = managedChecksums
override def getResource(resource: Resource, dest: File): Long = get(resource, dest)
}
initializeSSHResolver(resolver, repo, settings)
@ -196,6 +204,7 @@ private[sbt] object ConvertResolver {
// in local files for non-changing revisions.
// This will be fully enforced in sbt 1.0.
setRepository(new WarnOnOverwriteFileRepo())
override val managedChecksumsEnabled: Boolean = managedChecksums
override def getResource(resource: Resource, dest: File): Long = get(resource, dest)
}
resolver.setName(repo.name)
@ -207,6 +216,7 @@ private[sbt] object ConvertResolver {
}
case repo: URLRepository => {
val resolver = new URLResolver with ChecksumFriendlyURLResolver with DescriptorRequired {
override val managedChecksumsEnabled: Boolean = managedChecksums
override def getResource(resource: Resource, dest: File): Long = get(resource, dest)
}
resolver.setName(repo.name)
@ -223,55 +233,61 @@ private[sbt] object ConvertResolver {
// Works around implementation restriction to access protected method `get`
def getResource(resource: Resource, dest: File): Long
override def getAndCheck(resource: Resource, dest: File): Long = {
// Follows the same semantics that private method `check` as defined in ivy `BasicResolver`
def check(resource: Resource, destination: File, algorithm: String) = {
if (!ChecksumHelper.isKnownAlgorithm(algorithm)) {
throw new IllegalArgumentException(s"Unknown checksum algorithm: $algorithm")
}
val checksumResource = resource.clone(s"${resource.getName}.$algorithm")
if (checksumResource.exists) {
Message.debug(s"$algorithm file found for $resource: checking...")
val checksumFile = File.createTempFile("ivytmp", algorithm)
try {
getResource(checksumResource, checksumFile)
try {
ChecksumHelper.check(dest, checksumFile, algorithm)
Message.verbose(s"$algorithm OK for $resource")
true
} catch {
case e: IOException =>
dest.delete()
throw e
}
} finally {
checksumFile.delete()
}
} else false
}
/**
* Defines an option to tell ivy to disable checksums when downloading and
* let the user handle verifying these checksums.
*
* This means that the checksums are stored in the ivy cache directory. This
* is good for reproducibility from outside ivy. Sbt can check that jars are
* not corrupted, ever, independently of trusting whatever is there in the
* local directory.
*/
def managedChecksumsEnabled: Boolean
val size = getResource(resource, dest)
val checksums = getChecksumAlgorithms
checksums.foldLeft(false) { (failed, checksum) =>
// Continue checking until we hit a failure
if (failed) failed
else check(resource, dest, checksum)
import sbt.io.syntax._
private def downloadChecksum(resource: Resource,
target: File,
targetChecksumFile: File,
algorithm: String): Boolean = {
if (!ChecksumHelper.isKnownAlgorithm(algorithm))
throw new IllegalArgumentException(s"Unknown checksum algorithm: $algorithm")
val checksumResource = resource.clone(s"${resource.getName}.$algorithm")
if (!checksumResource.exists) false
else {
Message.debug(s"$algorithm file found for $resource: downloading...")
// Resource must be cleaned up outside of this function if it's invalid
getResource(checksumResource, targetChecksumFile)
true
}
size
}
var i = 0
private final val PartEnd = ".part"
private final val JarEnd = ".jar"
private final val TemporaryJar = JarEnd + PartEnd
override def getAndCheck(resource: Resource, target: File): Long = {
val targetPath = target.getAbsolutePath
if (!managedChecksumsEnabled || !targetPath.endsWith(TemporaryJar)) {
super.getAndCheck(resource, target)
} else {
// This is where we differ from ivy behaviour
val size = getResource(resource, target)
val checksumAlgorithms = getChecksumAlgorithms
checksumAlgorithms.foldLeft(false) { (checked, algorithm) =>
// Continue checking until we hit a failure
val checksumFile = new File(targetPath.stripSuffix(PartEnd) + s".$algorithm")
if (checked) checked
else downloadChecksum(resource, target, checksumFile, algorithm)
}
size
}
}
override def getDependency(dd: DependencyDescriptor, data: ResolveData) = {
val moduleID = IvyRetrieve.toModuleID(dd.getDependencyRevisionId)
print(" " * i)
println(s"Downloading and checking for $moduleID")
i += 2
val prev = descriptorString(isAllownomd)
setDescriptor(descriptorString(hasExplicitURL(dd)))
val t = try super.getDependency(dd, data)
finally setDescriptor(prev)
i -= 2
print(" " * i)
println(s"End $moduleID")
t
}
def descriptorString(optional: Boolean) =

View File

@ -540,6 +540,7 @@ object IvyActions {
report: UpdateReport,
config: RetrieveConfiguration
): UpdateReport = {
val copyChecksums = ivy.getVariable(ConvertResolver.ManagedChecksums).toBoolean
val toRetrieve = config.configurationsToRetrieve
val base = config.retrieveDirectory
val pattern = config.outputPattern
@ -551,9 +552,9 @@ object IvyActions {
val toCopy = new collection.mutable.HashSet[(File, File)]
val retReport = report retrieve { (conf, mid, art, cached) =>
configurationNames match {
case None => performRetrieve(conf, mid, art, base, pattern, cached, toCopy)
case None => performRetrieve(conf, mid, art, base, pattern, cached, copyChecksums, toCopy)
case Some(names) if names(conf) =>
performRetrieve(conf, mid, art, base, pattern, cached, toCopy)
performRetrieve(conf, mid, art, base, pattern, cached, copyChecksums, toCopy)
case _ => cached
}
}
@ -577,10 +578,27 @@ object IvyActions {
base: File,
pattern: String,
cached: File,
copyChecksums: Boolean,
toCopy: collection.mutable.HashSet[(File, File)]
): File = {
val to = retrieveTarget(conf, mid, art, base, pattern)
toCopy += ((cached, to))
if (copyChecksums) {
// Copy over to the lib managed directory any checksum for a jar if it exists
// TODO(jvican): Support user-provided checksums
val cachePath = cached.getAbsolutePath
IvySbt.DefaultChecksums.foreach { checksum =>
if (cachePath.endsWith(".jar")) {
val cacheChecksum = new File(s"$cachePath.$checksum")
if (cacheChecksum.exists()) {
val toChecksum = new File(s"${to.getAbsolutePath}.$checksum")
toCopy += ((cacheChecksum, toChecksum))
}
}
}
}
to
}

View File

@ -9,6 +9,9 @@ import sbt.util.Logger
* While UpdateConfiguration is passed into update at runtime,
* UpdateOption is intended to be used while setting up the Ivy object.
*
* @param managedChecksums Managed checksums tells ivy whether it should only download the
* checksum files and let the caller handle the verification.
*
* See also UpdateConfiguration in IvyActions.scala.
*/
final class UpdateOptions private[sbt] (
@ -22,7 +25,7 @@ final class UpdateOptions private[sbt] (
val consolidatedResolution: Boolean,
// If set to true, use cached resolution.
val cachedResolution: Boolean,
// If set to true, use cached resolution.
// If set to true, use managed checksums.
val managedChecksums: Boolean,
// Extension point for an alternative resolver converter.
val resolverConverter: UpdateOptions.ResolverConverter,

View File

@ -53,7 +53,7 @@ trait BaseIvySpecification extends UnitSpec {
def mkIvyConfiguration(uo: UpdateOptions): IvyConfiguration = {
val paths = IvyPaths(currentBase, Some(currentTarget))
val other = Vector.empty
val check = IvySbt.DefaultChecksums.headOption.toVector
val check = Vector.empty
val moduleConfs = Vector(ModuleConfiguration("*", chainResolver))
val resCacheDir = currentTarget / "resolution-cache"
new InlineIvyConfiguration(paths,

View File

@ -3,7 +3,7 @@ package sbt.librarymanagement
import org.scalatest.Assertion
import sbt.internal.librarymanagement._
import sbt.internal.librarymanagement.impl.DependencyBuilders
import sbt.io.IO
import sbt.io.{ FileFilter, IO, Path }
class OfflineModeSpec extends BaseIvySpecification with DependencyBuilders {
private final def targetDir = Some(currentDependency)

View File

@ -0,0 +1,75 @@
package sbt.librarymanagement
import java.io.File
import org.apache.ivy.util.Message
import org.scalatest.Assertion
import sbt.internal.librarymanagement.{
BaseIvySpecification,
InlineIvyConfiguration,
IvyActions,
IvyConfiguration,
IvyPaths,
IvySbt,
LogicalClock,
UnresolvedWarningConfiguration
}
import sbt.internal.librarymanagement.impl.DependencyBuilders
import sbt.io.IO
class ManagedChecksumsSpec extends BaseIvySpecification with DependencyBuilders {
private final def targetDir = Some(currentDependency)
private final def onlineConf = makeUpdateConfiguration(false)
private final def warningConf = UnresolvedWarningConfiguration()
private final def noClock = LogicalClock.unknown
private final val Checksum = "sha1"
def avro177 = ModuleID("org.apache.avro", "avro", "1.7.7")
def dataAvro1940 = ModuleID("com.linkedin.pegasus", "data-avro", "1.9.40")
def netty320 = ModuleID("org.jboss.netty", "netty", "3.2.0.Final")
final def dependencies: Vector[ModuleID] =
Vector(avro177, dataAvro1940, netty320).map(_.withConfigurations(Some("compile")))
import sbt.io.syntax._
override def mkIvyConfiguration(uo: UpdateOptions): IvyConfiguration = {
val paths = IvyPaths(currentBase, Some(currentTarget))
val other = Vector.empty
val check = Vector(Checksum)
val moduleConfs = Vector(ModuleConfiguration("*", chainResolver))
val resCacheDir = currentTarget / "resolution-cache"
new InlineIvyConfiguration(paths,
resolvers,
other,
moduleConfs,
None,
check,
Some(resCacheDir),
uo,
log)
}
def cleanAll(): Unit = {
cleanIvyCache()
IO.delete(currentTarget)
IO.delete(currentManaged)
IO.delete(currentDependency)
}
def assertChecksumExists(file: File) = {
val shaFile = new File(file.getAbsolutePath + s".$Checksum")
Message.info(s"Checking $shaFile exists...")
assert(shaFile.exists(), s"The checksum $Checksum for $file does not exist")
}
"Managed checksums" should "should download the checksum files" in {
cleanAll()
val updateOptions = UpdateOptions().withManagedChecksums(true)
val toResolve = module(defaultModuleId, dependencies, None, updateOptions)
val res = IvyActions.updateEither(toResolve, onlineConf, warningConf, noClock, targetDir, log)
assert(res.isRight, s"Resolution with managed checksums failed! $res")
val updateReport = res.right.get
val allModuleReports = updateReport.configurations.flatMap(_.modules)
val allArtifacts: Seq[File] = allModuleReports.flatMap(_.artifacts.map(_._2))
allArtifacts.foreach(assertChecksumExists)
}
}