Move assembly generation stuff to coursier.cli.util.Assembly

This commit is contained in:
Alexandre Archambault 2018-04-30 00:02:03 +02:00
parent 23b8ef67f7
commit 7d40ba0134
3 changed files with 138 additions and 130 deletions

View File

@ -6,7 +6,7 @@ import java.net.URLClassLoader
import caseapp._
import coursier.Dependency
import coursier.cli.options.SparkSubmitOptions
import coursier.cli.spark.{Assembly, Submit}
import coursier.cli.spark.{SparkAssembly, Submit}
/**
@ -85,7 +85,7 @@ object SparkSubmit extends CaseApp[SparkSubmitOptions] {
val (sparkYarnExtraConf, sparkBaseJars) =
if (!options.autoAssembly || sparkVersion.startsWith("2.")) {
val assemblyJars = Assembly.sparkJars(
val assemblyJars = SparkAssembly.sparkJars(
scalaVersion,
sparkVersion,
options.yarnVersion,
@ -107,7 +107,7 @@ object SparkSubmit extends CaseApp[SparkSubmitOptions] {
(extraConf, assemblyJars)
} else {
val assemblyAndJarsOrError = Assembly.spark(
val assemblyAndJarsOrError = SparkAssembly.spark(
scalaVersion,
sparkVersion,
options.yarnVersion,

View File

@ -1,142 +1,28 @@
package coursier.cli.spark
import java.io.{File, FileInputStream, FileOutputStream, OutputStream}
import java.io.{File, FileOutputStream}
import java.math.BigInteger
import java.nio.charset.StandardCharsets.UTF_8
import java.nio.file.{Files, StandardCopyOption}
import java.security.MessageDigest
import java.util.jar.{Attributes, JarFile, JarOutputStream, Manifest}
import java.util.regex.Pattern
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}
import java.util.jar.JarFile
import coursier.Cache
import coursier.cli.Helper
import coursier.cli.options.CommonOptions
import coursier.cli.util.Zip
import coursier.cli.util.Assembly
import scala.collection.mutable
object SparkAssembly {
object Assembly {
sealed abstract class Rule extends Product with Serializable
object Rule {
sealed abstract class PathRule extends Rule {
def path: String
}
final case class Exclude(path: String) extends PathRule
final case class ExcludePattern(path: Pattern) extends Rule
object ExcludePattern {
def apply(s: String): ExcludePattern =
ExcludePattern(Pattern.compile(s))
}
// TODO Accept a separator: Array[Byte] argument in these
// (to separate content with a line return in particular)
final case class Append(path: String) extends PathRule
final case class AppendPattern(path: Pattern) extends Rule
object AppendPattern {
def apply(s: String): AppendPattern =
AppendPattern(Pattern.compile(s))
}
}
def make(jars: Seq[File], output: OutputStream, attributes: Seq[(Attributes.Name, String)], rules: Seq[Rule]): Unit = {
val rulesMap = rules.collect { case r: Rule.PathRule => r.path -> r }.toMap
val excludePatterns = rules.collect { case Rule.ExcludePattern(p) => p }
val appendPatterns = rules.collect { case Rule.AppendPattern(p) => p }
val manifest = new Manifest
manifest.getMainAttributes.put(Attributes.Name.MANIFEST_VERSION, "1.0")
for ((k, v) <- attributes)
manifest.getMainAttributes.put(k, v)
var zos: ZipOutputStream = null
try {
zos = new JarOutputStream(output, manifest)
val concatenedEntries = new mutable.HashMap[String, ::[(ZipEntry, Array[Byte])]]
var ignore = Set.empty[String]
for (jar <- jars) {
var fis: FileInputStream = null
var zis: ZipInputStream = null
try {
fis = new FileInputStream(jar)
zis = new ZipInputStream(fis)
for ((ent, content) <- Zip.zipEntries(zis)) {
def append() =
concatenedEntries += ent.getName -> ::((ent, content), concatenedEntries.getOrElse(ent.getName, Nil))
rulesMap.get(ent.getName) match {
case Some(Rule.Exclude(_)) =>
// ignored
case Some(Rule.Append(_)) =>
append()
case None =>
if (!excludePatterns.exists(_.matcher(ent.getName).matches())) {
if (appendPatterns.exists(_.matcher(ent.getName).matches()))
append()
else if (!ignore(ent.getName)) {
ent.setCompressedSize(-1L)
zos.putNextEntry(ent)
zos.write(content)
zos.closeEntry()
ignore += ent.getName
}
}
}
}
} finally {
if (zis != null)
zis.close()
if (fis != null)
fis.close()
}
}
for ((_, entries) <- concatenedEntries) {
val (ent, _) = entries.head
ent.setCompressedSize(-1L)
if (entries.tail.nonEmpty)
ent.setSize(entries.map(_._2.length).sum)
zos.putNextEntry(ent)
// for ((_, b) <- entries.reverse)
// zos.write(b)
zos.write(entries.reverse.toArray.flatMap(_._2))
zos.closeEntry()
}
} finally {
if (zos != null)
zos.close()
}
}
val assemblyRules = Seq[Rule](
Rule.Append("META-INF/services/org.apache.hadoop.fs.FileSystem"),
Rule.Append("reference.conf"),
Rule.AppendPattern("META-INF/services/.*"),
Rule.Exclude("log4j.properties"),
Rule.Exclude(JarFile.MANIFEST_NAME),
Rule.ExcludePattern("META-INF/.*\\.[sS][fF]"),
Rule.ExcludePattern("META-INF/.*\\.[dD][sS][aA]"),
Rule.ExcludePattern("META-INF/.*\\.[rR][sS][aA]")
val assemblyRules = Seq[Assembly.Rule](
Assembly.Rule.Append("META-INF/services/org.apache.hadoop.fs.FileSystem"),
Assembly.Rule.Append("reference.conf"),
Assembly.Rule.AppendPattern("META-INF/services/.*"),
Assembly.Rule.Exclude("log4j.properties"),
Assembly.Rule.Exclude(JarFile.MANIFEST_NAME),
Assembly.Rule.ExcludePattern("META-INF/.*\\.[sS][fF]"),
Assembly.Rule.ExcludePattern("META-INF/.*\\.[dD][sS][aA]"),
Assembly.Rule.ExcludePattern("META-INF/.*\\.[rR][sS][aA]")
)
def sparkBaseDependencies(

View File

@ -0,0 +1,122 @@
package coursier.cli.util
import java.io.{File, FileInputStream, OutputStream}
import java.util.jar.{Attributes, JarOutputStream, Manifest}
import java.util.regex.Pattern
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}
import scala.collection.mutable
object Assembly {
sealed abstract class Rule extends Product with Serializable
object Rule {
sealed abstract class PathRule extends Rule {
def path: String
}
final case class Exclude(path: String) extends PathRule
final case class ExcludePattern(path: Pattern) extends Rule
object ExcludePattern {
def apply(s: String): ExcludePattern =
ExcludePattern(Pattern.compile(s))
}
// TODO Accept a separator: Array[Byte] argument in these
// (to separate content with a line return in particular)
final case class Append(path: String) extends PathRule
final case class AppendPattern(path: Pattern) extends Rule
object AppendPattern {
def apply(s: String): AppendPattern =
AppendPattern(Pattern.compile(s))
}
}
def make(jars: Seq[File], output: OutputStream, attributes: Seq[(Attributes.Name, String)], rules: Seq[Rule]): Unit = {
val rulesMap = rules.collect { case r: Rule.PathRule => r.path -> r }.toMap
val excludePatterns = rules.collect { case Rule.ExcludePattern(p) => p }
val appendPatterns = rules.collect { case Rule.AppendPattern(p) => p }
val manifest = new Manifest
manifest.getMainAttributes.put(Attributes.Name.MANIFEST_VERSION, "1.0")
for ((k, v) <- attributes)
manifest.getMainAttributes.put(k, v)
var zos: ZipOutputStream = null
try {
zos = new JarOutputStream(output, manifest)
val concatenedEntries = new mutable.HashMap[String, ::[(ZipEntry, Array[Byte])]]
var ignore = Set.empty[String]
for (jar <- jars) {
var fis: FileInputStream = null
var zis: ZipInputStream = null
try {
fis = new FileInputStream(jar)
zis = new ZipInputStream(fis)
for ((ent, content) <- Zip.zipEntries(zis)) {
def append() =
concatenedEntries += ent.getName -> ::((ent, content), concatenedEntries.getOrElse(ent.getName, Nil))
rulesMap.get(ent.getName) match {
case Some(Rule.Exclude(_)) =>
// ignored
case Some(Rule.Append(_)) =>
append()
case None =>
if (!excludePatterns.exists(_.matcher(ent.getName).matches())) {
if (appendPatterns.exists(_.matcher(ent.getName).matches()))
append()
else if (!ignore(ent.getName)) {
ent.setCompressedSize(-1L)
zos.putNextEntry(ent)
zos.write(content)
zos.closeEntry()
ignore += ent.getName
}
}
}
}
} finally {
if (zis != null)
zis.close()
if (fis != null)
fis.close()
}
}
for ((_, entries) <- concatenedEntries) {
val (ent, _) = entries.head
ent.setCompressedSize(-1L)
if (entries.tail.nonEmpty)
ent.setSize(entries.map(_._2.length).sum)
zos.putNextEntry(ent)
// for ((_, b) <- entries.reverse)
// zos.write(b)
zos.write(entries.reverse.toArray.flatMap(_._2))
zos.closeEntry()
}
} finally {
if (zos != null)
zos.close()
}
}
}