From 7d40ba0134d5e990da3dfac4b1cb9f3124757cdf Mon Sep 17 00:00:00 2001 From: Alexandre Archambault Date: Mon, 30 Apr 2018 00:02:03 +0200 Subject: [PATCH] Move assembly generation stuff to coursier.cli.util.Assembly --- .../scala-2.12/coursier/cli/SparkSubmit.scala | 6 +- .../{Assembly.scala => SparkAssembly.scala} | 140 ++---------------- .../coursier/cli/util/Assembly.scala | 122 +++++++++++++++ 3 files changed, 138 insertions(+), 130 deletions(-) rename cli/src/main/scala-2.12/coursier/cli/spark/{Assembly.scala => SparkAssembly.scala} (55%) create mode 100644 cli/src/main/scala-2.12/coursier/cli/util/Assembly.scala diff --git a/cli/src/main/scala-2.12/coursier/cli/SparkSubmit.scala b/cli/src/main/scala-2.12/coursier/cli/SparkSubmit.scala index 5a4dd9939..731b7d4ea 100644 --- a/cli/src/main/scala-2.12/coursier/cli/SparkSubmit.scala +++ b/cli/src/main/scala-2.12/coursier/cli/SparkSubmit.scala @@ -6,7 +6,7 @@ import java.net.URLClassLoader import caseapp._ import coursier.Dependency import coursier.cli.options.SparkSubmitOptions -import coursier.cli.spark.{Assembly, Submit} +import coursier.cli.spark.{SparkAssembly, Submit} /** @@ -85,7 +85,7 @@ object SparkSubmit extends CaseApp[SparkSubmitOptions] { val (sparkYarnExtraConf, sparkBaseJars) = if (!options.autoAssembly || sparkVersion.startsWith("2.")) { - val assemblyJars = Assembly.sparkJars( + val assemblyJars = SparkAssembly.sparkJars( scalaVersion, sparkVersion, options.yarnVersion, @@ -107,7 +107,7 @@ object SparkSubmit extends CaseApp[SparkSubmitOptions] { (extraConf, assemblyJars) } else { - val assemblyAndJarsOrError = Assembly.spark( + val assemblyAndJarsOrError = SparkAssembly.spark( scalaVersion, sparkVersion, options.yarnVersion, diff --git a/cli/src/main/scala-2.12/coursier/cli/spark/Assembly.scala b/cli/src/main/scala-2.12/coursier/cli/spark/SparkAssembly.scala similarity index 55% rename from cli/src/main/scala-2.12/coursier/cli/spark/Assembly.scala rename to cli/src/main/scala-2.12/coursier/cli/spark/SparkAssembly.scala index e75e048ca..1fd8ab549 100644 --- a/cli/src/main/scala-2.12/coursier/cli/spark/Assembly.scala +++ b/cli/src/main/scala-2.12/coursier/cli/spark/SparkAssembly.scala @@ -1,142 +1,28 @@ package coursier.cli.spark -import java.io.{File, FileInputStream, FileOutputStream, OutputStream} +import java.io.{File, FileOutputStream} import java.math.BigInteger import java.nio.charset.StandardCharsets.UTF_8 import java.nio.file.{Files, StandardCopyOption} import java.security.MessageDigest -import java.util.jar.{Attributes, JarFile, JarOutputStream, Manifest} -import java.util.regex.Pattern -import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream} +import java.util.jar.JarFile import coursier.Cache import coursier.cli.Helper import coursier.cli.options.CommonOptions -import coursier.cli.util.Zip +import coursier.cli.util.Assembly -import scala.collection.mutable +object SparkAssembly { -object Assembly { - - sealed abstract class Rule extends Product with Serializable - - object Rule { - sealed abstract class PathRule extends Rule { - def path: String - } - - final case class Exclude(path: String) extends PathRule - final case class ExcludePattern(path: Pattern) extends Rule - - object ExcludePattern { - def apply(s: String): ExcludePattern = - ExcludePattern(Pattern.compile(s)) - } - - // TODO Accept a separator: Array[Byte] argument in these - // (to separate content with a line return in particular) - final case class Append(path: String) extends PathRule - final case class AppendPattern(path: Pattern) 
extends Rule - - object AppendPattern { - def apply(s: String): AppendPattern = - AppendPattern(Pattern.compile(s)) - } - } - - def make(jars: Seq[File], output: OutputStream, attributes: Seq[(Attributes.Name, String)], rules: Seq[Rule]): Unit = { - - val rulesMap = rules.collect { case r: Rule.PathRule => r.path -> r }.toMap - val excludePatterns = rules.collect { case Rule.ExcludePattern(p) => p } - val appendPatterns = rules.collect { case Rule.AppendPattern(p) => p } - - val manifest = new Manifest - manifest.getMainAttributes.put(Attributes.Name.MANIFEST_VERSION, "1.0") - for ((k, v) <- attributes) - manifest.getMainAttributes.put(k, v) - - var zos: ZipOutputStream = null - - try { - zos = new JarOutputStream(output, manifest) - - val concatenedEntries = new mutable.HashMap[String, ::[(ZipEntry, Array[Byte])]] - - var ignore = Set.empty[String] - - for (jar <- jars) { - var fis: FileInputStream = null - var zis: ZipInputStream = null - - try { - fis = new FileInputStream(jar) - zis = new ZipInputStream(fis) - - for ((ent, content) <- Zip.zipEntries(zis)) { - - def append() = - concatenedEntries += ent.getName -> ::((ent, content), concatenedEntries.getOrElse(ent.getName, Nil)) - - rulesMap.get(ent.getName) match { - case Some(Rule.Exclude(_)) => - // ignored - - case Some(Rule.Append(_)) => - append() - - case None => - if (!excludePatterns.exists(_.matcher(ent.getName).matches())) { - if (appendPatterns.exists(_.matcher(ent.getName).matches())) - append() - else if (!ignore(ent.getName)) { - ent.setCompressedSize(-1L) - zos.putNextEntry(ent) - zos.write(content) - zos.closeEntry() - - ignore += ent.getName - } - } - } - } - - } finally { - if (zis != null) - zis.close() - if (fis != null) - fis.close() - } - } - - for ((_, entries) <- concatenedEntries) { - val (ent, _) = entries.head - - ent.setCompressedSize(-1L) - - if (entries.tail.nonEmpty) - ent.setSize(entries.map(_._2.length).sum) - - zos.putNextEntry(ent) - // for ((_, b) <- entries.reverse) - // zos.write(b) - zos.write(entries.reverse.toArray.flatMap(_._2)) - zos.closeEntry() - } - } finally { - if (zos != null) - zos.close() - } - } - - val assemblyRules = Seq[Rule]( - Rule.Append("META-INF/services/org.apache.hadoop.fs.FileSystem"), - Rule.Append("reference.conf"), - Rule.AppendPattern("META-INF/services/.*"), - Rule.Exclude("log4j.properties"), - Rule.Exclude(JarFile.MANIFEST_NAME), - Rule.ExcludePattern("META-INF/.*\\.[sS][fF]"), - Rule.ExcludePattern("META-INF/.*\\.[dD][sS][aA]"), - Rule.ExcludePattern("META-INF/.*\\.[rR][sS][aA]") + val assemblyRules = Seq[Assembly.Rule]( + Assembly.Rule.Append("META-INF/services/org.apache.hadoop.fs.FileSystem"), + Assembly.Rule.Append("reference.conf"), + Assembly.Rule.AppendPattern("META-INF/services/.*"), + Assembly.Rule.Exclude("log4j.properties"), + Assembly.Rule.Exclude(JarFile.MANIFEST_NAME), + Assembly.Rule.ExcludePattern("META-INF/.*\\.[sS][fF]"), + Assembly.Rule.ExcludePattern("META-INF/.*\\.[dD][sS][aA]"), + Assembly.Rule.ExcludePattern("META-INF/.*\\.[rR][sS][aA]") ) def sparkBaseDependencies( diff --git a/cli/src/main/scala-2.12/coursier/cli/util/Assembly.scala b/cli/src/main/scala-2.12/coursier/cli/util/Assembly.scala new file mode 100644 index 000000000..62b834d5a --- /dev/null +++ b/cli/src/main/scala-2.12/coursier/cli/util/Assembly.scala @@ -0,0 +1,122 @@ +package coursier.cli.util + +import java.io.{File, FileInputStream, OutputStream} +import java.util.jar.{Attributes, JarOutputStream, Manifest} +import java.util.regex.Pattern +import java.util.zip.{ZipEntry, 
ZipInputStream, ZipOutputStream}
+
+import scala.collection.mutable
+
+object Assembly {
+
+  sealed abstract class Rule extends Product with Serializable
+
+  object Rule {
+    sealed abstract class PathRule extends Rule {
+      def path: String
+    }
+
+    final case class Exclude(path: String) extends PathRule
+    final case class ExcludePattern(path: Pattern) extends Rule
+
+    object ExcludePattern {
+      def apply(s: String): ExcludePattern =
+        ExcludePattern(Pattern.compile(s))
+    }
+
+    // TODO Accept a separator: Array[Byte] argument in these
+    // (to separate content with a line return in particular)
+    final case class Append(path: String) extends PathRule
+    final case class AppendPattern(path: Pattern) extends Rule
+
+    object AppendPattern {
+      def apply(s: String): AppendPattern =
+        AppendPattern(Pattern.compile(s))
+    }
+  }
+
+  def make(jars: Seq[File], output: OutputStream, attributes: Seq[(Attributes.Name, String)], rules: Seq[Rule]): Unit = {
+
+    val rulesMap = rules.collect { case r: Rule.PathRule => r.path -> r }.toMap
+    val excludePatterns = rules.collect { case Rule.ExcludePattern(p) => p }
+    val appendPatterns = rules.collect { case Rule.AppendPattern(p) => p }
+
+    val manifest = new Manifest
+    manifest.getMainAttributes.put(Attributes.Name.MANIFEST_VERSION, "1.0")
+    for ((k, v) <- attributes)
+      manifest.getMainAttributes.put(k, v)
+
+    var zos: ZipOutputStream = null
+
+    try {
+      zos = new JarOutputStream(output, manifest)
+
+      val concatenatedEntries = new mutable.HashMap[String, ::[(ZipEntry, Array[Byte])]]
+
+      var ignore = Set.empty[String]
+
+      for (jar <- jars) {
+        var fis: FileInputStream = null
+        var zis: ZipInputStream = null
+
+        try {
+          fis = new FileInputStream(jar)
+          zis = new ZipInputStream(fis)
+
+          for ((ent, content) <- Zip.zipEntries(zis)) {
+
+            def append() =
+              concatenatedEntries += ent.getName -> ::((ent, content), concatenatedEntries.getOrElse(ent.getName, Nil))
+
+            rulesMap.get(ent.getName) match {
+              case Some(Rule.Exclude(_)) =>
+              // ignored
+
+              case Some(Rule.Append(_)) =>
+                append()
+
+              case None =>
+                if (!excludePatterns.exists(_.matcher(ent.getName).matches())) {
+                  if (appendPatterns.exists(_.matcher(ent.getName).matches()))
+                    append()
+                  else if (!ignore(ent.getName)) {
+                    ent.setCompressedSize(-1L)
+                    zos.putNextEntry(ent)
+                    zos.write(content)
+                    zos.closeEntry()
+
+                    ignore += ent.getName
+                  }
+                }
+            }
+          }
+
+        } finally {
+          if (zis != null)
+            zis.close()
+          if (fis != null)
+            fis.close()
+        }
+      }
+
+      for ((_, entries) <- concatenatedEntries) {
+        val (ent, _) = entries.head
+
+        ent.setCompressedSize(-1L)
+
+        if (entries.tail.nonEmpty)
+          ent.setSize(entries.map(_._2.length).sum)
+
+        zos.putNextEntry(ent)
+        // entries were prepended as they were read, so reverse them to write
+        // the concatenated contents in their original jar order
+        zos.write(entries.reverse.toArray.flatMap(_._2))
+        zos.closeEntry()
+      }
+    } finally {
+      if (zos != null)
+        zos.close()
+    }
+  }
+
+}
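
Note on usage: coursier.cli.util.Assembly keeps the exact make signature the code had under coursier.cli.spark.Assembly, so existing callers only need the import change shown above. As a quick illustration of the extracted API, here is a minimal sketch; the jar paths and main class below are hypothetical (in the actual CLI the input jars come from coursier.cli.Helper):

    import java.io.{File, FileOutputStream}
    import java.util.jar.Attributes

    import coursier.cli.spark.SparkAssembly
    import coursier.cli.util.Assembly

    object AssemblyExample {
      def main(args: Array[String]): Unit = {
        // Hypothetical inputs: jars resolved beforehand, by whatever means.
        val jars = Seq(new File("dep1.jar"), new File("dep2.jar"))

        val output = new FileOutputStream(new File("assembly.jar"))
        try
          Assembly.make(
            jars,
            output,
            // Extra main-manifest attributes; MANIFEST_VERSION is always set by make.
            Seq(Attributes.Name.MAIN_CLASS -> "com.example.Main"),
            // The Spark rule set moved by this patch: concatenate service files
            // and reference.conf, drop log4j.properties, per-jar manifests and
            // signature files.
            SparkAssembly.assemblyRules
          )
        finally
          output.close()
      }
    }

With an empty rule list, make writes each entry name once, first jar wins; Append/AppendPattern rules switch matching entries to concatenation across jars, and Exclude/ExcludePattern rules drop them altogether.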