From 014190a048283ef74096c47a169440e817d0e597 Mon Sep 17 00:00:00 2001 From: Ethan Atkins Date: Mon, 3 Jun 2019 13:48:48 -0700 Subject: [PATCH] Add spark scripted test This test ensures that a simple Spark app will work with all of the classloader layering strategies. Spark is an important part of the Scala ecosystem and heavily taxes classloading. The test just checks that the app runs. I verified manually that the first evaluation of `run` takes about 8 seconds regardless of the layering strategy. It drops to 6 seconds with the ScalaLibrary strategy. It drops to 1 second with AllLibraryJars. --- sbt/src/sbt-test/classloader-cache/spark/build.sbt | 7 +++++++ sbt/src/sbt-test/classloader-cache/spark/log.txt | 7 +++++++ .../spark/src/main/scala/spark/SimpleApp.scala | 14 ++++++++++++++ sbt/src/sbt-test/classloader-cache/spark/test | 11 +++++++++++ 4 files changed, 39 insertions(+) create mode 100644 sbt/src/sbt-test/classloader-cache/spark/build.sbt create mode 100644 sbt/src/sbt-test/classloader-cache/spark/log.txt create mode 100644 sbt/src/sbt-test/classloader-cache/spark/src/main/scala/spark/SimpleApp.scala create mode 100644 sbt/src/sbt-test/classloader-cache/spark/test diff --git a/sbt/src/sbt-test/classloader-cache/spark/build.sbt b/sbt/src/sbt-test/classloader-cache/spark/build.sbt new file mode 100644 index 000000000..33fdcaa77 --- /dev/null +++ b/sbt/src/sbt-test/classloader-cache/spark/build.sbt @@ -0,0 +1,7 @@ +name := "Simple Project" + +version := "1.0" + +scalaVersion := "2.12.8" + +libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.3" diff --git a/sbt/src/sbt-test/classloader-cache/spark/log.txt b/sbt/src/sbt-test/classloader-cache/spark/log.txt new file mode 100644 index 000000000..65b63b6d6 --- /dev/null +++ b/sbt/src/sbt-test/classloader-cache/spark/log.txt @@ -0,0 +1,7 @@ +a +b +c +d +e +f +g \ No newline at end of file diff --git a/sbt/src/sbt-test/classloader-cache/spark/src/main/scala/spark/SimpleApp.scala 
b/sbt/src/sbt-test/classloader-cache/spark/src/main/scala/spark/SimpleApp.scala
new file mode 100644
index 000000000..4f7dcce5b
--- /dev/null
+++ b/sbt/src/sbt-test/classloader-cache/spark/src/main/scala/spark/SimpleApp.scala
@@ -0,0 +1,29 @@
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Minimal Spark application executed by the scripted `test` file via `run`,
+ * once for each `classLoaderLayeringStrategy` value that the script sets.
+ */
+object SimpleApp {
+  /**
+   * Reads `log.txt`, counts the lines containing "a" and the lines containing
+   * "b", and prints both counts. The session is stopped in a `finally` block.
+   *
+   * @param args command line arguments (unused)
+   */
+  def main(args: Array[String]): Unit = {
+    // Relative path to the input file.
+    val logFile = "log.txt"
+    val spark = SparkSession.builder
+      .appName("Simple Application")
+      .config("spark.master", "local")
+      .getOrCreate()
+    try {
+      // cache() is requested because the data set is filtered twice below.
+      val logData = spark.read.textFile(logFile).cache()
+      val numAs = logData.filter(line => line.contains("a")).count()
+      val numBs = logData.filter(line => line.contains("b")).count()
+      println(s"Lines with a: $numAs, Lines with b: $numBs")
+    } finally spark.stop()
+  }
+}
diff --git a/sbt/src/sbt-test/classloader-cache/spark/test b/sbt/src/sbt-test/classloader-cache/spark/test
new file mode 100644
index 000000000..9b368a1b1
--- /dev/null
+++ b/sbt/src/sbt-test/classloader-cache/spark/test
@@ -0,0 +1,11 @@
+> set classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.AllLibraryJars
+
+> run
+
+> set classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.ScalaLibrary
+
+> run
+
+> set classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.Flat
+
+> run