Add spark scripted test

This test ensures that a simple spark app will work with all of the
classloader layering strategies. Spark is an important part of the scala
ecosystem and heavily taxes classloading.

The test just checks that the app runs. I verified manually that the
first evaluation of run takes about 8 seconds regardless of the
layering strategy. Subsequent runs drop to about 6 seconds with the
ScalaLibrary strategy and to about 1 second with AllLibraryJars.
This commit is contained in:
Ethan Atkins 2019-06-03 13:48:48 -07:00
parent 233307b696
commit 014190a048
4 changed files with 39 additions and 0 deletions

View File

@ -0,0 +1,7 @@
// Build definition for the minimal Spark application.
scalaVersion := "2.12.8"
name := "Simple Project"
version := "1.0"
// `%%` selects the artifact built for the Scala binary version configured above.
libraryDependencies ++= Seq("org.apache.spark" %% "spark-sql" % "2.4.3")

View File

@ -0,0 +1,7 @@
a
b
c
d
e
f
g

View File

@ -0,0 +1,14 @@
import org.apache.spark.sql.SparkSession
/** Minimal Spark application: counts the lines of a text file that contain
  * the letters "a" and "b" and prints both counts.
  */
object SimpleApp {

  /** Entry point.
    *
    * Reads the relative path `log.txt` through a Spark session whose master
    * is set to `local`, prints the two counts, and always stops the session.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val logFile = "log.txt"
    val spark = SparkSession.builder
      .appName("Simple Application")
      .config("spark.master", "local")
      .getOrCreate()
    try {
      // Cached because the dataset is scanned twice, once per count below.
      val logData = spark.read.textFile(logFile).cache()
      val numAs = logData.filter(line => line.contains("a")).count()
      val numBs = logData.filter(line => line.contains("b")).count()
      println(s"Lines with a: $numAs, Lines with b: $numBs")
    } finally spark.stop() // stop the session even when the job throws
  }
}

View File

@ -0,0 +1,11 @@
# Run the application once under each class loader layering strategy.
# Every `> run` step must succeed for the scripted test to pass.

# Strategy: AllLibraryJars
> set classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.AllLibraryJars
> run
# Strategy: ScalaLibrary
> set classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.ScalaLibrary
> run
# Strategy: Flat
> set classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.Flat
> run