mirror of https://github.com/sbt/sbt.git
Add spark scripted test
This test ensures that a simple Spark app works with all of the classloader layering strategies. Spark is an important part of the Scala ecosystem and heavily taxes classloading. The test just checks that the app runs. I verified manually that the first evaluation of run takes about 8 seconds regardless of the layering strategy. On subsequent evaluations it drops to 6 seconds with the ScalaLibrary strategy and to 1 second with AllLibraryJars.
This commit is contained in:
parent
233307b696
commit
014190a048
|
|
@ -0,0 +1,7 @@
|
|||
// Build definition for the Spark scripted test project.
// Project name and version.
name := "Simple Project"
|
||||
|
||||
version := "1.0"
|
||||
|
||||
// Scala version used to compile the app; `%%` below selects the dependency
// artifact built for the matching Scala binary version.
scalaVersion := "2.12.8"
|
||||
|
||||
// Spark SQL provides the SparkSession API imported by SimpleApp.
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.3"
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
a
|
||||
b
|
||||
c
|
||||
d
|
||||
e
|
||||
f
|
||||
g
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
import org.apache.spark.sql.SparkSession
|
||||
|
||||
/**
 * Minimal Spark application: counts the lines of a text file that contain
 * the letters "a" and "b" and prints both counts.
 */
object SimpleApp {

  /**
   * Entry point. Starts a Spark session with `spark.master` set to `local`,
   * reads `log.txt` (a relative path), prints the two counts, and always
   * stops the session afterwards.
   *
   * @param args command-line arguments; not used.
   */
  def main(args: Array[String]): Unit = {
    val logFile = "log.txt"
    val spark = SparkSession.builder
      .appName("Simple Application")
      .config("spark.master", "local")
      .getOrCreate()
    try {
      // Cache the dataset because it is scanned twice, once per filter below.
      val logData = spark.read.textFile(logFile).cache()
      val numAs = logData.filter(line => line.contains("a")).count()
      val numBs = logData.filter(line => line.contains("b")).count()
      println(s"Lines with a: $numAs, Lines with b: $numBs")
    } finally spark.stop() // stop the session even if reading or counting fails
  }
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
# Run the app once under each classloader layering strategy.
# Strategy 1: AllLibraryJars.
> set classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.AllLibraryJars
|
||||
|
||||
> run
|
||||
|
||||
# Strategy 2: ScalaLibrary.
> set classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.ScalaLibrary
|
||||
|
||||
> run
|
||||
|
||||
# Strategy 3: Flat.
> set classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.Flat
|
||||
|
||||
> run
|
||||
Loading…
Reference in New Issue