From 196318e6192a167da1359ee77b4d6bba2d7afb06 Mon Sep 17 00:00:00 2001 From: Ethan Atkins Date: Sat, 27 Jul 2019 21:40:48 -0700 Subject: [PATCH 1/2] Fix multi parser performance regression It was reported in https://github.com/sbt/sbt/issues/4890 that cosmetic white space could cause problems for the paser. I tracked this down to primarily being because of the `val semi = token(OptSpace ~> ';' ~> OptSpace)` line. This would cause excessive backtracking. I added a test for a multi line command with a lot of cosmetic whitespace that was adapted from #4890 except that I made it even more taxing by running adding 100 commands instead of the roughly 10 in the report. Before the parser changes, the test would more or less block indefinitely. I never saw it successfully complete. After these changes, it completes in 30-50ms (which drops to about 2-3 ms if the number of commands is dropped from 100 to 3). I verified manually in a different project that a number of different multi command completions still worked. In particular, I tested that `~foo/test; foo/tes` would expand to `~foo/test; foo/test` which is one of the hardest cases to get right. I also added a few extra test cases for the parser since I wasn't sure what the impact of removing the OptSpace ~> from the semi parser would be. --- .../src/main/scala/sbt/BasicCommands.scala | 7 +++---- .../src/test/scala/sbt/MultiParserSpec.scala | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/main-command/src/main/scala/sbt/BasicCommands.scala b/main-command/src/main/scala/sbt/BasicCommands.scala index 73c0184e5..819a71b2e 100644 --- a/main-command/src/main/scala/sbt/BasicCommands.scala +++ b/main-command/src/main/scala/sbt/BasicCommands.scala @@ -155,15 +155,14 @@ object BasicCommands { private[sbt] def multiParserImpl(state: Option[State]): Parser[List[String]] = { val nonSemi = charClass(_ != ';', "not ';'") - val semi = token(OptSpace ~> ';' ~> OptSpace) + val semi = token(';') val nonDelim = charClass(c => c != '"' && c != '{' && c != '}', label = "not '\"', '{', '}'") val components = ((nonSemi & nonDelim) | StringEscapable | braces('{', '}')).+ val cmdPart = matched(components).examples() val completionParser: Option[Parser[String]] = - state.map(s => OptSpace ~> matched(s.nonMultiParser) <~ OptSpace) - val cmdParser = - completionParser.map(sp => (sp & cmdPart) | cmdPart).getOrElse(cmdPart).map(_.trim) + state.map(s => OptSpace ~> matched((s.nonMultiParser & cmdPart) | cmdPart) <~ OptSpace) + val cmdParser = completionParser.getOrElse(cmdPart).map(_.trim) val multiCmdParser: Parser[String] = semi ~> cmdParser /* * There are two cases that need to be handled separately: diff --git a/main-command/src/test/scala/sbt/MultiParserSpec.scala b/main-command/src/test/scala/sbt/MultiParserSpec.scala index 1beb4059d..1ea3c8556 100644 --- a/main-command/src/test/scala/sbt/MultiParserSpec.scala +++ b/main-command/src/test/scala/sbt/MultiParserSpec.scala @@ -7,6 +7,7 @@ package sbt +import scala.concurrent.duration._ import org.scalatest.FlatSpec import sbt.internal.util.complete.Parser @@ -28,6 +29,15 @@ class MultiParserSpec extends FlatSpec { } it should "parse single command with leading spaces" in { assert("; foo".parse == Seq("foo")) + assert(" ; foo".parse == Seq("foo")) + assert(" foo;".parse == Seq("foo")) + } + it should "parse single command with trailing spaces" in { + assert("; foo ".parse == Seq("foo")) + assert(";foo ".parse == Seq("foo")) + assert("foo; ".parse == Seq("foo")) + assert(" foo; ".parse == Seq("foo")) + assert(" foo ; ".parse == Seq("foo")) } it should "parse multiple commands with leading spaces" in { assert("; foo;bar".parse == Seq("foo", "bar")) @@ -103,4 +113,14 @@ class MultiParserSpec extends FlatSpec { val unmatchedEmptyBraces = "{{{{}}}" assert(s"compile; $unmatchedEmptyBraces".parseEither.isLeft) } + it should "handle cosmetic whitespace" in { + val commands = (1 to 100).map(_ => "compile") + val multiLine = commands.mkString(" \n ;", " \n ;", " \n ") + val start = System.nanoTime + assert(multiLine.parse == commands) + val elapsed = System.nanoTime - start + // Make sure this took less than 10 seconds. It takes about 30 milliseconds to run with + // 100 commands and 3 milliseconds with 3 commands. With a bad parser, it will run indefinitely. + assert(elapsed.nanoseconds < 10.seconds) + } } From c7ec97d18f7a1412362ce40d7268ac4f55053ec5 Mon Sep 17 00:00:00 2001 From: Ethan Atkins Date: Sun, 28 Jul 2019 11:33:17 -0700 Subject: [PATCH 2/2] Rework multi parser to exclude 'alias' There have been numerous issues with the multi parser incorrectly splitting commands like `alias foo = ; bar` into `"alias foo =" :: "bar" :: Nil`. To fix this, I update the multi parser implementation to accept a list of commands that cannot be part of a multi command. For now, the only excluded command is "alias", but if other issues come up, we can add more. I also thought about adding a system property for excluding more commands but it didn't seem worth the maintenance cost at this point. In addition to adding a filter for the excluded commands, I also reworked the multi parser so that I think its more clear (and should hopefully have more predictable performance). I changed the cmdPart parser to accept empty strings. Prior to this, the parser explicitly handled the non-leading semicolon and leading semicolon cases separately. With the relaxed cmdPart, we can handle both cases with a single parser. We just have to strip any empty commands at the beginning or end of the command list. --- .../src/main/scala/sbt/BasicCommands.scala | 66 ++++++++++++------- .../src/test/scala/sbt/MultiParserSpec.scala | 14 ++++ 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/main-command/src/main/scala/sbt/BasicCommands.scala b/main-command/src/main/scala/sbt/BasicCommands.scala index 819a71b2e..07cfef7b5 100644 --- a/main-command/src/main/scala/sbt/BasicCommands.scala +++ b/main-command/src/main/scala/sbt/BasicCommands.scala @@ -31,6 +31,8 @@ import BasicKeys._ import java.io.File import sbt.io.IO + +import scala.collection.mutable.ListBuffer import scala.util.control.NonFatal object BasicCommands { @@ -153,35 +155,51 @@ object BasicCommands { state } - private[sbt] def multiParserImpl(state: Option[State]): Parser[List[String]] = { + private[sbt] def multiParserImpl(state: Option[State]): Parser[List[String]] = + multiParserImpl(state, "alias" :: Nil) + private[sbt] def multiParserImpl( + state: Option[State], + exclude: Seq[String] + ): Parser[List[String]] = { val nonSemi = charClass(_ != ';', "not ';'") - val semi = token(';') val nonDelim = charClass(c => c != '"' && c != '{' && c != '}', label = "not '\"', '{', '}'") - val components = ((nonSemi & nonDelim) | StringEscapable | braces('{', '}')).+ - val cmdPart = matched(components).examples() + // Accept empty commands to simplify the parser. + val cmdPart = + matched(((nonSemi & nonDelim) | StringEscapable | braces('{', '}')).*).examples() val completionParser: Option[Parser[String]] = - state.map(s => OptSpace ~> matched((s.nonMultiParser & cmdPart) | cmdPart) <~ OptSpace) - val cmdParser = completionParser.getOrElse(cmdPart).map(_.trim) - val multiCmdParser: Parser[String] = semi ~> cmdParser - /* - * There are two cases that need to be handled separately: - * 1) leading semicolon with one or more commands separated by a semicolon - * 2) no leading semicolon and at least one command followed by a trailing semicolon - * and zero or more commands separated by a semicolon - * Both cases allow an optional trailing semi-colon. - * - * These have to be handled separately because the performance degrades badly if the first - * case is implemented with the following parser: - * (semi.? ~> ((combinedParser <~ semi).* ~ combinedParser <~ semi.?) - */ - val noLeadingSemi = (cmdParser ~ (multiCmdParser.+ | semi.map(_ => Nil))).map { - case (prefix, last) => (prefix :: Nil ::: last.toList).filter(_.nonEmpty) + state.map(s => (matched(s.nonMultiParser) & cmdPart) | cmdPart) + val cmdParser = { + val parser = completionParser.getOrElse(cmdPart).map(_.trim) + exclude.foldLeft(parser) { case (p, e) => p & not(OptSpace ~ s"$e ", s"!$e").examples() } } - val leadingSemi = multiCmdParser.+.map(_.toList.filter(_.nonEmpty)) - ((leadingSemi | noLeadingSemi) <~ semi.?).flatMap { - case Nil => Parser.failure("No commands were parsed") - case commands => Parser.success(commands) + val multiCmdParser: Parser[String] = token(';') ~> OptSpace ~> cmdParser + + /* + * We accept empty commands at the end of the the list as an implementation detail that allows + * for a trailing semi-colon without an extra parser since the cmdParser accepts an empty string + * and the multi parser is `token(';') ~ cmdParser`. We do not want to accept empty commands + * that occur in the middle of the sequence so if we find one, we return a failed parser. If + * we wanted to relax that restriction, then we could just replace the flatMap below with + * `rest.filterNot(_.isEmpty)`. + */ + def validateCommands(s: Seq[String]): Parser[List[String]] = { + val result = new ListBuffer[String] + val it = s.iterator + var fail = false + while (it.hasNext && !fail) { + it.next match { + case "" => fail = it.hasNext + case next => result += next + } + } + if (fail) Parser.failure(s"Couldn't parse empty commands in ${s.mkString(";")}") + else Parser.success(result.toList) + } + + (cmdParser ~ multiCmdParser.+).flatMap { + case ("", rest) => validateCommands(rest) + case (p, rest) => validateCommands(rest).map(p :: _) } } diff --git a/main-command/src/test/scala/sbt/MultiParserSpec.scala b/main-command/src/test/scala/sbt/MultiParserSpec.scala index 1ea3c8556..a2f580f10 100644 --- a/main-command/src/test/scala/sbt/MultiParserSpec.scala +++ b/main-command/src/test/scala/sbt/MultiParserSpec.scala @@ -123,4 +123,18 @@ class MultiParserSpec extends FlatSpec { // 100 commands and 3 milliseconds with 3 commands. With a bad parser, it will run indefinitely. assert(elapsed.nanoseconds < 10.seconds) } + it should "exclude alias" in { + val alias = """alias scalacFoo = ; set scalacOptions ++= Seq("-foo")""" + assert(alias.parseEither.isLeft) + assert(s" $alias".parseEither.isLeft) + assert(s" $alias;".parseEither.isLeft) + assert(s";$alias".parseEither.isLeft) + assert(s"; $alias".parseEither.isLeft) + assert(s";$alias;".parseEither.isLeft) + assert(s"; $alias;".parseEither.isLeft) + assert(s"foo; $alias".parseEither.isLeft) + assert(s"; foo;$alias".parseEither.isLeft) + assert(s"; foo;$alias; ".parseEither.isLeft) + assert(s"; foo; $alias; ".parseEither.isLeft) + } }