From 8b103271a7b7fc2603e633e7a6a84e37e574eb38 Mon Sep 17 00:00:00 2001 From: Mark Harrah Date: Fri, 16 Aug 2013 14:21:45 -0400 Subject: [PATCH] API docs for Parser(s). --- .../src/main/scala/sbt/complete/Parser.scala | 99 ++++++++++++-- .../src/main/scala/sbt/complete/Parsers.scala | 124 +++++++++++++++++- 2 files changed, 207 insertions(+), 16 deletions(-) diff --git a/util/complete/src/main/scala/sbt/complete/Parser.scala b/util/complete/src/main/scala/sbt/complete/Parser.scala index 8a6081c14..798ea6d49 100644 --- a/util/complete/src/main/scala/sbt/complete/Parser.scala +++ b/util/complete/src/main/scala/sbt/complete/Parser.scala @@ -7,6 +7,11 @@ package sbt.complete import sbt.Types.{const, left, right, some} import sbt.Util.{makeList,separate} +/** A String parser that provides semi-automatic tab completion. +* A successful parse results in a value of type `T`. +* The methods in this trait are what must be implemented to define a new Parser implementation, but are not typically useful for common usage. +* Instead, most useful methods for combining smaller parsers into larger parsers are implicitly added by the [[RichParser]] type. +*/ sealed trait Parser[+T] { def derive(i: Char): Parser[T] @@ -20,37 +25,57 @@ sealed trait Parser[+T] } sealed trait RichParser[A] { - /** Produces a Parser that applies the original Parser and then applies `next` (in order).*/ + /** Apply the original Parser and then apply `next` (in order). The result of both is provided as a pair. 
*/ def ~[B](next: Parser[B]): Parser[(A,B)] - /** Produces a Parser that applies the original Parser one or more times.*/ + + /** Apply the original Parser one or more times and provide the non-empty sequence of results.*/ def + : Parser[Seq[A]] - /** Produces a Parser that applies the original Parser zero or more times.*/ + + /** Apply the original Parser zero or more times and provide the (potentially empty) sequence of results.*/ def * : Parser[Seq[A]] - /** Produces a Parser that applies the original Parser zero or one times.*/ + + /** Apply the original Parser zero or one times, returning None if it was applied zero times or the result wrapped in Some if it was applied once.*/ def ? : Parser[Option[A]] - /** Produces a Parser that applies either the original Parser or `b`.*/ + + /** Apply either the original Parser or `b`.*/ def |[B >: A](b: Parser[B]): Parser[B] - /** Produces a Parser that applies either the original Parser or `b`.*/ + + /** Apply either the original Parser or `b`.*/ def ||[B](b: Parser[B]): Parser[Either[A,B]] - /** Produces a Parser that applies the original Parser to the input and then applies `f` to the result.*/ + + /** Apply the original Parser to the input and then apply `f` to the result.*/ def map[B](f: A => B): Parser[B] + /** Returns the original parser. This is useful for converting literals to Parsers. * For example, `'c'.id` or `"asdf".id`*/ def id: Parser[A] + /** Apply the original Parser, but provide `value` as the result if it succeeds. */ def ^^^[B](value: B): Parser[B] + + /** Apply the original Parser, but provide `alt` as the result if it fails.*/ def ??[B >: A](alt: B): Parser[B] + + /** Produces a Parser that applies the original Parser and then applies `b` (in order), discarding the result of `b`. 
+ * (The arrow points in the direction of the retained result.)*/ def <~[B](b: Parser[B]): Parser[A] + + /** Produces a Parser that applies the original Parser and then applies `b` (in order), discarding the result of the original parser. + * (The arrow points in the direction of the retained result.)*/ def ~>[B](b: Parser[B]): Parser[B] /** Uses the specified message if the original Parser fails.*/ def !!!(msg: String): Parser[A] + /** If an exception is thrown by the original Parser, * capture it and fail locally instead of allowing the exception to propagate up and terminate parsing.*/ def failOnException: Parser[A] @deprecated("Use `not` and explicitly provide the failure message", "0.12.2") def unary_- : Parser[Unit] + + /** Apply the original parser, but only succeed if `o` also succeeds. + * Note that `o` does not need to consume the same amount of input to satisfy this condition.*/ def & (o: Parser[_]): Parser[A] @deprecated("Use `and` and `not` and explicitly provide the failure message", "0.12.2") @@ -58,16 +83,23 @@ sealed trait RichParser[A] /** Explicitly defines the completions for the original Parser.*/ def examples(s: String*): Parser[A] + /** Explicitly defines the completions for the original Parser.*/ def examples(s: Set[String], check: Boolean = false): Parser[A] + /** Converts a Parser returning a Char sequence to a Parser returning a String.*/ def string(implicit ev: A <:< Seq[Char]): Parser[String] + /** Produces a Parser that filters the original parser. - * If 'f' is not true when applied to the output of the original parser, the Parser returned by this method fails.*/ + * If 'f' is not true when applied to the output of the original parser, the Parser returned by this method fails. + * The failure message is constructed by applying `msg` to the String that was successfully parsed by the original parser. 
*/ def filter(f: A => Boolean, msg: String => String): Parser[A] + /** Applies the original parser, applies `f` to the result to get the next parser, and applies that parser and uses its result for the overall result. */ def flatMap[B](f: A => Parser[B]): Parser[B] } + +/** Contains Parser implementation helper methods not typically needed for using parsers. */ object Parser extends ParserMain { sealed abstract class Result[+T] { @@ -129,9 +161,11 @@ object Parser extends ParserMain val bad = completions.filter( apply(a)(_).resultEmpty.isFailure) if(!bad.isEmpty) sys.error("Invalid example completions: " + bad.mkString("'", "', '", "'")) } + def tuple[A,B](a: Option[A], b: Option[B]): Option[(A,B)] = (a,b) match { case (Some(av), Some(bv)) => Some((av, bv)); case _ => None } + def mapParser[A,B](a: Parser[A], f: A => B): Parser[B] = a.ifValid { a.result match @@ -227,6 +261,7 @@ object Parser extends ParserMain } trait ParserMain { + /** Provides combinators for Parsers.*/ implicit def richParser[A](a: Parser[A]): RichParser[A] = new RichParser[A] { def ~[B](b: Parser[B]) = seqParser(a, b) @@ -254,6 +289,7 @@ trait ParserMain def string(implicit ev: A <:< Seq[Char]): Parser[String] = map(_.mkString) def flatMap[B](f: A => Parser[B]) = bindParser(a, f) } + implicit def literalRichCharParser(c: Char): RichParser[Char] = richParser(c) implicit def literalRichStringParser(s: String): RichParser[String] = richParser(s) @@ -263,9 +299,16 @@ trait ParserMain * from the Parser constructed by the `softFailure` method. */ private[sbt] def softFailure(msg: => String, definitive: Boolean = false): Parser[Nothing] = SoftInvalid( mkFailures(msg :: Nil, definitive) ) - + + /** Defines a parser that always fails on any input with messages `msgs`. 
+ * If `definitive` is `true`, any failures by later alternatives are discarded.*/ def invalid(msgs: => Seq[String], definitive: Boolean = false): Parser[Nothing] = Invalid(mkFailures(msgs, definitive)) + + /** Defines a parser that always fails on any input with message `msg`. + * If `definitive` is `true`, any failures by later alternatives are discarded.*/ def failure(msg: => String, definitive: Boolean = false): Parser[Nothing] = invalid(msg :: Nil, definitive) + + /** Defines a parser that always succeeds on empty input with the result `value`.*/ def success[T](value: T): Parser[T] = new ValidParser[T] { override def result = Some(value) def resultEmpty = Value(value) @@ -274,15 +317,22 @@ trait ParserMain override def toString = "success(" + value + ")" } + /** Presents a Char range as a Parser. A single Char is parsed only if it is in the given range.*/ implicit def range(r: collection.immutable.NumericRange[Char]): Parser[Char] = charClass(r contains _).examples(r.map(_.toString) : _*) + + /** Defines a Parser that parses a single character only if it is contained in `legal`.*/ def chars(legal: String): Parser[Char] = { val set = legal.toSet charClass(set, "character in '" + legal + "'") examples(set.map(_.toString)) } + + /** Defines a Parser that parses a single character only if the predicate `f` returns true for that character. + * If this parser fails, `label` is used as the failure message. */ def charClass(f: Char => Boolean, label: String = ""): Parser[Char] = new CharacterClass(f, label) + /** Presents a single Char `ch` as a Parser that only parses that exact character. 
*/ implicit def literal(ch: Char): Parser[Char] = new ValidParser[Char] { def result = None def resultEmpty = mkFailure( "Expected '" + ch + "'" ) @@ -290,24 +340,44 @@ trait ParserMain def completions(level: Int) = Completions.single(Completion.suggestStrict(ch.toString)) override def toString = "'" + ch + "'" } + /** Presents a literal String `s` as a Parser that only parses that exact text and provides it as the result.*/ implicit def literal(s: String): Parser[String] = stringLiteral(s, 0) + + /** See [[unapply]]. */ object ~ { + /** Convenience for destructuring a tuple that mirrors the `~` combinator.*/ def unapply[A,B](t: (A,B)): Some[(A,B)] = Some(t) } + /** Parses input `str` using `parser`. If successful, the result is provided wrapped in `Right`. If unsuccessful, an error message is provided in `Left`.*/ def parse[T](str: String, parser: Parser[T]): Either[String, T] = Parser.result(parser, str).left.map { failures => val (msgs,pos) = failures() ProcessError(str, msgs, pos) } + /** Convenience method to use when developing a parser. + * `parser` is applied to the input `str`. + * If `completions` is true, the available completions for the input are displayed. + * Otherwise, the result of parsing is printed using the result's `toString` method. + * If parsing fails, the error message is displayed. + * + * See also [[sampleParse]] and [[sampleCompletions]]. */ def sample(str: String, parser: Parser[_], completions: Boolean = false): Unit = if(completions) sampleCompletions(str, parser) else sampleParse(str, parser) + + /** Convenience method to use when developing a parser. + * `parser` is applied to the input `str` and the result of parsing is printed using the result's `toString` method. + * If parsing fails, the error message is displayed. */ def sampleParse(str: String, parser: Parser[_]): Unit = parse(str, parser) match { case Left(msg) => println(msg) case Right(v) => println(v) } + + /** Convenience method to use when developing a parser. 
+ * `parser` is applied to the input `str` and the available completions are displayed on separate lines. + * If parsing fails, the error message is displayed. */ def sampleCompletions(str: String, parser: Parser[_], level: Int = 1): Unit = Parser.completions(parser, str, level).get foreach println @@ -332,14 +402,21 @@ trait ParserMain loop(-1, p) } + /** Applies parser `p` to input `s`. */ def apply[T](p: Parser[T])(s: String): Parser[T] = (p /: s)(derive1) + /** Applies parser `p` to a single character of input. */ def derive1[T](p: Parser[T], c: Char): Parser[T] = if(p.valid) p.derive(c) else p - // The x Completions.empty removes any trailing token completions where append.isEmpty - def completions(p: Parser[_], s: String, level: Int): Completions = apply(p)(s).completions(level) x Completions.empty + /** Applies parser `p` to input `s` and returns the completions at verbosity `level`. + * The interpretation of `level` is up to parser definitions, but 0 is the default by convention, + * with increasing positive numbers corresponding to increasing verbosity. Typically no more than + * a few levels are defined. 
*/ + def completions(p: Parser[_], s: String, level: Int): Completions = + // The x Completions.empty removes any trailing token completions where append.isEmpty + apply(p)(s).completions(level) x Completions.empty def examples[A](a: Parser[A], completions: Set[String], check: Boolean = false): Parser[A] = if(a.valid) { diff --git a/util/complete/src/main/scala/sbt/complete/Parsers.scala b/util/complete/src/main/scala/sbt/complete/Parsers.scala index c9184cfe8..6bc745285 100644 --- a/util/complete/src/main/scala/sbt/complete/Parsers.scala +++ b/util/complete/src/main/scala/sbt/complete/Parsers.scala @@ -8,29 +8,55 @@ package sbt.complete import java.net.URI import java.lang.Character.{getType, MATH_SYMBOL, OTHER_SYMBOL, DASH_PUNCTUATION, OTHER_PUNCTUATION, MODIFIER_SYMBOL, CURRENCY_SYMBOL} -// Some predefined parsers +/** Provides standard implementations of commonly useful [[Parser]]s. */ trait Parsers { + /** Matches the end of input, providing no useful result on success. */ lazy val EOF = not(any) + /** Parses any single character and provides that character as the result. */ lazy val any: Parser[Char] = charClass(_ => true, "any character") + /** Set that contains each digit in a String representation.*/ lazy val DigitSet = Set("0","1","2","3","4","5","6","7","8","9") + + /** Parses any single digit and provides that digit as a Char as the result.*/ lazy val Digit = charClass(_.isDigit, "digit") examples DigitSet + + /** Set containing Chars for hexadecimal digits 0-9 and A-F (but not a-f). */ lazy val HexDigitSet = Set('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F') + + /** Parses a single hexadecimal digit (0-9, a-f, A-F). */ lazy val HexDigit = charClass(c => HexDigitSet(c.toUpper), "hex digit") examples HexDigitSet.map(_.toString) + /** Parses a single letter, according to Char.isLetter, into a Char. 
*/ lazy val Letter = charClass(_.isLetter, "letter") + + /** Parses the first Char in an sbt identifier, which must be a [[Letter]].*/ def IDStart = Letter + + /** Parses an identifier Char other than the first character. This includes letters, digits, dash `-`, and underscore `_`.*/ lazy val IDChar = charClass(isIDChar, "ID character") + + /** Parses an identifier String, which must start with [[IDStart]] and contain zero or more [[IDChar]]s after that. */ lazy val ID = identifier(IDStart, IDChar) + + /** Parses a single operator Char, as allowed by [[isOpChar]]. */ lazy val OpChar = charClass(isOpChar, "symbol") + + /** Parses a non-empty operator String, which consists only of characters allowed by [[OpChar]]. */ lazy val Op = OpChar.+.string + + /** Parses either an operator String defined by [[Op]] or a non-symbolic identifier defined by [[ID]]. */ lazy val OpOrID = ID | Op + /** Parses a single, non-symbolic Scala identifier Char. Valid characters are letters, digits, and the underscore character `_`. */ lazy val ScalaIDChar = charClass(isScalaIDChar, "Scala identifier character") + + /** Parses a non-symbolic Scala-like identifier. The identifier must start with [[IDStart]] and contain zero or more [[ScalaIDChar]]s after that.*/ lazy val ScalaID = identifier(IDStart, ScalaIDChar) + /** Parses a String that starts with `start` and is followed by zero or more characters parsed by `rep`.*/ def identifier(start: Parser[Char], rep: Parser[Char]): Parser[String] = start ~ rep.* map { case x ~ xs => (x +: xs).mkString } @@ -42,67 +68,143 @@ trait Parsers else any + /** Returns true if `c` is an operator character. */ def isOpChar(c: Char) = !isDelimiter(c) && isOpType(getType(c)) def isOpType(cat: Int) = cat match { case MATH_SYMBOL | OTHER_SYMBOL | DASH_PUNCTUATION | OTHER_PUNCTUATION | MODIFIER_SYMBOL | CURRENCY_SYMBOL => true; case _ => false } + /** Returns true if `c` is a dash `-`, a letter, digit, or an underscore `_`. 
*/ def isIDChar(c: Char) = isScalaIDChar(c) || c == '-' + + /** Returns true if `c` is a letter, digit, or an underscore `_`. */ def isScalaIDChar(c: Char) = c.isLetterOrDigit || c == '_' + def isDelimiter(c: Char) = c match { case '`' | '\'' | '\"' | /*';' | */',' | '.' => true ; case _ => false } + /** Matches a single character that is not a whitespace character. */ lazy val NotSpaceClass = charClass(!_.isWhitespace, "non-whitespace character") + + /** Matches a single whitespace character, as determined by Char.isWhitespace.*/ lazy val SpaceClass = charClass(_.isWhitespace, "whitespace character") + + /** Matches a non-empty String consisting of non-whitespace characters. */ lazy val NotSpace = NotSpaceClass.+.string + + /** Matches a possibly empty String consisting of non-whitespace characters. */ lazy val OptNotSpace = NotSpaceClass.*.string + + /** Matches a non-empty String consisting of whitespace characters. + * The suggested tab completion is a single, constant space character.*/ lazy val Space = SpaceClass.+.examples(" ") + + /** Matches a possibly empty String consisting of whitespace characters. + * The suggested tab completion is a single, constant space character.*/ lazy val OptSpace = SpaceClass.*.examples(" ") + + /** Parses a non-empty String that contains only valid URI characters, as defined by [[URIChar]].*/ lazy val URIClass = URIChar.+.string !!! "Invalid URI" + + /** Triple-quotes, as used for verbatim quoting.*/ lazy val VerbatimDQuotes = "\"\"\"" + + /** Double quote character. */ lazy val DQuoteChar = '\"' + + /** Backslash character. */ lazy val BackslashChar = '\\' + + /** Matches a single double quote. */ lazy val DQuoteClass = charClass(_ == DQuoteChar, "double-quote character") + + /** Matches any character except a double quote or whitespace. 
*/ lazy val NotDQuoteSpaceClass = charClass({ c: Char => (c != DQuoteChar) && !c.isWhitespace }, "non-double-quote-space character") + + /** Matches any character except a double quote or backslash. */ lazy val NotDQuoteBackslashClass = charClass({ c: Char => (c != DQuoteChar) && (c != BackslashChar) }, "non-double-quote-backslash character") + /** Matches a single character that is valid somewhere in a URI. */ lazy val URIChar = charClass(alphanum) | chars("_-!.~'()*,;:$&+=?/[]@%#") + + /** Returns true if `c` is an ASCII letter or digit. */ def alphanum(c: Char) = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') // TODO: implement def fileParser(base: File): Parser[File] = token(mapOrFail(NotSpace)(s => new File(s.mkString)), "") - + + /** Parses a port number. Currently, this accepts any integer and presents a tab completion suggestion of ``. */ lazy val Port = token(IntBasic, "") + + /** Parses a signed integer. */ lazy val IntBasic = mapOrFail( '-'.? ~ Digit.+ )( Function.tupled(toInt) ) + + /** Parses an unsigned integer. */ lazy val NatBasic = mapOrFail( Digit.+ )( _.mkString.toInt ) + private[this] def toInt(neg: Option[Char], digits: Seq[Char]): Int = (neg.toSeq ++ digits).mkString.toInt + + /** Parses the lower-case values `true` and `false` into their respective Boolean values. */ lazy val Bool = ("true" ^^^ true) | ("false" ^^^ false) + + /** Parses a potentially quoted String value. The value may be verbatim quoted ([[StringVerbatim]]), + * quoted with interpreted escapes ([[StringEscapable]]), or unquoted ([[NotQuoted]]). */ lazy val StringBasic = StringVerbatim | StringEscapable | NotQuoted - lazy val StringVerbatim: Parser[String] = VerbatimDQuotes ~> - any.+.string.filter(!_.contains(VerbatimDQuotes), _ => "Invalid verbatim string") <~ - VerbatimDQuotes + + /** Parses a verbatim quoted String value, discarding the quotes in the result. 
This kind of quoted text starts with triple quotes `"""` + * and ends at the next triple quotes and may contain any character in between. */ + lazy val StringVerbatim: Parser[String] = VerbatimDQuotes ~> + any.+.string.filter(!_.contains(VerbatimDQuotes), _ => "Invalid verbatim string") <~ + VerbatimDQuotes + + /** Parses a string value, interpreting escapes and discarding the surrounding quotes in the result. + * See [[EscapeSequence]] for supported escapes. */ lazy val StringEscapable: Parser[String] = (DQuoteChar ~> (NotDQuoteBackslashClass | EscapeSequence).+.string <~ DQuoteChar | (DQuoteChar ~ DQuoteChar) ^^^ "") + + /** Parses a single escape sequence into the represented Char. + * Escapes start with a backslash and are followed by `u` for a [[UnicodeEscape]] or by `b`, `t`, `n`, `f`, `r`, `"`, `'`, `\` for standard escapes. */ lazy val EscapeSequence: Parser[Char] = BackslashChar ~> ('b' ^^^ '\b' | 't' ^^^ '\t' | 'n' ^^^ '\n' | 'f' ^^^ '\f' | 'r' ^^^ '\r' | '\"' ^^^ '\"' | '\'' ^^^ '\'' | '\\' ^^^ '\\' | UnicodeEscape) + + /** Parses a single unicode escape sequence into the represented Char. + * A unicode escape begins with a backslash, followed by a `u` and 4 hexadecimal digits representing the unicode value. */ lazy val UnicodeEscape: Parser[Char] = ("u" ~> repeat(HexDigit, 4, 4)) map { seq => Integer.parseInt(seq.mkString, 16).toChar } + + /** Parses an unquoted, non-empty String value that cannot start with a double quote and cannot contain whitespace.*/ lazy val NotQuoted = (NotDQuoteSpaceClass ~ OptNotSpace) map { case (c, s) => c.toString + s } + /** Applies `rep` zero or more times, separated by `sep`. + * The result is the (possibly empty) sequence of results from the multiple `rep` applications. The `sep` results are discarded. */ def repsep[T](rep: Parser[T], sep: Parser[_]): Parser[Seq[T]] = rep1sep(rep, sep) ?? Nil + + /** Applies `rep` one or more times, separated by `sep`. 
+ * The result is the non-empty sequence of results from the multiple `rep` applications. The `sep` results are discarded. */ def rep1sep[T](rep: Parser[T], sep: Parser[_]): Parser[Seq[T]] = (rep ~ (sep ~> rep).*).map { case (x ~ xs) => x +: xs } + /** Wraps the result of `p` in `Some`.*/ def some[T](p: Parser[T]): Parser[Option[T]] = p map { v => Some(v) } + + /** Applies `f` to the result of `p`, transforming any exception when evaluating + * `f` into a parse failure with the exception `toString` as the message.*/ def mapOrFail[S,T](p: Parser[S])(f: S => T): Parser[T] = p flatMap { s => try { success(f(s)) } catch { case e: Exception => failure(e.toString) } } + /** Parses a space-delimited, possibly empty sequence of arguments. + * The arguments may use quotes and escapes according to [[StringBasic]]. */ def spaceDelimited(display: String): Parser[Seq[String]] = (token(Space) ~> token(StringBasic, display)).* <~ SpaceClass.* + /** Applies `p` and uses `true` as the result if it succeeds and turns failure into a result of `false`. */ def flag[T](p: Parser[T]): Parser[Boolean] = (p ^^^ true) ?? false + /** Defines a sequence parser where the parser used for each part depends on the previously parsed values. + * `p` is applied to the (possibly empty) sequence of already parsed values to obtain the next parser to use. + * The parsers obtained in this way are separated by `sep`, whose result is discarded and only the sequence + * of values from the parsers returned by `p` is used for the result. */ def repeatDep[A](p: Seq[A] => Parser[A], sep: Parser[Any]): Parser[Seq[A]] = { def loop(acc: Seq[A]): Parser[Seq[A]] = { @@ -112,14 +214,26 @@ trait Parsers p(Vector()) flatMap { first => loop(Seq(first)) } } + /** Applies String.trim to the result of `p`. */ def trimmed(p: Parser[String]) = p map { _.trim } + + /** Parses a URI that is valid according to the single argument java.net.URI constructor. 
*/ lazy val basicUri = mapOrFail(URIClass)( uri => new URI(uri)) + + /** Parses a URI that is valid according to the single argument java.net.URI constructor, using `ex` as tab completion examples. */ def Uri(ex: Set[URI]) = basicUri examples(ex.map(_.toString)) } + +/** Provides standard [[Parser]] implementations. */ object Parsers extends Parsers + +/** Provides common [[Parser]] implementations and helper methods.*/ object DefaultParsers extends Parsers with ParserMain { + /** Applies parser `p` to input `s` and returns `true` if the parse was successful. */ def matches(p: Parser[_], s: String): Boolean = apply(p)(s).resultEmpty.isValid + + /** Returns `true` if `s` parses successfully according to [[ID]].*/ def validID(s: String): Boolean = matches(ID, s) } \ No newline at end of file