Merge pull request #396 from eed3si9n/string_literals

Parser for escaped and verbatim string literals
This commit is contained in:
Mark Harrah 2012-03-11 12:15:20 -07:00
commit 38941a4cfd
1 changed files with 24 additions and 1 deletions

View File

@ -17,6 +17,8 @@ trait Parsers
lazy val DigitSet = Set("0","1","2","3","4","5","6","7","8","9")
lazy val Digit = charClass(_.isDigit, "digit") examples DigitSet
lazy val HexDigitSet = Set('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F')
lazy val HexDigit = charClass(HexDigitSet, "hex") examples HexDigitSet.map(_.toString)
lazy val Letter = charClass(_.isLetter, "letter")
def IDStart = Letter
lazy val IDChar = charClass(isIDChar, "ID character")
@ -44,6 +46,14 @@ trait Parsers
lazy val Space = SpaceClass.+.examples(" ")
lazy val OptSpace = SpaceClass.*.examples(" ")
lazy val URIClass = URIChar.+.string !!! "Invalid URI"
lazy val VerbatimDQuotes = "\"\"\""
lazy val DQuoteChar = '\"'
lazy val BackslashChar = '\\'
lazy val DQuoteClass = charClass(_ == DQuoteChar, "double-quote character")
lazy val NotDQuoteSpaceClass =
charClass({ c: Char => (c != DQuoteChar) && !c.isWhitespace }, "non-double-quote-space character")
lazy val NotDQuoteBackslashClass =
charClass({ c: Char => (c != DQuoteChar) && (c != BackslashChar) }, "non-double-quote-backslash character")
lazy val URIChar = charClass(alphanum) | chars("_-!.~'()*,;:$&+=?/[]@%#")
def alphanum(c: Char) = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')
@ -57,6 +67,19 @@ trait Parsers
private[this] def toInt(neg: Option[Char], digits: Seq[Char]): Int =
(neg.toSeq ++ digits).mkString.toInt
lazy val Bool = ("true" ^^^ true) | ("false" ^^^ false)
lazy val StringBasic = StringVerbatim | StringEscapable | NotQuoted
lazy val StringVerbatim: Parser[String] = VerbatimDQuotes ~>
any.+.string.filter(!_.contains(VerbatimDQuotes), _ => "Invalid verbatim string") <~
VerbatimDQuotes
lazy val StringEscapable: Parser[String] =
(DQuoteChar ~> (NotDQuoteBackslashClass | EscapeSequence).+.string <~ DQuoteChar |
(DQuoteChar ~ DQuoteChar) ^^^ "")
lazy val EscapeSequence: Parser[Char] =
BackslashChar ~> ('b' ^^^ '\b' | 't' ^^^ '\t' | 'n' ^^^ '\n' | 'f' ^^^ '\f' | 'r' ^^^ '\r' |
'\"' ^^^ '\"' | '\'' ^^^ '\'' | '\\' ^^^ '\\' | UnicodeEscape)
lazy val UnicodeEscape: Parser[Char] =
("u" ~> repeat(HexDigit, 4, 4)) map { seq => Integer.parseInt(seq.mkString, 16).toChar }
lazy val NotQuoted = (NotDQuoteSpaceClass ~ NotSpace) map { case (c, s) => c.toString + s }
def repsep[T](rep: Parser[T], sep: Parser[_]): Parser[Seq[T]] =
rep1sep(rep, sep) ?? Nil
@ -67,7 +90,7 @@ trait Parsers
def mapOrFail[S,T](p: Parser[S])(f: S => T): Parser[T] =
p flatMap { s => try { success(f(s)) } catch { case e: Exception => failure(e.toString) } }
def spaceDelimited(display: String): Parser[Seq[String]] = (token(Space) ~> token(NotSpace, display)).* <~ SpaceClass.*
def spaceDelimited(display: String): Parser[Seq[String]] = (token(Space) ~> token(StringBasic, display)).* <~ SpaceClass.*
def flag[T](p: Parser[T]): Parser[Boolean] = (p ^^^ true) ?? false