From 8a7a3228e86441d21a387a4b8da182f46e4faf36 Mon Sep 17 00:00:00 2001 From: "e.e d3si9n" Date: Sun, 11 Mar 2012 07:31:39 -0400 Subject: [PATCH 1/4] implemented parser for escaped string and verbatim string --- util/complete/Parsers.scala | 45 ++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/util/complete/Parsers.scala b/util/complete/Parsers.scala index 367f2fadb..43c4d5799 100644 --- a/util/complete/Parsers.scala +++ b/util/complete/Parsers.scala @@ -17,6 +17,10 @@ trait Parsers lazy val DigitSet = Set("0","1","2","3","4","5","6","7","8","9") lazy val Digit = charClass(_.isDigit, "digit") examples DigitSet + lazy val OctalDigitSet = Set("0","1","2","3","4","5","6","7") + lazy val OctalDigit = charClass(c => OctalDigitSet(c.toString), "octal") examples OctalDigitSet + lazy val HexDigitSet = Set("0","1","2","3","4","5","6","7","8","9", "A", "B", "C", "D", "E", "F") + lazy val HexDigit = charClass(c => HexDigitSet(c.toString.toUpperCase), "hex") examples HexDigitSet lazy val Letter = charClass(_.isLetter, "letter") def IDStart = Letter lazy val IDChar = charClass(isIDChar, "ID character") @@ -44,6 +48,12 @@ trait Parsers lazy val Space = SpaceClass.+.examples(" ") lazy val OptSpace = SpaceClass.*.examples(" ") lazy val URIClass = URIChar.+.string !!! "Invalid URI" + lazy val VerbatimDQuotes = "\"\"\"" + lazy val DQuoteChar = '\"' + lazy val DQuoteClass = charClass(_ == DQuoteChar, "double-quote character") + lazy val NotDQuoteClass = charClass(_ != DQuoteChar, "non-double-quote character") + lazy val NotDQuoteBackslashClass = charClass({ c: Char => + c != DQuoteChar && c != '\\' }, "non-double-quote character") lazy val URIChar = charClass(alphanum) | chars("_-!.~'()*,;:$&+=?/[]@%#") def alphanum(c: Char) = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') @@ -57,6 +67,39 @@ trait Parsers private[this] def toInt(neg: Option[Char], digits: Seq[Char]): Int = (neg.toSeq ++ digits).mkString.toInt lazy val Bool = ("true" ^^^ true) | ("false" ^^^ false) + lazy val StringBasic = StringVerbatim | StringEscapable | NotQuoted + def StringVerbatim: Parser[String] = { + var dqcount = 0 + val p = VerbatimDQuotes ~ + charClass(_ match { + case DQuoteChar => + dqcount += 1 + dqcount < 3 + case _ => + dqcount = 0 + true + }).*.string ~ DQuoteChar + p map { case ((s, p), c) => s + p + c.toString } filter( + { _.endsWith(VerbatimDQuotes) }, _ => "Expected '%s'" format VerbatimDQuotes) map { s => + s.substring(3, s.length - 3) } + } + lazy val StringEscapable: Parser[String] = { + val p = DQuoteChar ~> + (EscapeSequence | NotDQuoteBackslashClass map {_.toString}).* <~ DQuoteChar + p map { _.mkString } + } + lazy val EscapeSequence: Parser[String] = + "\\" ~> ("b" ^^^ "\b" | "t" ^^^ "\t" | "n" ^^^ "\n" | "f" ^^^ "\f" | "r" ^^^ "\r" | + "\"" ^^^ "\"" | "'" ^^^ "\'" | "\\" ^^^ "\\" | OctalEscape | UnicodeEscape) + lazy val OctalEscape: Parser[String] = + repeat(OctalDigit, 1, 3) map { seq => + Integer.parseInt(seq.mkString, 8).asInstanceOf[Char].toString + } + lazy val UnicodeEscape: Parser[String] = + ("u" ~> repeat(HexDigit, 4, 4)) map { seq => + Integer.parseInt(seq.mkString, 16).asInstanceOf[Char].toString + } + lazy val NotQuoted = (NotDQuoteClass ~ NotSpace) map { case (c, s) => c.toString + s } def repsep[T](rep: Parser[T], sep: Parser[_]): Parser[Seq[T]] = rep1sep(rep, sep) ?? Nil @@ -67,7 +110,7 @@ trait Parsers def mapOrFail[S,T](p: Parser[S])(f: S => T): Parser[T] = p flatMap { s => try { success(f(s)) } catch { case e: Exception => failure(e.toString) } } - def spaceDelimited(display: String): Parser[Seq[String]] = (token(Space) ~> token(NotSpace, display)).* <~ SpaceClass.* + def spaceDelimited(display: String): Parser[Seq[String]] = (token(Space) ~> token(StringBasic, display)).* <~ SpaceClass.* def flag[T](p: Parser[T]): Parser[Boolean] = (p ^^^ true) ?? false From e6e778a1a31a17aefb6553d7562d36cd1c587bc6 Mon Sep 17 00:00:00 2001 From: "e.e d3si9n" Date: Sun, 11 Mar 2012 13:12:23 -0400 Subject: [PATCH 2/4] removed Octal --- util/complete/Parsers.scala | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/util/complete/Parsers.scala b/util/complete/Parsers.scala index 43c4d5799..226bd9b0b 100644 --- a/util/complete/Parsers.scala +++ b/util/complete/Parsers.scala @@ -17,10 +17,8 @@ trait Parsers lazy val DigitSet = Set("0","1","2","3","4","5","6","7","8","9") lazy val Digit = charClass(_.isDigit, "digit") examples DigitSet - lazy val OctalDigitSet = Set("0","1","2","3","4","5","6","7") - lazy val OctalDigit = charClass(c => OctalDigitSet(c.toString), "octal") examples OctalDigitSet - lazy val HexDigitSet = Set("0","1","2","3","4","5","6","7","8","9", "A", "B", "C", "D", "E", "F") - lazy val HexDigit = charClass(c => HexDigitSet(c.toString.toUpperCase), "hex") examples HexDigitSet + lazy val HexDigitSet = Set('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F') + lazy val HexDigit = charClass(HexDigitSet, "hex") examples HexDigitSet.map(_.toString) lazy val Letter = charClass(_.isLetter, "letter") def IDStart = Letter lazy val IDChar = charClass(isIDChar, "ID character") @@ -90,11 +88,7 @@ trait Parsers } lazy val EscapeSequence: Parser[String] = "\\" ~> ("b" ^^^ "\b" | "t" ^^^ "\t" | "n" ^^^ "\n" | "f" ^^^ "\f" | "r" ^^^ "\r" | - "\"" ^^^ "\"" | "'" ^^^ "\'" | "\\" ^^^ "\\" | OctalEscape | UnicodeEscape) - lazy val OctalEscape: Parser[String] = - repeat(OctalDigit, 1, 3) map { seq => - Integer.parseInt(seq.mkString, 8).asInstanceOf[Char].toString - } + "\"" ^^^ "\"" | "'" ^^^ "\'" | "\\" ^^^ "\\" | UnicodeEscape) lazy val UnicodeEscape: Parser[String] = ("u" ~> repeat(HexDigit, 4, 4)) map { seq => Integer.parseInt(seq.mkString, 16).asInstanceOf[Char].toString From feb315b878ff45761d62678c3cb9ad5d2d980077 Mon Sep 17 00:00:00 2001 From: "e.e d3si9n" Date: Sun, 11 Mar 2012 13:19:13 -0400 Subject: [PATCH 3/4] StringVerbatim is now stateless --- util/complete/Parsers.scala | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/util/complete/Parsers.scala b/util/complete/Parsers.scala index 226bd9b0b..9413a728e 100644 --- a/util/complete/Parsers.scala +++ b/util/complete/Parsers.scala @@ -66,21 +66,9 @@ trait Parsers (neg.toSeq ++ digits).mkString.toInt lazy val Bool = ("true" ^^^ true) | ("false" ^^^ false) lazy val StringBasic = StringVerbatim | StringEscapable | NotQuoted - def StringVerbatim: Parser[String] = { - var dqcount = 0 - val p = VerbatimDQuotes ~ - charClass(_ match { - case DQuoteChar => - dqcount += 1 - dqcount < 3 - case _ => - dqcount = 0 - true - }).*.string ~ DQuoteChar - p map { case ((s, p), c) => s + p + c.toString } filter( - { _.endsWith(VerbatimDQuotes) }, _ => "Expected '%s'" format VerbatimDQuotes) map { s => - s.substring(3, s.length - 3) } - } + lazy val StringVerbatim: Parser[String] = VerbatimDQuotes ~> + any.+.string.filter(!_.contains(VerbatimDQuotes), _ => "Invalid verbatim string") <~ + VerbatimDQuotes lazy val StringEscapable: Parser[String] = { val p = DQuoteChar ~> (EscapeSequence | NotDQuoteBackslashClass map {_.toString}).* <~ DQuoteChar From 9239e2fd464e72bc6454076b251871a4fdc91fce Mon Sep 17 00:00:00 2001 From: "e.e d3si9n" Date: Sun, 11 Mar 2012 15:02:50 -0400 Subject: [PATCH 4/4] fixes NotQuoted --- util/complete/Parsers.scala | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/util/complete/Parsers.scala b/util/complete/Parsers.scala index 9413a728e..7a84a13a1 100644 --- a/util/complete/Parsers.scala +++ b/util/complete/Parsers.scala @@ -48,10 +48,12 @@ trait Parsers lazy val URIClass = URIChar.+.string !!! "Invalid URI" lazy val VerbatimDQuotes = "\"\"\"" lazy val DQuoteChar = '\"' + lazy val BackslashChar = '\\' lazy val DQuoteClass = charClass(_ == DQuoteChar, "double-quote character") - lazy val NotDQuoteClass = charClass(_ != DQuoteChar, "non-double-quote character") - lazy val NotDQuoteBackslashClass = charClass({ c: Char => - c != DQuoteChar && c != '\\' }, "non-double-quote character") + lazy val NotDQuoteSpaceClass = + charClass({ c: Char => (c != DQuoteChar) && !c.isWhitespace }, "non-double-quote-space character") + lazy val NotDQuoteBackslashClass = + charClass({ c: Char => (c != DQuoteChar) && (c != BackslashChar) }, "non-double-quote-backslash character") lazy val URIChar = charClass(alphanum) | chars("_-!.~'()*,;:$&+=?/[]@%#") def alphanum(c: Char) = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') @@ -69,19 +71,15 @@ trait Parsers lazy val StringVerbatim: Parser[String] = VerbatimDQuotes ~> any.+.string.filter(!_.contains(VerbatimDQuotes), _ => "Invalid verbatim string") <~ VerbatimDQuotes - lazy val StringEscapable: Parser[String] = { - val p = DQuoteChar ~> - (EscapeSequence | NotDQuoteBackslashClass map {_.toString}).* <~ DQuoteChar - p map { _.mkString } - } - lazy val EscapeSequence: Parser[String] = - "\\" ~> ("b" ^^^ "\b" | "t" ^^^ "\t" | "n" ^^^ "\n" | "f" ^^^ "\f" | "r" ^^^ "\r" | - "\"" ^^^ "\"" | "'" ^^^ "\'" | "\\" ^^^ "\\" | UnicodeEscape) - lazy val UnicodeEscape: Parser[String] = - ("u" ~> repeat(HexDigit, 4, 4)) map { seq => - Integer.parseInt(seq.mkString, 16).asInstanceOf[Char].toString - } - lazy val NotQuoted = (NotDQuoteClass ~ NotSpace) map { case (c, s) => c.toString + s } + lazy val StringEscapable: Parser[String] = + (DQuoteChar ~> (NotDQuoteBackslashClass | EscapeSequence).+.string <~ DQuoteChar | + (DQuoteChar ~ DQuoteChar) ^^^ "") + lazy val EscapeSequence: Parser[Char] = + BackslashChar ~> ('b' ^^^ '\b' | 't' ^^^ '\t' | 'n' ^^^ '\n' | 'f' ^^^ '\f' | 'r' ^^^ '\r' | + '\"' ^^^ '\"' | '\'' ^^^ '\'' | '\\' ^^^ '\\' | UnicodeEscape) + lazy val UnicodeEscape: Parser[Char] = + ("u" ~> repeat(HexDigit, 4, 4)) map { seq => Integer.parseInt(seq.mkString, 16).toChar } + lazy val NotQuoted = (NotDQuoteSpaceClass ~ NotSpace) map { case (c, s) => c.toString + s } def repsep[T](rep: Parser[T], sep: Parser[_]): Parser[Seq[T]] = rep1sep(rep, sep) ?? Nil