Decode utf-8 bytes in jline 3 reader

With sbt 1.4.x, non-ASCII UTF-8 characters are not handled correctly in
the console. It was not clear from the jline 3 documentation, but the
NonBlockingReader.read method is supposed to return Unicode code points
rather than UTF-8 bytes. To fix this, we can decode the input and return
the code point rather than the raw byte read directly from the input stream.
This commit is contained in:
Ethan Atkins 2020-11-17 16:11:22 -08:00
parent c52e9916e2
commit e00240a73b
2 changed files with 66 additions and 5 deletions

View File

@ -8,7 +8,8 @@
package sbt.internal.util
import java.io.{ InputStream, OutputStream, PrintWriter }
import java.nio.charset.Charset
import java.nio.ByteBuffer
import java.nio.charset.{ CharacterCodingException, Charset, CharsetDecoder }
import java.util.{ Arrays, EnumSet }
import java.util.concurrent.atomic.{ AtomicBoolean, AtomicReference }
import org.jline.utils.InfoCmp.Capability
@ -77,6 +78,27 @@ private[sbt] object JLine3 {
new DumbTerminal(term.inputStream, term.outputStream)
else wrapTerminal(term)
}
/**
 * Reads a single code point from `inputStream`, pulling one byte at a time (at most four)
 * and asking `decoder` to decode everything read so far after each byte.
 *
 * @param decoder the charset decoder used to interpret the accumulated bytes
 * @param inputStream the byte source; it is read with the single-byte `read()` method
 * @return the first decoded code point; -1 if the stream reports end of input before a
 *         code point could be decoded; -2 if four bytes were read without a successful decode
 */
private[util] def decodeInput(decoder: CharsetDecoder, inputStream: InputStream): Int = {
  val maxLen = 4
  val pending = new Array[Byte](maxLen)

  // Decodes the first `len` pending bytes. A CharacterCodingException is taken to mean
  // that the bytes do not (yet) form a complete character, which is reported as -2.
  def attempt(len: Int): Int =
    try {
      val points = decoder.decode(ByteBuffer.wrap(pending, 0, len)).codePoints().iterator
      if (points.hasNext) points.next else -2
    } catch { case _: CharacterCodingException => -2 }

  @scala.annotation.tailrec
  def loop(len: Int): Int = inputStream.read() match {
    case -1 => -1
    case next =>
      pending(len) = next.toByte
      val filled = len + 1
      val decoded = attempt(filled)
      // Stop as soon as a code point is available or the four byte budget is used up.
      if (decoded != -2 || filled >= maxLen) decoded else loop(filled)
  }

  loop(0)
}
private[this] def wrapTerminal(term: Terminal): JTerminal = {
new AbstractTerminal(
term.name,
@ -140,10 +162,8 @@ private[sbt] object JLine3 {
val thread = new AtomicReference[Thread]
// Records the calling thread in `thread` and enqueues the next input value into `buffer`,
// all while holding the monitor of `thread`.
// NOTE(review): this span looks like a rendered diff hunk that shows both the previous
// `input.read()` form and the new `decodeInput(...)` form -- confirm which lines are live.
private def fillBuffer(): Unit = thread.synchronized {
thread.set(Thread.currentThread)
// Enqueue the value returned by input.read(), or -3 if the read is interrupted.
buffer.put(
try input.read()
catch { case _: InterruptedException => -3 }
)
// Enqueue the value produced by decodeInput, using a fresh decoder obtained from `encoding`;
// -3 is enqueued instead when an InterruptedException is raised.
try buffer.put(decodeInput(encoding.newDecoder, term.inputStream))
catch { case _: InterruptedException => buffer.put(-3) }
}
override def close(): Unit = thread.get match {
case null =>

View File

@ -0,0 +1,41 @@
/*
* sbt
* Copyright 2011 - 2018, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*/
package sbt.internal.util
import java.io.InputStream
import java.nio.charset.Charset
import org.scalatest.FlatSpec
import java.util.concurrent.LinkedBlockingQueue
/**
 * Checks that `JLine3.decodeInput` turns UTF-8 encoded input bytes into code points
 * for one, two and four byte sequences.
 */
class UTF8DecoderSpec extends FlatSpec {
  val decoder = Charset.forName("UTF-8").newDecoder

  /** Builds a stream that serves the given byte values in order, with -1 as the fallback. */
  private def queuedStream(codes: Int*): InputStream = {
    val pending = new LinkedBlockingQueue[Int]
    for (code <- codes) pending.put(code)
    new InputStream {
      override def read(): Int = Option(pending.poll).getOrElse(-1)
    }
  }

  "ascii characters" should "not be modified" in {
    val expected = 'c'.toInt
    // this stream never ends: every read yields the same ascii byte
    val alwaysC = new InputStream {
      override def read(): Int = expected
    }
    assert(JLine3.decodeInput(decoder, alwaysC) == expected)
  }

  "swedish characters" should "be handled" in {
    // these are the utf-8 codes for an umlauted a in swedish
    val umlautA = queuedStream(195, 164)
    assert(JLine3.decodeInput(decoder, umlautA) == 228)
  }

  "emoji" should "be handled" in {
    // laughing and crying emoji in utf8
    val emoji = queuedStream(0xF0, 0x9F, 0x98, 0x82)
    assert(JLine3.decodeInput(decoder, emoji) == 128514)
  }
}