Decode utf-8 bytes in jline 3 reader

With sbt 1.4.x, non-ASCII UTF-8 characters are not handled correctly in
the console. It was not clear from the jline 3 documentation, but the
NonBlockingReader.read method is supposed to return Unicode code points
rather than UTF-8 bytes. To fix this, we can decode the input and return
the code point rather than the raw byte read directly from the input stream.
This commit is contained in:
Ethan Atkins 2020-11-17 16:11:22 -08:00
parent c52e9916e2
commit e00240a73b
2 changed files with 66 additions and 5 deletions

View File

@ -8,7 +8,8 @@
package sbt.internal.util package sbt.internal.util
import java.io.{ InputStream, OutputStream, PrintWriter } import java.io.{ InputStream, OutputStream, PrintWriter }
import java.nio.charset.Charset import java.nio.ByteBuffer
import java.nio.charset.{ CharacterCodingException, Charset, CharsetDecoder }
import java.util.{ Arrays, EnumSet } import java.util.{ Arrays, EnumSet }
import java.util.concurrent.atomic.{ AtomicBoolean, AtomicReference } import java.util.concurrent.atomic.{ AtomicBoolean, AtomicReference }
import org.jline.utils.InfoCmp.Capability import org.jline.utils.InfoCmp.Capability
@ -77,6 +78,27 @@ private[sbt] object JLine3 {
new DumbTerminal(term.inputStream, term.outputStream) new DumbTerminal(term.inputStream, term.outputStream)
else wrapTerminal(term) else wrapTerminal(term)
} }
/**
 * Reads bytes one at a time from `inputStream` until `decoder` can turn the
 * bytes gathered so far into a code point, and returns that code point.
 *
 * @param decoder the charset decoder applied to the accumulated bytes
 * @param inputStream the stream the raw bytes are read from
 * @return the first decoded code point; -1 as soon as the stream itself
 *         returns -1; or -2 if four bytes were consumed without the decoder
 *         producing a code point
 */
private[util] def decodeInput(decoder: CharsetDecoder, inputStream: InputStream): Int = {
  // At most four bytes are ever accumulated before giving up.
  val pending = new Array[Byte](4)
  @scala.annotation.tailrec
  def readNext(count: Int): Int =
    inputStream.read() match {
      case -1 => -1
      case raw =>
        pending(count) = raw.toByte
        val length = count + 1
        val decoded =
          try {
            val points = decoder.decode(ByteBuffer.wrap(pending, 0, length)).codePoints().iterator
            if (points.hasNext) points.nextInt() else -2
          } catch {
            // The bytes read so far do not decode yet; keep the -2 marker so
            // that another byte is appended and decoding is attempted again.
            case _: CharacterCodingException => -2
          }
        if (length < 4 && decoded == -2) readNext(length) else decoded
    }
  readNext(0)
}
private[this] def wrapTerminal(term: Terminal): JTerminal = { private[this] def wrapTerminal(term: Terminal): JTerminal = {
new AbstractTerminal( new AbstractTerminal(
term.name, term.name,
@ -140,10 +162,8 @@ private[sbt] object JLine3 {
val thread = new AtomicReference[Thread] val thread = new AtomicReference[Thread]
private def fillBuffer(): Unit = thread.synchronized { private def fillBuffer(): Unit = thread.synchronized {
thread.set(Thread.currentThread) thread.set(Thread.currentThread)
buffer.put( try buffer.put(decodeInput(encoding.newDecoder, term.inputStream))
try input.read() catch { case _: InterruptedException => buffer.put(-3) }
catch { case _: InterruptedException => -3 }
)
} }
override def close(): Unit = thread.get match { override def close(): Unit = thread.get match {
case null => case null =>

View File

@ -0,0 +1,41 @@
/*
* sbt
* Copyright 2011 - 2018, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*/
package sbt.internal.util
import java.io.InputStream
import java.nio.charset.Charset
import org.scalatest.FlatSpec
import java.util.concurrent.LinkedBlockingQueue
/**
 * Checks that `JLine3.decodeInput` turns utf-8 encoded bytes read from an
 * input stream into single code points.
 */
class UTF8DecoderSpec extends FlatSpec {
  val decoder = Charset.forName("UTF-8").newDecoder

  /** Builds a stream that hands out `codes` one per read and then returns -1. */
  private def streamOf(codes: Int*): InputStream = {
    val pending = new LinkedBlockingQueue[Int]
    codes.foreach(code => pending.put(code))
    new InputStream {
      override def read(): Int = Option(pending.poll).getOrElse(-1)
    }
  }

  "ascii characters" should "not be modified" in {
    val expected = 'c'.toInt
    // every read of this stream yields the same single-byte character
    val endless = new InputStream {
      override def read(): Int = expected
    }
    assert(JLine3.decodeInput(decoder, endless) == expected)
  }

  "swedish characters" should "be handled" in {
    // 195, 164 is the two byte utf-8 encoding of an a with an umlaut
    val umlautedA = streamOf(195, 164)
    assert(JLine3.decodeInput(decoder, umlautedA) == 228)
  }

  "emoji" should "be handled" in {
    // four byte utf-8 encoding of the laughing and crying emoji
    val laughingCrying = streamOf(0xF0, 0x9F, 0x98, 0x82)
    assert(JLine3.decodeInput(decoder, laughingCrying) == 128514)
  }
}