mirror of https://github.com/sbt/sbt.git
Decode utf-8 bytes in jline 3 reader
With sbt 1.4.x, non-ASCII UTF-8 characters are not handled correctly in the console. It was not clear from the jline 3 documentation, but the NonBlockingReader.read method is supposed to return Unicode code points rather than UTF-8 bytes. To fix this, we can decode the input and return the code point rather than the raw byte read directly from the input stream.
This commit is contained in:
parent
c52e9916e2
commit
e00240a73b
|
|
@ -8,7 +8,8 @@
|
|||
package sbt.internal.util
|
||||
|
||||
import java.io.{ InputStream, OutputStream, PrintWriter }
|
||||
import java.nio.charset.Charset
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.charset.{ CharacterCodingException, Charset, CharsetDecoder }
|
||||
import java.util.{ Arrays, EnumSet }
|
||||
import java.util.concurrent.atomic.{ AtomicBoolean, AtomicReference }
|
||||
import org.jline.utils.InfoCmp.Capability
|
||||
|
|
@ -77,6 +78,27 @@ private[sbt] object JLine3 {
|
|||
new DumbTerminal(term.inputStream, term.outputStream)
|
||||
else wrapTerminal(term)
|
||||
}
|
||||
/**
 * Reads bytes one at a time from `inputStream` until the bytes collected so far
 * decode to at least one character, and returns the first code point of that
 * decoded text.
 *
 * At most four bytes are consumed per call.
 *
 * @param decoder the decoder applied to the bytes read so far
 * @param inputStream the stream the raw bytes are read from
 * @return the decoded code point; `-1` if the stream reports end of input before
 *         a character could be decoded; `-2` if four bytes were read and they
 *         still did not decode to a character
 */
private[util] def decodeInput(decoder: CharsetDecoder, inputStream: InputStream): Int = {
  val maxBytes = 4
  val pending = new Array[Byte](maxBytes)

  @scala.annotation.tailrec
  def readNext(count: Int): Int =
    inputStream.read() match {
      case -1 => -1
      case raw =>
        pending(count) = raw.toByte
        val filled = count + 1
        // A coding failure here is taken to mean "not enough bytes yet", so it
        // is mapped to the -2 marker instead of being propagated.
        val decoded =
          try {
            val points = decoder.decode(ByteBuffer.wrap(pending, 0, filled)).codePoints().iterator()
            if (points.hasNext) points.nextInt() else -2
          } catch { case _: CharacterCodingException => -2 }
        if (decoded != -2 || filled >= maxBytes) decoded else readNext(filled)
    }

  readNext(0)
}
|
||||
private[this] def wrapTerminal(term: Terminal): JTerminal = {
|
||||
new AbstractTerminal(
|
||||
term.name,
|
||||
|
|
@ -140,10 +162,8 @@ private[sbt] object JLine3 {
|
|||
val thread = new AtomicReference[Thread]
|
||||
private def fillBuffer(): Unit = thread.synchronized {
|
||||
thread.set(Thread.currentThread)
|
||||
buffer.put(
|
||||
try input.read()
|
||||
catch { case _: InterruptedException => -3 }
|
||||
)
|
||||
try buffer.put(decodeInput(encoding.newDecoder, term.inputStream))
|
||||
catch { case _: InterruptedException => buffer.put(-3) }
|
||||
}
|
||||
override def close(): Unit = thread.get match {
|
||||
case null =>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* sbt
|
||||
* Copyright 2011 - 2018, Lightbend, Inc.
|
||||
* Copyright 2008 - 2010, Mark Harrah
|
||||
* Licensed under Apache License 2.0 (see LICENSE)
|
||||
*/
|
||||
|
||||
package sbt.internal.util
|
||||
|
||||
import java.io.InputStream
|
||||
import java.nio.charset.Charset
|
||||
import org.scalatest.FlatSpec
|
||||
import java.util.concurrent.LinkedBlockingQueue
|
||||
|
||||
/** Checks that `JLine3.decodeInput` turns UTF-8 byte sequences into single code points. */
class UTF8DecoderSpec extends FlatSpec {
  val decoder = Charset.forName("UTF-8").newDecoder

  /** Builds a stream that yields `values` in order and reports -1 once they run out. */
  private def streamOf(values: Int*): InputStream = {
    val pending = new LinkedBlockingQueue[Int]
    values.foreach(v => pending.put(v))
    new InputStream {
      override def read(): Int = Option(pending.poll).getOrElse(-1)
    }
  }

  "ascii characters" should "not be modified" in {
    // this stream never ends: every read yields the same ascii byte
    val alwaysC = new InputStream {
      override def read(): Int = 'c'.toInt
    }
    assert(JLine3.decodeInput(decoder, alwaysC) == 'c'.toInt)
  }

  "swedish characters" should "be handled" in {
    // these are the utf-8 codes for an umlauted a in swedish
    val umlautedA = streamOf(195, 164)
    assert(JLine3.decodeInput(decoder, umlautedA) == 228)
  }

  "emoji" should "be handled" in {
    // laughing and crying emoji in utf8
    val emoji = streamOf(0xF0, 0x9F, 0x98, 0x82)
    assert(JLine3.decodeInput(decoder, emoji) == 128514)
  }
}
|
||||
Loading…
Reference in New Issue