From e00240a73b617e159e23cc31b7a79d1959f8e73c Mon Sep 17 00:00:00 2001 From: Ethan Atkins Date: Tue, 17 Nov 2020 16:11:22 -0800 Subject: [PATCH] Decode utf-8 bytes in jline 3 reader With sbt 1.4.x, non-ascii utf-8 characters are not handled correctly in the console. It was not clear from the jline 3 documentation, but the NonBlockingReader.read method is supposed to return Unicode code points rather than utf-8 bytes. To fix this, we can decode the input and return the code point rather than the raw byte from the input stream. --- .../main/scala/sbt/internal/util/JLine3.scala | 30 +++++++++++--- .../sbt/internal/util/UTF8DecoderSpec.scala | 41 +++++++++++++++++++ 2 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 internal/util-logging/src/test/scala/sbt/internal/util/UTF8DecoderSpec.scala diff --git a/internal/util-logging/src/main/scala/sbt/internal/util/JLine3.scala b/internal/util-logging/src/main/scala/sbt/internal/util/JLine3.scala index 48046ae66..3cb4342eb 100644 --- a/internal/util-logging/src/main/scala/sbt/internal/util/JLine3.scala +++ b/internal/util-logging/src/main/scala/sbt/internal/util/JLine3.scala @@ -8,7 +8,8 @@ package sbt.internal.util import java.io.{ InputStream, OutputStream, PrintWriter } -import java.nio.charset.Charset +import java.nio.ByteBuffer +import java.nio.charset.{ CharacterCodingException, Charset, CharsetDecoder } import java.util.{ Arrays, EnumSet } import java.util.concurrent.atomic.{ AtomicBoolean, AtomicReference } import org.jline.utils.InfoCmp.Capability @@ -77,6 +78,27 @@ private[sbt] object JLine3 { new DumbTerminal(term.inputStream, term.outputStream) else wrapTerminal(term) } + private[util] def decodeInput(decoder: CharsetDecoder, inputStream: InputStream): Int = { + val bytes = new Array[Byte](4) + var i = 0 + var res = -2 + do { + inputStream.read() match { + case -1 => res = -1 + case byte => + bytes(i) = byte.toByte + i += 1 + val bb = ByteBuffer.wrap(bytes, 0, i) + try { + val cb = 
decoder.decode(bb) + val it = cb.codePoints().iterator + if (it.hasNext) res = it.next + } catch { case _: CharacterCodingException => } + } + + } while (i < 4 && res == -2) + res + } private[this] def wrapTerminal(term: Terminal): JTerminal = { new AbstractTerminal( term.name, @@ -140,10 +162,8 @@ private[sbt] object JLine3 { val thread = new AtomicReference[Thread] private def fillBuffer(): Unit = thread.synchronized { thread.set(Thread.currentThread) - buffer.put( - try input.read() - catch { case _: InterruptedException => -3 } - ) + try buffer.put(decodeInput(encoding.newDecoder, term.inputStream)) + catch { case _: InterruptedException => buffer.put(-3) } } override def close(): Unit = thread.get match { case null => diff --git a/internal/util-logging/src/test/scala/sbt/internal/util/UTF8DecoderSpec.scala b/internal/util-logging/src/test/scala/sbt/internal/util/UTF8DecoderSpec.scala new file mode 100644 index 000000000..957a95b4e --- /dev/null +++ b/internal/util-logging/src/test/scala/sbt/internal/util/UTF8DecoderSpec.scala @@ -0,0 +1,41 @@ +/* + * sbt + * Copyright 2011 - 2018, Lightbend, Inc. 
+ * Copyright 2008 - 2010, Mark Harrah + * Licensed under Apache License 2.0 (see LICENSE) + */ + +package sbt.internal.util + +import java.io.InputStream +import java.nio.charset.Charset +import org.scalatest.FlatSpec +import java.util.concurrent.LinkedBlockingQueue + +class UTF8DecoderSpec extends FlatSpec { + val decoder = Charset.forName("UTF-8").newDecoder + "ascii characters" should "not be modified" in { + val inputStream = new InputStream { + override def read(): Int = 'c'.toInt + } + assert(JLine3.decodeInput(decoder, inputStream) == 'c'.toInt) + } + "swedish characters" should "be handled" in { + val bytes = new LinkedBlockingQueue[Int] + // these are the utf-8 codes for an umlauted a in swedish + Seq(195, 164).foreach(b => bytes.put(b)) + val inputStream = new InputStream { + override def read(): Int = Option(bytes.poll).getOrElse(-1) + } + assert(JLine3.decodeInput(decoder, inputStream) == 228) + } + "emoji" should "be handled" in { + val bytes = new LinkedBlockingQueue[Int] + // laughing and crying emoji in utf8 + Seq(0xF0, 0x9F, 0x98, 0x82).foreach(b => bytes.put(b)) + val inputStream = new InputStream { + override def read(): Int = Option(bytes.poll).getOrElse(-1) + } + assert(JLine3.decodeInput(decoder, inputStream) == 128514) + } +}