use UTF-8 across all platforms

- all inputs are now decoded as UTF-8, regardless of the platform
- decoding failures are now tolerated via transliteration, given that errant
  characters are generally expected to appear in comments
This commit is contained in:
Zachary Snow 2021-08-30 10:39:49 -06:00
parent bceec39339
commit d335d2ff25
3 changed files with 10 additions and 1 deletions

View File

@ -22,6 +22,8 @@
* Support parameters which use a type-of as the data type
* Support typed valued parameters declared in parameter port lists without
explicitly providing a leading `parameter` or `localparam` marker
* Use UTF-8 on all platforms and tolerate transcoding failures, enabling reading
files encoded using Latin-1 with special characters in comments
## v0.0.8

View File

@ -19,9 +19,11 @@ import Control.Monad.State.Strict
import Data.Char (ord)
import Data.List (tails, isPrefixOf, findIndex, intercalate)
import Data.Maybe (isJust, fromJust)
import GHC.IO.Encoding.Failure (CodingFailureMode(TransliterateCodingFailure))
import GHC.IO.Encoding.UTF8 (mkUTF8)
import System.Directory (findFile)
import System.FilePath (dropFileName)
import System.IO (hGetContents, openFile, stdin, IOMode(ReadMode))
import System.IO (hGetContents, hSetEncoding, openFile, stdin, IOMode(ReadMode))
import qualified Data.Map.Strict as Map
import Language.SystemVerilog.Parser.Tokens (Position(..))
@ -107,6 +109,7 @@ loadFile path = do
if path == "-"
then return stdin
else openFile path ReadMode
hSetEncoding handle $ mkUTF8 TransliterateCodingFailure
contents <- hGetContents handle
return $ normalize contents

4
test/lex/latin1.sv Normal file
View File

@ -0,0 +1,4 @@
module top; // lexer fixture (test/lex/latin1.sv): the comment on the next line holds non-ASCII characters
// Áå
initial $display("Hi!"); // sole statement in the module: prints "Hi!"
endmodule