From d335d2ff25a5ffd4f49dce5cbedcae00e3115b51 Mon Sep 17 00:00:00 2001 From: Zachary Snow Date: Mon, 30 Aug 2021 10:39:49 -0600 Subject: [PATCH] use UTF-8 across all platforms - all inputs are now decoded as UTF-8, regardless of the platform - decoding failures are now tolerated via transliteration given errant characters are generally expected to appear in comments --- CHANGELOG.md | 2 ++ src/Language/SystemVerilog/Parser/Preprocess.hs | 5 ++++- test/lex/latin1.sv | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 test/lex/latin1.sv diff --git a/CHANGELOG.md b/CHANGELOG.md index e1a7f53..57d6675 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ * Support parameters which use a type-of as the data type * Support typed valued parameters declared in parameter port lists without explicitly providing a leading `parameter` or `localparam` marker +* Use UTF-8 on all platforms and tolerate transcoding failures, enabling reading + files encoded using Latin-1 with special characters in comments ## v0.0.8 diff --git a/src/Language/SystemVerilog/Parser/Preprocess.hs b/src/Language/SystemVerilog/Parser/Preprocess.hs index a75dc7c..8130cb0 100644 --- a/src/Language/SystemVerilog/Parser/Preprocess.hs +++ b/src/Language/SystemVerilog/Parser/Preprocess.hs @@ -19,9 +19,11 @@ import Control.Monad.State.Strict import Data.Char (ord) import Data.List (tails, isPrefixOf, findIndex, intercalate) import Data.Maybe (isJust, fromJust) +import GHC.IO.Encoding.Failure (CodingFailureMode(TransliterateCodingFailure)) +import GHC.IO.Encoding.UTF8 (mkUTF8) import System.Directory (findFile) import System.FilePath (dropFileName) -import System.IO (hGetContents, openFile, stdin, IOMode(ReadMode)) +import System.IO (hGetContents, hSetEncoding, openFile, stdin, IOMode(ReadMode)) import qualified Data.Map.Strict as Map import Language.SystemVerilog.Parser.Tokens (Position(..)) @@ -107,6 +109,7 @@ loadFile path = do if path == "-" then return stdin else openFile 
path ReadMode + hSetEncoding handle $ mkUTF8 TransliterateCodingFailure contents <- hGetContents handle return $ normalize contents diff --git a/test/lex/latin1.sv b/test/lex/latin1.sv new file mode 100644 index 0000000..72b4b66 --- /dev/null +++ b/test/lex/latin1.sv @@ -0,0 +1,4 @@ +module top; +// Αε +initial $display("Hi!"); +endmodule