From d578aee5d93fee72eee2824c81e8f9e36ea5ea18 Mon Sep 17 00:00:00 2001 From: Zachary Snow Date: Fri, 29 Mar 2019 05:19:11 -0400 Subject: [PATCH] conflate the preprocessor and lexer This should make it much easier to add support for ``, `", macros with arguments, etc., in the future. --- src/Language/SystemVerilog/Parser.hs | 5 +- src/Language/SystemVerilog/Parser/Lex.x | 277 ++++++++++++++++-- .../SystemVerilog/Parser/Preprocess.hs | 185 ------------ src/Language/SystemVerilog/Parser/Tokens.hs | 1 - sv2v.cabal | 1 - 5 files changed, 257 insertions(+), 212 deletions(-) delete mode 100644 src/Language/SystemVerilog/Parser/Preprocess.hs diff --git a/src/Language/SystemVerilog/Parser.hs b/src/Language/SystemVerilog/Parser.hs index 6a71309..45ddac5 100644 --- a/src/Language/SystemVerilog/Parser.hs +++ b/src/Language/SystemVerilog/Parser.hs @@ -6,12 +6,11 @@ module Language.SystemVerilog.Parser ) where import Language.SystemVerilog.AST +import Language.SystemVerilog.Parser.Lex import Language.SystemVerilog.Parser.Parse -import Language.SystemVerilog.Parser.Preprocess -- parses a file given a table of predefined macros and the file name parseFile :: [String] -> [(String, String)] -> FilePath -> IO AST parseFile includePaths env file = - loadFile file >>= - preprocess includePaths env >>= + lexFile includePaths env file >>= return . descriptions diff --git a/src/Language/SystemVerilog/Parser/Lex.x b/src/Language/SystemVerilog/Parser/Lex.x index 27b18b5..56ac8a3 100644 --- a/src/Language/SystemVerilog/Parser/Lex.x +++ b/src/Language/SystemVerilog/Parser/Lex.x @@ -1,5 +1,29 @@ { -module Language.SystemVerilog.Parser.Lex (alexScanTokens) where +{- sv2v + - Author: Zachary Snow + - Original Lexer Author: Tom Hawkins + - + - Combined source lexing and preprocessing + - + - These procedures are combined so that we can simultaneously process macros in + - a sane way (something analogous to character-by-character) and have our + - lexemes properly tagged with source file positions. + - + - The scariest piece of this module is the use of `unsafePerformIO`. We want to + - be able to search for and read files whenever we see an include directive. + - Trying to thread the IO Monad through alex's interface would be very + - convoluted. The operations performed are not effectful, and are type safe. + -} +{-# OPTIONS_GHC -fno-warn-unused-imports #-} +-- The above pragma gets rid of annoying warning caused by alex 3.2.4. This has +-- been fixed on their development branch, so this can be removed once they roll +-- a new release. (no new release as of 3/29/2018) +module Language.SystemVerilog.Parser.Lex (lexFile) where + +import System.FilePath (dropFileName) +import System.Directory (findFile) +import System.IO.Unsafe (unsafePerformIO) +import qualified Data.Map.Strict as Map import Language.SystemVerilog.Parser.Tokens } @@ -255,28 +279,49 @@ tokens :- "<<<=" { tok Sym_lt_lt_lt_eq } ">>>=" { tok Sym_gt_gt_gt_eq } + "`include" { includeFile } + @directive { handleDirective } + @commentLine { removeUntil "\n" } @commentBlock { removeUntil "*/" } - @directive { tok Spe_Directive } - @newline { tok Spe_Newline } - @whitespace ; + $white ; . { tok Unknown } { -type AlexUserState = [Token] +data Cond + = CurrentlyTrue + | PreviouslyTrue + | NeverTrue + deriving (Eq, Show) + +data AlexUserState = LS + { lsToks :: [Token] + , lsCurrFile :: FilePath + , lsEnv :: Map.Map String String + , lsCondStack :: [Cond] + , lsIncludePaths :: [FilePath] + } deriving (Eq, Show) alexInitUserState :: AlexUserState -alexInitUserState = [] +alexInitUserState = LS [] "" Map.empty [] [] -alexScanTokens :: String -> [Token] -alexScanTokens str = - let result = runAlex str $ alexMonadScan >> get - in case result of - Left msg -> error $ "Lex Error: " ++ msg - Right tokens -> tokens +lexFile :: [String] -> [(String, String)] -> FilePath -> IO [Token] +lexFile includePaths env path = do + str <- readFile path + let result = runAlex str $ setEnv >> alexMonadScan >> get + return $ case result of + Left msg -> error $ "Lexical Error: " ++ msg + Right tokens -> lsToks tokens + where + initialEnv = Map.fromList env + setEnv = modify $ \s -> s + { lsEnv = initialEnv + , lsIncludePaths = includePaths + , lsCurrFile = path + } get :: Alex AlexUserState get = Alex $ \s -> Right (s, alex_ust s) @@ -289,11 +334,195 @@ modify f = Alex func where func s = Right (s { alex_ust = new }, ()) where new = f (alex_ust s) +getCurrentFile :: Alex String +getCurrentFile = gets lsCurrFile + +setCurrentFile :: String -> Alex () +setCurrentFile x = modify $ \s -> s { lsCurrFile = x } + alexEOF :: Alex () alexEOF = return () type Action = AlexInput -> Int -> Alex () +breakAfter :: (a -> Bool) -> [a] -> ([a], [a]) +breakAfter f l = (a ++ [b], bs) + where (a, b : bs) = break f l + +includeSearch :: FilePath -> Alex FilePath +includeSearch file = do + base <- getCurrentFile + includePaths <- gets lsIncludePaths + let directories = dropFileName base : includePaths + let result = unsafePerformIO $ findFile directories file + case result of + Just path -> return path + Nothing -> + alexError + $ "Could not find file " ++ file ++ " included from " ++ base + +loadFile :: String -> Alex String +loadFile s = return $ unsafePerformIO $ readFile s + +includeFile :: Action +includeFile (AlexPn f l c, _, _, str) len = do + let (dropped , rest1) = breakAfter (== '"') (drop len str) + let (filename, rest2) = break (== '"') rest1 + let rest3 = if null rest2 then [] else tail rest2 + let offset = len + length dropped + length filename + 1 + let inputFollow = (AlexPn (f + offset) l (c + offset), ' ', [], rest3) + fileFollow <- getCurrentFile + -- process the the included file + path <- includeSearch filename + content <- loadFile path + let inputIncluded = (AlexPn 0 0 0, ' ', [], content) + setCurrentFile path + alexSetInput inputIncluded + alexMonadScan + -- resume processing the original file + setCurrentFile fileFollow + alexSetInput inputFollow + alexMonadScan + +unskippableDirectives :: [String] +unskippableDirectives = ["else", "elsif", "endif", "ifdef", "ifndef"] + +isIdentChar :: Char -> Bool +isIdentChar ch = + ('a' <= ch && ch <= 'z') || + ('A' <= ch && ch <= 'Z') || + ('0' <= ch && ch <= '9') || + (ch == '_') || (ch == '$') + +takeString :: Alex String +takeString = do + (AlexPn f l c, _, _, str) <- alexGetInput + let (x, rest) = span isIdentChar str + let len = length x + alexSetInput (AlexPn (f+len) l (c+len), ' ', [], rest) + return x + +getCurrentPos :: Alex Position +getCurrentPos = do + (AlexPn _ l c, _, _, _) <- alexGetInput + file <- getCurrentFile + return $ Position file l c + +dropSpace :: Alex () +dropSpace = do + (AlexPn f l c, _, _, str) <- alexGetInput + case str of + [] -> return () + ' ' : rest -> alexSetInput (AlexPn (f+1) l (c+1), ' ', [], rest) + ch : _ -> do + pos <- getCurrentPos + alexError $ "dropSpace encountered bad char: " ++ show ch ++ + " at " ++ show pos + +-- read tokens after the name until the first (un-escaped) newline +takeUntilNewline :: Alex String +takeUntilNewline = do + (AlexPn f l c, _, _, str) <- alexGetInput + case str of + [] -> return "" + '\n' : _ -> do + return "" + '\\' : '\n' : rest -> do + alexSetInput (AlexPn (f+2) (l+1) 0, ' ', [], rest) + takeUntilNewline >>= return . (' ' :) + ch : rest -> do + alexSetInput (AlexPn (f+1) l (c+1), ' ', [], rest) + takeUntilNewline >>= return . (ch :) + +handleDirective :: Action +handleDirective (AlexPn fOrig lOrig cOrig, _, _, strOrig) len = do + let directive = tail $ take len strOrig + let newPos = AlexPn (fOrig + len) lOrig (cOrig + len) + alexSetInput (newPos, ' ', [], drop len strOrig) + + env <- gets lsEnv + tempInput <- alexGetInput + let dropUntilNewline = removeUntil "\n" tempInput 0 + + condStack <- gets lsCondStack + if not (null condStack) + && head condStack /= CurrentlyTrue + && not (elem directive unskippableDirectives) + then alexMonadScan + else case directive of + + "default_nettype" -> dropUntilNewline + "timescale" -> dropUntilNewline + + "ifdef" -> do + dropSpace + name <- takeString + let newCond = if Map.member name env + then CurrentlyTrue + else NeverTrue + modify $ \s -> s { lsCondStack = newCond : condStack } + alexMonadScan + "ifndef" -> do + dropSpace + name <- takeString + let newCond = if Map.notMember name env + then CurrentlyTrue + else NeverTrue + modify $ \s -> s { lsCondStack = newCond : condStack } + alexMonadScan + "else" -> do + let newCond = if head condStack == NeverTrue + then CurrentlyTrue + else NeverTrue + modify $ \s -> s { lsCondStack = newCond : tail condStack } + alexMonadScan + "elsif" -> do + dropSpace + name <- takeString + let currCond = head condStack + let newCond = + if currCond /= NeverTrue then + PreviouslyTrue + else if Map.member name env then + CurrentlyTrue + else + NeverTrue + modify $ \s -> s { lsCondStack = newCond : tail condStack } + alexMonadScan + "endif" -> do + modify $ \s -> s { lsCondStack = tail condStack } + alexMonadScan + + "define" -> do + -- TODO: We don't yet support macros with arguments! + dropSpace + name <- takeString + defn <- takeUntilNewline + modify $ \s -> s { lsEnv = Map.insert name defn env } + alexMonadScan + "undef" -> do + dropSpace + name <- takeString + modify $ \s -> s { lsEnv = Map.delete name env } + alexMonadScan + "undefineall" -> do + modify $ \s -> s { lsEnv = Map.empty } + alexMonadScan + + _ -> do + case Map.lookup directive env of + Nothing -> do + pos <- getCurrentPos >>= return . show + alexError $ "Undefined macro: " ++ directive ++ " at " ++ pos + Just replacement -> do + let size = length replacement + -- TODO: How should we track the file position when we + -- substitute in a macro? + (AlexPn f l c, ' ', [], str) <- alexGetInput + let pos = AlexPn (f - size) l (c - size) + alexSetInput (pos, ' ', [], replacement ++ str) + alexMonadScan + -- remove characters from the input until the pattern is reached removeUntil :: String -> Action @@ -306,11 +535,11 @@ removeUntil pattern _ _ = loop let found = (null str && wantNewline) || pattern == take patternLen str let nextPos = if head str == '\n' - then AlexPn f (l+1) 0 - else AlexPn f l (c+1) + then AlexPn (f+1) (l+1) 0 + else AlexPn (f+1) l (c+1) let afterPos = if wantNewline - then AlexPn f (l+1) 0 - else AlexPn f l (c + patternLen) + then AlexPn (f+1) (l+1) 0 + else AlexPn (f+1) l (c + patternLen) let (newPos, newStr) = if found then (afterPos, drop patternLen str) else (nextPos, drop 1 str) @@ -320,10 +549,14 @@ removeUntil pattern _ _ = loop else loop tok :: TokenName -> Action -tok tokId ((AlexPn _ l c), _, _, input) len = - modify (++ [t]) >> alexMonadScan - where - tokStr = take len input - tokPos = Position "" l c - t = Token tokId tokStr tokPos +tok tokId ((AlexPn _ l c), _, _, input) len = do + currFile <- gets lsCurrFile + let tokStr = take len input + let tokPos = Position currFile l c + condStack <- gets lsCondStack + () <- if not (null condStack) && head condStack /= CurrentlyTrue + then modify id + else modify (push $ Token tokId tokStr tokPos) + alexMonadScan + where push t s = s { lsToks = (lsToks s) ++ [t] } } diff --git a/src/Language/SystemVerilog/Parser/Preprocess.hs b/src/Language/SystemVerilog/Parser/Preprocess.hs deleted file mode 100644 index 9c99f71..0000000 --- a/src/Language/SystemVerilog/Parser/Preprocess.hs +++ /dev/null @@ -1,185 +0,0 @@ -{- sv2v - - Author: Zachary Snow - - - - Source file loading and preprocessing - -} -module Language.SystemVerilog.Parser.Preprocess -( loadFile -, preprocess -, PP (..) -) where - -import Control.Monad.State -import Data.List (intercalate) -import qualified Data.Map.Strict as Map -import System.FilePath (dropFileName) -import System.Directory (findFile) - -import Language.SystemVerilog.Parser.Lex -import Language.SystemVerilog.Parser.Tokens - - -isNewline :: Token -> Bool -isNewline (Token t _ _) = t == Spe_Newline - -unskippableDirectives :: [String] -unskippableDirectives = ["else", "elsif", "endif", "ifdef", "ifndef"] - --- a bit of a hack to allow things like: `WIDTH'b0 -combineNumbers :: [Token] -> [Token] -combineNumbers (Token Lit_number size pos : Token Lit_number ('\'' : num) _ : tokens) = - Token Lit_number (size ++ "'" ++ num) pos : combineNumbers tokens -combineNumbers (token : tokens) = token : combineNumbers tokens -combineNumbers [] = [] - -includeSearch :: FilePath -> FilePath -> (StateT PP IO) FilePath -includeSearch base file = do - includePaths <- gets ppIncludePaths - let directories = dropFileName base : includePaths - result <- lift $ findFile directories file - case result of - Just path -> return path - Nothing -> - error $ "Could not find file " ++ file ++ " included from " ++ base - -data Cond - = CurrentlyTrue - | PreviouslyTrue - | NeverTrue - deriving (Eq, Show) - -data PP = PP - { ppEnv :: Map.Map String [Token] - , ppCondStack :: [Cond] - , ppIncludePaths :: [FilePath] - } deriving (Eq, Show) - -pp :: [Token] -> (StateT PP IO) [Token] - -pp [] = do - condStack <- gets ppCondStack - if null condStack - then return [] - else error $ "have unfinished " ++ (show $ length condStack) - ++ " conditional directive(s)" - -pp (Token Spe_Directive str pos : tokens) = do - let directive = tail str - condStack <- gets ppCondStack - env <- gets ppEnv - if not (null condStack) - && head condStack /= CurrentlyTrue - && not (elem directive unskippableDirectives) - then pp tokens - else case directive of - - "default_nettype" -> do - let str' = str ++ " " ++ (tokenString $ head tokens) - let token' = Token Spe_Directive str' pos - tokens' <- pp $ tail tokens - return $ token' : tokens' - - "timescale" -> do - -- timescale must appear alone on a line - -- read tokens until the first (un-escaped) newline - let (defn, rest) = break isNewline $ tokens - let str' = str ++ " " ++ (intercalate " " $ map tokenString defn) - let token' = Token Spe_Directive str' pos - tokens' <- pp rest - return $ token' : tokens' - - "include" -> do - let file = init $ tail $ tokenString $ head tokens - let Position basePath _ _ = pos - filePath <- includeSearch basePath file - includedTokens <- lift $ loadFile filePath - pp $ includedTokens ++ tail tokens - - "ifdef" -> do - let name = tokenString $ head tokens - newCond <- return $ - if Map.member name env then CurrentlyTrue else NeverTrue - modify $ \s -> s { ppCondStack = newCond : condStack } - pp $ tail tokens - "ifndef" -> do - let name = tokenString $ head tokens - newCond <- return $ - if Map.notMember name env then CurrentlyTrue else NeverTrue - modify $ \s -> s { ppCondStack = newCond : condStack } - pp $ tail tokens - "else" -> do - newCond <- return $ - if head condStack == NeverTrue then CurrentlyTrue else NeverTrue - modify $ \s -> s { ppCondStack = newCond : tail condStack } - pp tokens - "elsif" -> do - let name = tokenString $ head tokens - let currCond = head condStack - newCond <- return $ - if currCond /= NeverTrue then - PreviouslyTrue - else if Map.member name env then - CurrentlyTrue - else - NeverTrue - modify $ \s -> s { ppCondStack = newCond : tail condStack } - pp $ tail tokens - "endif" -> do - modify $ \s -> s { ppCondStack = tail condStack } - pp tokens - - "define" -> do - -- read tokens after the name until the first (un-escaped) newline - let (defn, rest) = break isNewline $ tail tokens - -- macro definitions can contain macros, but no conditionals, so we - -- temporarily drop the condition stack while we preprocess it - modify' $ \s -> s { ppCondStack = [] } - defn' <- pp defn - modify' $ \s -> s { ppCondStack = condStack } - let env' = Map.insert (tokenString $ head tokens) defn' env - modify $ \s -> s { ppEnv = env' } - pp rest -- drop the macro, process the rest of the tokens - "undef" -> do - let name = tokenString $ head tokens - modify $ \s -> s { ppEnv = Map.delete name env } - pp $ tail tokens - "undefineall" -> do - modify $ \s -> s { ppEnv = Map.empty } - pp tokens - - _ -> do - case Map.lookup directive env of - Nothing -> do - error $ "Undefined macro: " ++ directive ++ " at " ++ (show pos) - Just replacement -> do - -- TODO: How should we track the position of tokens that are - -- substituted in? Using only one position or the other - -- doesn't tell the full story. - tokens' <- pp tokens - return $ replacement ++ tokens' - -pp (Token Spe_Newline _ _ : tokens) = pp tokens - -pp (token : tokens) = do - condStack <- gets ppCondStack - tokens' <- pp tokens - if not (null condStack) && head condStack /= CurrentlyTrue - then return tokens' - else return $ token : tokens' - --- loads and lexes the file at the given path -loadFile :: FilePath -> IO [Token] -loadFile file = do - content <- readFile file - let tokens = alexScanTokens content - return $ map relocate tokens - where - relocate :: Token -> Token - relocate (Token t s (Position _ l c)) = Token t s $ Position file l c - -preprocess :: [String] -> [(String, String)] -> [Token] -> IO [Token] -preprocess includePaths env tokens = do - let initialEnv = Map.map alexScanTokens $ Map.fromList env - let initialState = PP initialEnv [] includePaths - res <- evalStateT (pp tokens) initialState - return $ combineNumbers res diff --git a/src/Language/SystemVerilog/Parser/Tokens.hs b/src/Language/SystemVerilog/Parser/Tokens.hs index 6c22b84..51da1b0 100644 --- a/src/Language/SystemVerilog/Parser/Tokens.hs +++ b/src/Language/SystemVerilog/Parser/Tokens.hs @@ -335,6 +335,5 @@ data TokenName | Sym_lt_lt_lt_eq | Sym_gt_gt_gt_eq | Spe_Directive - | Spe_Newline | Unknown deriving (Show, Eq) diff --git a/sv2v.cabal b/sv2v.cabal index dc67e49..fba0376 100644 --- a/sv2v.cabal +++ b/sv2v.cabal @@ -46,7 +46,6 @@ executable sv2v Language.SystemVerilog.Parser.Lex Language.SystemVerilog.Parser.Parse Language.SystemVerilog.Parser.ParseDecl - Language.SystemVerilog.Parser.Preprocess Language.SystemVerilog.Parser.Tokens -- Conversion modules Convert