From acfbdb07f81ed768ddc944ccb3c3bc5f11b276bc Mon Sep 17 00:00:00 2001 From: Zachary Snow Date: Mon, 18 Mar 2019 05:00:23 -0400 Subject: [PATCH] completely rewrote preprocessor; more extensive directive support (include, timescale) --- src/Convert/Traverse.hs | 1 + src/Language/SystemVerilog/AST.hs | 2 + src/Language/SystemVerilog/Parser.hs | 25 +- src/Language/SystemVerilog/Parser/Lex.x | 23 +- src/Language/SystemVerilog/Parser/Parse.y | 6 + .../SystemVerilog/Parser/Preprocess.hs | 228 ++++++++++++------ src/Language/SystemVerilog/Parser/Tokens.hs | 5 + src/sv2v.hs | 3 +- sv2v.cabal | 1 + 9 files changed, 212 insertions(+), 82 deletions(-) diff --git a/src/Convert/Traverse.hs b/src/Convert/Traverse.hs index 89b0acf..70e39ee 100644 --- a/src/Convert/Traverse.hs +++ b/src/Convert/Traverse.hs @@ -95,6 +95,7 @@ traverseModuleItemsM mapper (PackageItem packageItem) = do return $ case item' of MIPackageItem packageItem' -> PackageItem packageItem' other -> error $ "encountered bad package module item: " ++ show other +traverseModuleItemsM _ (Directive str) = return $ Directive str traverseModuleItems :: Mapper ModuleItem -> Mapper Description traverseModuleItems = unmonad traverseModuleItemsM diff --git a/src/Language/SystemVerilog/AST.hs b/src/Language/SystemVerilog/AST.hs index 4392424..42afcf5 100644 --- a/src/Language/SystemVerilog/AST.hs +++ b/src/Language/SystemVerilog/AST.hs @@ -72,6 +72,7 @@ instance Show PackageItem where data Description = Part PartKW Identifier [Identifier] [ModuleItem] | PackageItem PackageItem + | Directive String deriving Eq instance Show Description where @@ -86,6 +87,7 @@ instance Show Description where then "" else indentedParenList ports show (PackageItem i) = show i + show (Directive str) = str data PartKW = Module diff --git a/src/Language/SystemVerilog/Parser.hs b/src/Language/SystemVerilog/Parser.hs index b6dc7f5..a3bdee9 100644 --- a/src/Language/SystemVerilog/Parser.hs +++ b/src/Language/SystemVerilog/Parser.hs @@ -1,19 +1,22 @@ +{- sv2v + - Author: Zachary Snow + -} module Language.SystemVerilog.Parser - ( parseFile - , preprocess - ) where +( parseFile +) where import Language.SystemVerilog.AST import Language.SystemVerilog.Parser.Lex import Language.SystemVerilog.Parser.Parse import Language.SystemVerilog.Parser.Preprocess -import Language.SystemVerilog.Parser.Tokens --- | Parses a file given a table of predefined macros, the file name, and the file contents. -parseFile :: [(String, String)] -> FilePath -> String -> AST -parseFile env file content = descriptions tokens - where - tokens = map relocate $ alexScanTokens $ preprocess env file content - relocate :: Token -> Token - relocate (Token t s (Position _ l c)) = Token t s $ Position file l c +import Control.Monad.State +import qualified Data.Map.Strict as Map +-- parses a file given a table of predefined macros and the file name +parseFile :: [(String, String)] -> FilePath -> IO AST +parseFile env file = do + let initialEnv = Map.map alexScanTokens $ Map.fromList env + let initialState = PP initialEnv [] + ast <- evalStateT (loadFile file) initialState + return $ descriptions ast diff --git a/src/Language/SystemVerilog/Parser/Lex.x b/src/Language/SystemVerilog/Parser/Lex.x index 8ea7775..01c4837 100644 --- a/src/Language/SystemVerilog/Parser/Lex.x +++ b/src/Language/SystemVerilog/Parser/Lex.x @@ -56,6 +56,21 @@ $decimalDigit = [0-9] @simpleIdentifier = [a-zA-Z_] [a-zA-Z0-9_\$]* @systemIdentifier = "$" [a-zA-Z0-9_\$]+ +-- Comments + +@commentBegin = "/*" +@commentEnd = "*/" | "**/" +@comment = "//" [^\n]* | "/**/" + +-- Directives + +@directive = "`" @simpleIdentifier + +-- Whitespace + +@newline = \n +@escapedNewline = \\\n +@whitespace = ($white # \n) | @escapedNewline tokens :- @@ -206,7 +221,13 @@ tokens :- "<<<=" { tok Sym_lt_lt_lt_eq } ">>>=" { tok Sym_gt_gt_gt_eq } - $white ; + @comment { tok Spe_Comment } + @commentBegin { tok Spe_CommentBegin } + @commentEnd { tok Spe_CommentEnd } + @directive { tok Spe_Directive } + @newline { tok Spe_Newline } + + @whitespace ; . { tok Unknown } diff --git a/src/Language/SystemVerilog/Parser/Parse.y b/src/Language/SystemVerilog/Parser/Parse.y index af2bcd7..c0c52af 100644 --- a/src/Language/SystemVerilog/Parser/Parse.y +++ b/src/Language/SystemVerilog/Parser/Parse.y @@ -160,6 +160,8 @@ string { Token Lit_string _ _ } "<<<=" { Token Sym_lt_lt_lt_eq _ _ } ">>>=" { Token Sym_gt_gt_gt_eq _ _ } +directive { Token Spe_Directive _ _ } + -- operator precedences, from *lowest* to *highest* %nonassoc NoElse %nonassoc "else" @@ -193,6 +195,10 @@ Descriptions :: { [Description] } Description :: { Description } : Part { $1 } | PackageItem { PackageItem $1 } + | Directive { Directive $1 } + +Directive :: { String } + : directive { tokenString $1 } Type :: { Type } : PartialType Dimensions { $1 $2 } diff --git a/src/Language/SystemVerilog/Parser/Preprocess.hs b/src/Language/SystemVerilog/Parser/Preprocess.hs index 29c1b5c..86cb801 100644 --- a/src/Language/SystemVerilog/Parser/Preprocess.hs +++ b/src/Language/SystemVerilog/Parser/Preprocess.hs @@ -1,77 +1,169 @@ +{- sv2v + - Author: Zachary Snow + - + - Source file loading and preprocessing + -} module Language.SystemVerilog.Parser.Preprocess - ( uncomment - , preprocess - ) where +( loadFile +, PP (..) +) where --- | Remove comments from code. -uncomment :: FilePath -> String -> String -uncomment file str = uncomment' str - where - uncomment' a = case a of - "" -> "" - '/' : '/' : rest -> " " ++ removeEOL rest - '/' : '*' : rest -> " " ++ remove rest - '"' : rest -> '"' : ignoreString rest - ch : rest -> ch : uncomment' rest +import Control.Monad.State +import Data.List (intercalate) +import qualified Data.Map.Strict as Map +import System.FilePath (replaceFileName) - removeEOL a = case a of - "" -> "" - '\n' : rest -> '\n' : uncomment' rest - '\t' : rest -> '\t' : removeEOL rest - _ : rest -> ' ' : removeEOL rest +import Language.SystemVerilog.Parser.Lex +import Language.SystemVerilog.Parser.Tokens - remove a = case a of - "" -> error $ "File ended without closing comment (*/): " ++ file - '"' : rest -> removeString rest - '\n' : rest -> '\n' : remove rest - '\t' : rest -> '\t' : remove rest - '*' : '/' : rest -> " " ++ uncomment' rest - _ : rest -> " " ++ remove rest - removeString a = case a of - "" -> error $ "File ended without closing string: " ++ file - '"' : rest -> " " ++ remove rest - '\\' : '"' : rest -> " " ++ removeString rest - '\n' : rest -> '\n' : removeString rest - '\t' : rest -> '\t' : removeString rest - _ : rest -> ' ' : removeString rest +isNewline :: Token -> Bool +isNewline (Token t _ _) = t == Spe_Newline - ignoreString a = case a of - "" -> error $ "File ended without closing string: " ++ file - '"' : rest -> '"' : uncomment' rest - '\\' : '"' : rest -> "\\\"" ++ ignoreString rest - ch : rest -> ch : ignoreString rest +unskippableDirectives :: [String] +unskippableDirectives = ["else", "elsif", "endif"] --- | A simple `define preprocessor. -preprocess :: [(String, String)] -> FilePath -> String -> String -preprocess _env file content = unlines $ pp True [] _env $ lines $ uncomment file content - where - pp :: Bool -> [Bool] -> [(String, String)] -> [String] -> [String] - pp _ _ _ [] = [] - pp on stack env (a : rest) = - -- handle macros with escaped newlines - if a /= "" && last a == '\\' && head a == '`' - then "" : (pp on stack env $ ((init a) ++ " " ++ (head rest)) : (tail rest)) - else case words a of - "`define" : name : value -> "" : pp on stack (if on then (name, ppLine env $ unwords value) : env else env) rest - "`ifdef" : name : _ -> "" : pp (on && (elem name $ fst $ unzip env)) (on : stack) env rest - "`ifndef" : name : _ -> "" : pp (on && (notElem name $ fst $ unzip env)) (on : stack) env rest - "`else" : _ - | not $ null stack -> "" : pp (head stack && not on) stack env rest - | otherwise -> error $ "`else without associated `ifdef/`ifndef: " ++ file - "`endif" : _ - | not $ null stack -> "" : pp (head stack) (tail stack) env rest - | otherwise -> error $ "`endif without associated `ifdef/`ifndef: " ++ file - "`default_nettype" : _ -> "" : pp on stack env rest - _ -> (if on then ppLine env a else "") : pp on stack env rest +preprocess :: [Token] -> (StateT PP IO) [Token] +preprocess tokens = pp tokens -ppLine :: [(String, String)] -> String -> String -ppLine _ "" = "" -ppLine env ('`' : a) = case lookup name env of - Just value -> value ++ ppLine env rest - Nothing -> error $ "Undefined macro: `" ++ name ++ " Env: " ++ show env - where - name = takeWhile (flip elem $ ['A' .. 'Z'] ++ ['a' .. 'z'] ++ ['0' .. '9'] ++ ['_']) a - rest = drop (length name) a -ppLine env (a : b) = a : ppLine env b +data Cond + = CurrentlyTrue + | PreviouslyTrue + | NeverTrue + deriving (Eq, Show) +data PP = PP + { ppEnv :: Map.Map String [Token] + , ppCondStack :: [Cond] + } deriving (Eq, Show) + +pp :: [Token] -> (StateT PP IO) [Token] + +pp [] = do + condStack <- gets ppCondStack + if null condStack + then return [] + else error $ "have unfinished " ++ (show $ length condStack) + ++ " conditional directive(s)" + +pp (Token Spe_Directive str pos : tokens) = do + let directive = tail str + condStack <- gets ppCondStack + env <- gets ppEnv + if not (null condStack) + && head condStack /= CurrentlyTrue + && not (elem directive unskippableDirectives) + then pp tokens + else case directive of + + "default_nettype" -> do + let str' = str ++ " " ++ (tokenString $ head tokens) + let token' = Token Spe_Directive str' pos + tokens' <- pp $ tail tokens + return $ token' : tokens' + + "timescale" -> do + -- timescale must appear alone on a line + -- read tokens until the first (un-escaped) newline + let (defn, rest) = break isNewline $ tokens + let str' = str ++ " " ++ (intercalate " " $ map tokenString defn) + let token' = Token Spe_Directive str' pos + tokens' <- pp rest + return $ token' : tokens' + + "include" -> do + let file = init $ tail $ tokenString $ head tokens + let Position basePath _ _ = pos + let filePath = replaceFileName basePath file + includedTokens <- loadFile filePath + remainingTokens <- pp $ tail tokens + return $ includedTokens ++ remainingTokens + + "ifdef" -> do + let name = tokenString $ head tokens + newCond <- return $ + if Map.member name env then CurrentlyTrue else NeverTrue + modify $ \s -> s { ppCondStack = newCond : condStack } + pp $ tail tokens + "ifndef" -> do + let name = tokenString $ head tokens + newCond <- return $ + if Map.notMember name env then CurrentlyTrue else NeverTrue + modify $ \s -> s { ppCondStack = newCond : condStack } + pp $ tail tokens + "else" -> do + newCond <- return $ + if head condStack == NeverTrue then CurrentlyTrue else NeverTrue + modify $ \s -> s { ppCondStack = newCond : tail condStack } + pp tokens + "elsif" -> do + let name = tokenString $ head tokens + let currCond = head condStack + newCond <- return $ + if currCond /= NeverTrue then + PreviouslyTrue + else if Map.member name env then + CurrentlyTrue + else + NeverTrue + modify $ \s -> s { ppCondStack = newCond : tail condStack } + pp $ tail tokens + "endif" -> do + modify $ \s -> s { ppCondStack = tail condStack } + pp tokens + + "define" -> do + -- read tokens after the name until the first (un-escaped) newline + let (defn, rest) = break isNewline $ tail tokens + -- macro definitions can contain macros, but no conditionals, so we + -- temporarily drop the condition stack while we preprocess it + modify' $ \s -> s { ppCondStack = [] } + defn' <- pp defn + modify' $ \s -> s { ppCondStack = condStack } + let env' = Map.insert (tokenString $ head tokens) defn' env + modify $ \s -> s { ppEnv = env' } + pp rest -- drop the macro, process the rest of the tokens + "undef" -> do + let name = tokenString $ head tokens + modify $ \s -> s { ppEnv = Map.delete name env } + pp $ tail tokens + "undefineall" -> do + modify $ \s -> s { ppEnv = Map.empty } + pp tokens + + _ -> do + case Map.lookup directive env of + Nothing -> do + error $ "Undefined macro: " ++ directive ++ " at " ++ (show pos) + Just replacement -> do + -- TODO: How should we track the position of tokens that are + -- substituted in? Using only one position or the other + -- doesn't tell the full story. + tokens' <- pp tokens + return $ replacement ++ tokens' + +pp (Token Spe_Newline _ _ : tokens) = pp tokens +pp (Token Spe_Comment _ _ : tokens) = pp tokens + +pp (Token Spe_CommentBegin _ _ : tokens) = + pp $ tail $ dropWhile (not . isEnd) tokens + where isEnd (Token t _ _ ) = t == Spe_CommentEnd + +pp (token : tokens) = do + condStack <- gets ppCondStack + tokens' <- pp tokens + if not (null condStack) && head condStack /= CurrentlyTrue + then return tokens' + else return $ token : tokens' + +-- loads, lexes, and preprocesses the file at the given path +loadFile :: FilePath -> (StateT PP IO) [Token] +loadFile file = do + content <- lift $ readFile file + preprocess $ + map relocate $ + alexScanTokens $ + content + where + relocate :: Token -> Token + relocate (Token t s (Position _ l c)) = Token t s $ Position file l c diff --git a/src/Language/SystemVerilog/Parser/Tokens.hs b/src/Language/SystemVerilog/Parser/Tokens.hs index 478418e..a3905af 100644 --- a/src/Language/SystemVerilog/Parser/Tokens.hs +++ b/src/Language/SystemVerilog/Parser/Tokens.hs @@ -333,5 +333,10 @@ data TokenName | Sym_amp_amp_amp | Sym_lt_lt_lt_eq | Sym_gt_gt_gt_eq + | Spe_Comment + | Spe_CommentBegin + | Spe_CommentEnd + | Spe_Directive + | Spe_Newline | Unknown deriving (Show, Eq) diff --git a/src/sv2v.hs b/src/sv2v.hs index 9959282..254eaf4 100644 --- a/src/sv2v.hs +++ b/src/sv2v.hs @@ -16,8 +16,7 @@ main = do job <- readJob -- parse the input file let filePath = file job - content <- readFile filePath - let ast = parseFile [] filePath content + ast <- parseFile [] filePath -- convert the file let ast' = convert (exclude job) ast -- print the converted file out diff --git a/sv2v.cabal b/sv2v.cabal index f1d000a..ce4c8f9 100644 --- a/sv2v.cabal +++ b/sv2v.cabal @@ -27,6 +27,7 @@ executable sv2v base, cmdargs, containers, + filepath, mtl other-modules: -- SystemVerilog modules