diff --git a/README.md b/README.md index 410af4c..4ba8431 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ will be given to issues which include examples or test cases. ## SystemVerilog Front End -This project contains a preprocessor and lexer, a parser, and an abstract syntax +This project contains a preprocessor, lexer, and parser, and an abstract syntax tree representation for a subset of the SystemVerilog specification. The parser is not very strict. The AST allows for the representation of syntactically (and semantically) invalid Verilog. The goal is to be more general in the diff --git a/src/Language/SystemVerilog/Parser.hs b/src/Language/SystemVerilog/Parser.hs index 65006d7..266ad30 100644 --- a/src/Language/SystemVerilog/Parser.hs +++ b/src/Language/SystemVerilog/Parser.hs @@ -9,8 +9,9 @@ import Control.Monad.Except import Control.Monad.State import qualified Data.Map.Strict as Map import Language.SystemVerilog.AST (AST) -import Language.SystemVerilog.Parser.Lex (lexFile, Env) +import Language.SystemVerilog.Parser.Lex (lexStr) import Language.SystemVerilog.Parser.Parse (parse) +import Language.SystemVerilog.Parser.Preprocess (preprocess, Env) import Language.SystemVerilog.Parser.Tokens (Position(..), tokenPosition) -- parses a compilation unit given include search paths and predefined macros @@ -32,8 +33,10 @@ parseFiles' includePaths env siloed (path : paths) = do -- the file path parseFile' :: [String] -> Env -> FilePath -> ExceptT String IO (AST, Env) parseFile' includePaths env path = do - result <- liftIO $ lexFile includePaths env path - (tokens, env') <- liftEither result + preResult <- liftIO $ preprocess includePaths env path + (contents, env') <- liftEither preResult + result <- liftIO $ uncurry lexStr $ unzip contents + tokens <- liftEither result let position = if null tokens then Position path 1 1 diff --git a/src/Language/SystemVerilog/Parser/Lex.x b/src/Language/SystemVerilog/Parser/Lex.x index d9c322c..23edf98 100644 --- a/src/Language/SystemVerilog/Parser/Lex.x +++ b/src/Language/SystemVerilog/Parser/Lex.x @@ -3,40 +3,22 @@ - Author: Zachary Snow - Original Lexer Author: Tom Hawkins - - - Combined source lexing and preprocessing + - SystemVerilog Lexer - - - These procedures are combined so that we can simultaneously process macros in - - a sane way (something analogous to character-by-character) and have our - - lexemes properly tagged with source file positions. - - - - The scariest piece of this module is the use of `unsafePerformIO`. We want to - - be able to search for and read files whenever we see an include directive. - - Trying to thread the IO Monad through alex's interface would be very - - convoluted. The operations performed are not effectful, and are type safe. - - - - It may be possible to separate the preprocessor from the lexer by having a - - preprocessor which produces location annotations. This could improve error - - messaging and remove the include file and macro boundary hacks. + - All preprocessor directives are handled separately by the preprocessor. The + - `begin_keywords` and `end_keywords` lexer directives are handled here. -} +-- This pragma gets rid of a warning caused by alex 3.2.5. {-# OPTIONS_GHC -fno-warn-unused-imports #-} --- The above pragma gets rid of annoying warning caused by alex 3.2.4. This has --- been fixed on their development branch, so this can be removed once they roll --- a new release. (no new release as of 3/29/2018) module Language.SystemVerilog.Parser.Lex - ( lexFile - , Env + ( lexStr ) where -import System.FilePath (dropFileName) -import System.Directory (findFile) -import System.IO.Unsafe (unsafePerformIO) -import Text.Read (readMaybe) +import Control.Monad.Except import qualified Data.Map.Strict as Map import qualified Data.Set as Set -import Data.List (span, elemIndex, dropWhileEnd) -import Data.Maybe (isJust, fromJust) import Language.SystemVerilog.Parser.Keywords (specMap) import Language.SystemVerilog.Parser.Tokens @@ -112,15 +94,6 @@ import Language.SystemVerilog.Parser.Tokens @simpleIdentifier = [a-zA-Z_] [a-zA-Z0-9_\$]* @systemIdentifier = "$" [a-zA-Z0-9_\$]+ --- Comments - -@commentBlock = "/*" -@commentLine = "//" - --- Directives - -@directive = "`" @simpleIdentifier - -- Whitespace @newline = \n @@ -486,715 +459,99 @@ tokens :- "<<<=" { tok Sym_lt_lt_lt_eq } ">>>=" { tok Sym_gt_gt_gt_eq } - @directive { handleDirective } - @commentLine { removeUntil "\n" } - @commentBlock { removeUntil "*/" } + "`celldefine" { tok Dir_celldefine } + "`endcelldefine" { tok Dir_endcelldefine } + "`unconnected_drive" { tok Dir_unconnected_drive } + "`nounconnected_drive" { tok Dir_nounconnected_drive } + "`default_nettype" { tok Dir_default_nettype } + "`resetall" { tok Dir_resetall } + "`begin_keywords" { tok Dir_begin_keywords } + "`end_keywords" { tok Dir_end_keywords } $white ; . { tok Unknown } { - --- our actions don't return any data -type Action = AlexInput -> Int -> Alex () - --- keeps track of the state of an if-else cascade level -data Cond - = CurrentlyTrue - | PreviouslyTrue - | NeverTrue - deriving (Eq, Show) - --- map from macro to definition, plus arguments -type Env = Map.Map String (String, [(String, Maybe String)]) - -- our custom lexer state data AlexUserState = LS - { lsToks :: [Token] -- tokens read so far, *in reverse order* for efficiency - , lsCurrFile :: FilePath -- currently active filename - , lsEnv :: Env -- active macro definitions - , lsCondStack :: [Cond] -- if-else cascade state - , lsIncludePaths :: [FilePath] -- folders to search for includes - , lsSpecStack :: [Set.Set TokenName] -- stack of non-keyword token names + { lsToks :: [Token] -- tokens read so far, *in reverse order* for efficiency + , lsPositions :: [Position] -- character positions in reverse order } deriving (Eq, Show) --- this initial user state does not contain the initial filename, environment, --- or include paths; alex requires that this be defined; we override it before --- we begin the actual lexing procedure +-- this initial user state does not contain the initial token positions; alex +-- requires that this be defined; we override it before we begin the actual +-- lexing procedure alexInitUserState :: AlexUserState -alexInitUserState = LS [] "" Map.empty [] [] [] +alexInitUserState = LS [] [] --- public-facing lexer entrypoint -lexFile :: [String] -> Env -> FilePath -> IO (Either String ([Token], Env)) -lexFile includePaths env path = do - str <- - if path == "-" - then getContents - else readFile path >>= return . normalize - let result = runAlex str $ setEnv >> alexMonadScan >> get +-- lexer entrypoint +lexStr :: String -> [Position] -> IO (Either String [Token]) +lexStr chars positions = do + let setEnv = modify $ \s -> s { lsPositions = reverse positions } + let result = runAlex chars $ setEnv >> alexMonadScan >> get return $ case result of Left msg -> Left msg Right finalState -> - if not $ null $ lsCondStack finalState then - Left $ path ++ ": unfinished conditional directives: " ++ - (show $ length $ lsCondStack finalState) - else if not $ null $ lsSpecStack finalState then - Left $ path ++ ": unterminated begin_keywords blocks: " ++ - (show $ length $ lsSpecStack finalState) - else - Right (finalToks, lsEnv finalState) - where - finalToks = coalesce $ combineBoundaries $ - reverse $ lsToks finalState - where - setEnv = do - modify $ \s -> s - { lsEnv = env - , lsIncludePaths = includePaths - , lsCurrFile = path - } + runExcept $ postProcess [] tokens + where tokens = reverse $ lsToks finalState --- combines identifiers and numbers that cross macro boundaries -coalesce :: [Token] -> [Token] -coalesce [] = [] -coalesce (Token MacroBoundary _ _ : rest) = coalesce rest -coalesce (Token t1 str1 pn1 : Token MacroBoundary _ _ : Token t2 str2 pn2 : rest) = - case (t1, t2, immediatelyFollows) of - (Lit_number, Lit_number, _) -> - Token t1 (str1 ++ str2) pn1 : (coalesce rest) - (Id_simple, Id_simple, True) -> - Token t1 (str1 ++ str2) pn1 : (coalesce rest) - _ -> - Token t1 str1 pn1 : (coalesce $ Token t2 str2 pn2 : rest) +-- process begin/end keywords directives +postProcess :: [Set.Set TokenName] -> [Token] -> Except String [Token] +postProcess stack [] = + if null stack + then return [] + else throwError $ "unterminated begin_keywords blocks: " ++ show stack +postProcess stack (Token Dir_begin_keywords _ pos : ts) = + case ts of + Token Lit_string quotedSpec _ : ts' -> + case Map.lookup spec specMap of + Nothing -> throwError $ show pos + ++ ": invalid keyword set name: " ++ show spec + Just set -> postProcess (set : stack) ts' + where spec = tail $ init quotedSpec + _ -> throwError $ show pos ++ ": begin_keywords not followed by string" +postProcess stack (Token Dir_end_keywords _ pos : ts) = + case stack of + (_ : stack') -> postProcess stack' ts + [] -> throwError $ show pos ++ ": unmatched end_keywords" +postProcess [] (t : ts) = do + ts' <- postProcess [] ts + return $ t : ts' +postProcess stack (t : ts) = do + ts' <- postProcess stack ts + return $ t' : ts' where - Position _ l1 c1 = pn1 - Position _ l2 c2 = pn2 - apn1 = AlexPn 0 l1 c1 - apn2 = AlexPn (length str1) l2 c2 - immediatelyFollows = apn2 == foldl alexMove apn1 str1 -coalesce (x : xs) = x : coalesce xs - -combineBoundaries :: [Token] -> [Token] -combineBoundaries [] = [] -combineBoundaries (Token MacroBoundary s p : Token MacroBoundary _ _ : rest) = - combineBoundaries $ Token MacroBoundary s p : rest -combineBoundaries (x : xs) = x : combineBoundaries xs + Token tokId str pos = t + t' = if Set.member tokId (head stack) + then Token Id_simple ('_' : str) pos + else t -- invoked by alexMonadScan alexEOF :: Alex () alexEOF = return () --- raises an alexError with the current file position appended -lexicalError :: String -> Alex a -lexicalError msg = do - (pn, _, _, _) <- alexGetInput - pos <- toTokPos pn - alexError $ show pos ++ ": Lexical error: " ++ msg - -- get the current user state get :: Alex AlexUserState get = Alex $ \s -> Right (s, alex_ust s) --- get the current user state and apply a function to it -gets :: (AlexUserState -> a) -> Alex a -gets f = get >>= return . f - -- apply a transformation to the current user state modify :: (AlexUserState -> AlexUserState) -> Alex () modify f = Alex func where func s = Right (s { alex_ust = new }, ()) where new = f (alex_ust s) --- helpers specifically accessing the current file state -getCurrentFile :: Alex String -getCurrentFile = gets lsCurrFile -setCurrentFile :: String -> Alex () -setCurrentFile x = modify $ \s -> s { lsCurrFile = x } - --- find the given file for inclusion -includeSearch :: FilePath -> Alex FilePath -includeSearch file = do - base <- getCurrentFile - includePaths <- gets lsIncludePaths - let directories = dropFileName base : includePaths - let result = unsafePerformIO $ findFile directories file - case result of - Just path -> return path - Nothing -> lexicalError $ "Could not find file " ++ show file ++ - ", included from " ++ show base - --- read in the given file -loadFile :: FilePath -> Alex String -loadFile = return . normalize . unsafePerformIO . readFile - --- removes carriage returns before newlines -normalize :: String -> String -normalize ('\r' : '\n' : rest) = '\n' : (normalize rest) -normalize (ch : chs) = ch : (normalize chs) -normalize [] = [] - -isIdentChar :: Char -> Bool -isIdentChar ch = - ('a' <= ch && ch <= 'z') || - ('A' <= ch && ch <= 'Z') || - ('0' <= ch && ch <= '9') || - (ch == '_') || (ch == '$') - -takeString :: Alex String -takeString = do - (pos, _, _, str) <- alexGetInput - let (x, rest) = span isIdentChar str - let lastChar = if null x then ' ' else last x - alexSetInput (foldl alexMove pos x, lastChar, [], rest) - return x - -toTokPos :: AlexPosn -> Alex Position -toTokPos (AlexPn _ l c) = do - file <- getCurrentFile - return $ Position file l c - --- read tokens after the name until the first (un-escaped) newline -takeUntilNewline :: Alex String -takeUntilNewline = do - (pos, _, _, str) <- alexGetInput - case str of - [] -> return "" - '\n' : _ -> do - return "" - '/' : '/' : _ -> do - remainder <- takeThrough '\n' - case last $ init remainder of - '\\' -> takeUntilNewline >>= return . (' ' :) - _ -> return "" - '\\' : '\n' : rest -> do - let newPos = alexMove (alexMove pos '\\') '\n' - alexSetInput (newPos, '\n', [], rest) - takeUntilNewline >>= return . (' ' :) - ch : rest -> do - let newPos = alexMove pos ch - alexSetInput (newPos, ch, [], rest) - takeUntilNewline >>= return . (ch :) - --- select characters up to and including the given character -takeThrough :: Char -> Alex String -takeThrough goal = do +getPosition :: Int -> Alex Position +getPosition lookback = do (_, _, _, str) <- alexGetInput - if null str - then lexicalError $ - "unexpected end of input, looking for " ++ (show goal) - else do - ch <- takeChar - if ch == goal - then return [ch] - else do - rest <- takeThrough goal - return $ ch : rest + positions <- get >>= return . lsPositions + return $ positions !! (lookback + length str) --- pop one character from the input stream -takeChar :: Alex Char -takeChar = do - (pos, _, _, str) <- alexGetInput - (ch, chs) <- - if null str - then lexicalError "unexpected end of input" - else return (head str, tail str) - let newPos = alexMove pos ch - alexSetInput (newPos, ch, [], chs) - return ch - --- drop spaces in the input until a non-space is reached or EOF -dropSpaces :: Alex () -dropSpaces = do - (pos, _, _, str) <- alexGetInput - if null str then - return () - else do - let ch : rest = str - if ch == '\t' || ch == ' ' then do - alexSetInput (alexMove pos ch, ch, [], tail str) - dropSpaces - else - return () - -isWhitespaceChar :: Char -> Bool -isWhitespaceChar ch = elem ch [' ', '\t', '\n'] - --- drop all leading whitespace in the input -dropWhitespace :: Alex () -dropWhitespace = do - (pos, _, _, str) <- alexGetInput - case str of - ch : chs -> - if isWhitespaceChar ch - then do - alexSetInput (alexMove pos ch, ch, [], chs) - dropWhitespace - else return() - [] -> return () - --- lex the remainder of the current line into tokens and return them, rather --- than storing them in the lexer state -tokenizeLine :: Alex [Token] -tokenizeLine = do - -- read in the rest of the current line - str <- takeUntilNewline - dropWhitespace - -- save the current lexer state - currInput <- alexGetInput - currFile <- getCurrentFile - currToks <- gets lsToks - -- parse the line into tokens (which includes macro processing) - modify $ \s -> s { lsToks = [] } - let newInput = (alexStartPos, ' ', [], str) - alexSetInput newInput - alexMonadScan - toks <- gets lsToks - -- return to the previous state - alexSetInput currInput - setCurrentFile currFile - modify $ \s -> s { lsToks = currToks } - -- remove macro boundary tokens and put the tokens in order - let isntMacroBoundary = \(Token t _ _ ) -> t /= MacroBoundary - let toks' = filter isntMacroBoundary toks - return $ reverse toks' - --- removes and returns a decimal number -takeNumber :: Alex Int -takeNumber = do - dropSpaces - leadCh <- peekChar - if '0' <= leadCh && leadCh <= '9' - then step 0 - else lexicalError $ "expected number, but found unexpected char: " - ++ show leadCh - where - step number = do - ch <- takeChar - if ch == ' ' || ch == '\n' then - return number - else if '0' <= ch && ch <= '9' then do - let digit = ord ch - ord '0' - step $ number * 10 + digit - else - lexicalError $ "unexpected char while reading number: " - ++ show ch - -peekChar :: Alex Char -peekChar = do - (_, _, _, str) <- alexGetInput - if null str - then lexicalError "unexpected end of input" - else return $head str - -atEOF :: Alex Bool -atEOF = do - (_, _, _, str) <- alexGetInput - return $ null str - -takeMacroDefinition :: Alex (String, [(String, Maybe String)]) -takeMacroDefinition = do - leadCh <- peekChar - if leadCh /= '(' - then do - body <- takeUntilNewline - return (body, []) - else do - args <- takeMacroArguments - body <- takeUntilNewline - argsWithDefaults <- mapM splitArg args - if null args - then lexicalError "macros cannot have 0 args" - else return (body, argsWithDefaults) - where - splitArg :: String -> Alex (String, Maybe String) - splitArg [] = lexicalError "macro defn. empty argument" - splitArg str = do - let (name, rest) = span isIdentChar str - if null name || not (all isIdentChar name) then - lexicalError $ "invalid macro arg name: " ++ show name - else if null rest then - return (name, Nothing) - else do - let trimmed = dropWhile isWhitespaceChar rest - let leadCh = head trimmed - if leadCh /= '=' - then lexicalError $ "bad char after arg name: " ++ (show leadCh) - else return (name, Just $ tail trimmed) - --- commas and right parens are forbidden outside matched pairs of: (), [], {}, --- "", except to delimit arguments or end the list of arguments; see 22.5.1 -takeMacroArguments :: Alex [String] -takeMacroArguments = do - dropWhitespace - leadCh <- takeChar - if leadCh == '(' - then argLoop - else lexicalError $ "expected begining of macro arguments, but found " - ++ show leadCh - where - argLoop :: Alex [String] - argLoop = do - dropWhitespace - (arg, isEnd) <- loop "" [] - let arg' = dropWhileEnd isWhitespaceChar arg - if isEnd - then return [arg'] - else do - rest <- argLoop - return $ arg' : rest - loop :: String -> [Char] -> Alex (String, Bool) - loop curr stack = do - ch <- takeChar - case (stack, ch) of - ( s,'\\') -> do - ch2 <- takeChar - loop (curr ++ [ch, ch2]) s - ([ ], ',') -> return (curr, False) - ([ ], ')') -> return (curr, True) - - ('"' : s, '"') -> loop (curr ++ [ch]) s - ( s, '"') -> loop (curr ++ [ch]) ('"' : s) - ('[' : s, ']') -> loop (curr ++ [ch]) s - ( s, '[') -> loop (curr ++ [ch]) ('[' : s) - ('(' : s, ')') -> loop (curr ++ [ch]) s - ( s, '(') -> loop (curr ++ [ch]) ('(' : s) - ('{' : s, '}') -> loop (curr ++ [ch]) s - ( s, '{') -> loop (curr ++ [ch]) ('{' : s) - - ( s,'\n') -> loop (curr ++ [' ']) s - ( s, _ ) -> loop (curr ++ [ch ]) s - -findUnescapedQuote :: String -> (String, String) -findUnescapedQuote [] = ([], []) -findUnescapedQuote ('`' : '\\' : '`' : '"' : rest) = ('\\' : '"' : start, end) - where (start, end) = findUnescapedQuote rest -findUnescapedQuote ('\\' : '"' : rest) = ('\\' : '"' : start, end) - where (start, end) = findUnescapedQuote rest -findUnescapedQuote ('"' : rest) = ("\"", rest) -findUnescapedQuote ('`' : '"' : rest) = ("\"", rest) -findUnescapedQuote (ch : rest) = (ch : start, end) - where (start, end) = findUnescapedQuote rest - --- substitute in the arguments for a macro expension -substituteArgs :: String -> [String] -> [String] -> String -substituteArgs "" _ _ = "" -substituteArgs ('`' : '`' : body) names args = - substituteArgs body names args -substituteArgs ('"' : body) names args = - '"' : start ++ substituteArgs rest names args - where (start, rest) = findUnescapedQuote body -substituteArgs ('\\' : '"' : body) names args = - '\\' : '"' : substituteArgs body names args -substituteArgs ('`' : '"' : body) names args = - '"' : substituteArgs (init start) names args - ++ '"' : substituteArgs rest names args - where (start, rest) = findUnescapedQuote body -substituteArgs body names args = - case span isIdentChar body of - ([], _) -> head body : substituteArgs (tail body) names args - (ident, rest) -> - case elemIndex ident names of - Nothing -> ident ++ substituteArgs rest names args - Just idx -> (args !! idx) ++ substituteArgs rest names args - -defaultMacroArgs :: [Maybe String] -> [String] -> Alex [String] -defaultMacroArgs [] [] = return [] -defaultMacroArgs [] _ = lexicalError "too many macro arguments given" -defaultMacroArgs defaults [] = do - if all isJust defaults - then return $ map fromJust defaults - else lexicalError "too few macro arguments given" -defaultMacroArgs (f : fs) (a : as) = do - let arg = if a == "" && isJust f - then fromJust f - else a - args <- defaultMacroArgs fs as - return $ arg : args - --- directives that must always be processed even if the current code block is --- being excluded; we have to process conditions so we can match them up with --- their ending tag, even if they're being skipped -unskippableDirectives :: [String] -unskippableDirectives = ["else", "elsif", "endif", "ifdef", "ifndef"] - --- list of all of the supported directive names; used to prevent defining macros --- with illegal names -directives :: [String] -directives = - [ "timescale" - , "celldefine" - , "endcelldefine" - , "unconnected_drive" - , "nounconnected_drive" - , "default_nettype" - , "pragma" - , "resetall" - , "begin_keywords" - , "end_keywords" - , "__FILE__" - , "__LINE__" - , "line" - , "include" - , "ifdef" - , "ifndef" - , "else" - , "elsif" - , "endif" - , "define" - , "undef" - , "undefineall" - ] - -handleDirective :: Action -handleDirective (posOrig, _, _, strOrig) len = do - let thisTokenStr = take len strOrig - let directive = tail $ thisTokenStr - let newPos = foldl alexMove posOrig thisTokenStr - alexSetInput (newPos, last thisTokenStr, [], drop len strOrig) - - env <- gets lsEnv - tempInput <- alexGetInput - let dropUntilNewline = removeUntil "\n" tempInput 0 - let passThrough = do - rest <- takeUntilNewline - let str = '`' : directive ++ rest - tok Spe_Directive (posOrig, ' ', [], strOrig) (length str) - - condStack <- gets lsCondStack - if any (/= CurrentlyTrue) condStack - && not (elem directive unskippableDirectives) - then alexMonadScan - else case directive of - - "timescale" -> dropUntilNewline - - "celldefine" -> passThrough - "endcelldefine" -> passThrough - - "unconnected_drive" -> passThrough - "nounconnected_drive" -> passThrough - - "default_nettype" -> passThrough - "pragma" -> do - leadCh <- peekChar - if leadCh == '\n' || leadCh == '\r' - then lexicalError "pragma directive cannot be empty" - else passThrough - "resetall" -> passThrough - - "begin_keywords" -> do - toks <- tokenizeLine - quotedSpec <- case toks of - [Token Lit_string str _] -> return str - _ -> lexicalError $ "unexpected tokens following `begin_keywords: " ++ show toks - let spec = tail $ init quotedSpec - case Map.lookup spec specMap of - Nothing -> - lexicalError $ "invalid keyword set name: " ++ show spec - Just set -> do - specStack <- gets lsSpecStack - modify $ \s -> s { lsSpecStack = set : specStack } - dropWhitespace - alexMonadScan - "end_keywords" -> do - specStack <- gets lsSpecStack - if null specStack - then - lexicalError "unexpected end_keywords before begin_keywords" - else do - modify $ \s -> s { lsSpecStack = tail specStack } - dropWhitespace - alexMonadScan - - "__FILE__" -> do - tokPos <- toTokPos posOrig - currFile <- gets lsCurrFile - let tokStr = show currFile - modify $ push $ Token Lit_string tokStr tokPos - alexMonadScan - "__LINE__" -> do - tokPos <- toTokPos posOrig - let Position _ currLine _ = tokPos - let tokStr = show currLine - modify $ push $ Token Lit_number tokStr tokPos - alexMonadScan - - "line" -> do - toks <- tokenizeLine - (lineNumber, quotedFilename, levelNumber) <- - case toks of - [ Token Lit_number lineStr _, - Token Lit_string filename _, - Token Lit_number levelStr _] -> do - let Just line = readMaybe lineStr :: Maybe Int - let Just level = readMaybe levelStr :: Maybe Int - return (line, filename, level) - _ -> lexicalError $ - "unexpected tokens types following `line: " - ++ show (map tokenName toks) ++ "; should be: " - ++ show [Lit_number, Lit_string, Lit_number] - let filename = init $ tail quotedFilename - setCurrentFile filename - (AlexPn f _ c, prev, _, str) <- alexGetInput - alexSetInput (AlexPn f (lineNumber + 1) c, prev, [], str) - if 0 <= levelNumber && levelNumber <= 2 - then alexMonadScan - else lexicalError "line directive invalid level number" - - "include" -> do - toks <- tokenizeLine - quotedFilename <- case toks of - [Token Lit_string str _] -> return str - _ -> lexicalError $ "unexpected tokens following `include: " ++ show toks - inputFollow <- alexGetInput - fileFollow <- getCurrentFile - -- process the included file - let filename = init $ tail quotedFilename - path <- includeSearch filename - content <- loadFile path - let inputIncluded = (alexStartPos, ' ', [], content) - setCurrentFile path - alexSetInput inputIncluded - alexMonadScan - -- resume processing the original file - setCurrentFile fileFollow - alexSetInput inputFollow - alexMonadScan - - "ifdef" -> do - dropSpaces - name <- takeString - let newCond = if Map.member name env - then CurrentlyTrue - else NeverTrue - modify $ \s -> s { lsCondStack = newCond : condStack } - alexMonadScan - "ifndef" -> do - dropSpaces - name <- takeString - let newCond = if Map.notMember name env - then CurrentlyTrue - else NeverTrue - modify $ \s -> s { lsCondStack = newCond : condStack } - alexMonadScan - "else" -> do - let newCond = if head condStack == NeverTrue - then CurrentlyTrue - else NeverTrue - modify $ \s -> s { lsCondStack = newCond : tail condStack } - alexMonadScan - "elsif" -> do - dropSpaces - name <- takeString - let currCond = head condStack - let newCond = - if currCond /= NeverTrue then - PreviouslyTrue - else if Map.member name env then - CurrentlyTrue - else - NeverTrue - modify $ \s -> s { lsCondStack = newCond : tail condStack } - alexMonadScan - "endif" -> do - modify $ \s -> s { lsCondStack = tail condStack } - alexMonadScan - - "define" -> do - dropSpaces - name <- do - str <- takeString - if elem str directives - then lexicalError $ "illegal macro name: " ++ str - else return str - defn <- do - eof <- atEOF - if eof - then return ("", []) - else takeMacroDefinition - modify $ \s -> s { lsEnv = Map.insert name defn env } - alexMonadScan - "undef" -> do - dropSpaces - name <- takeString - modify $ \s -> s { lsEnv = Map.delete name env } - alexMonadScan - "undefineall" -> do - modify $ \s -> s { lsEnv = Map.empty } - alexMonadScan - - _ -> do - case Map.lookup directive env of - Nothing -> lexicalError $ "Undefined macro: " ++ directive - Just (body, formalArgs) -> do - (AlexPn _ l c, _, _, _) <- alexGetInput - replacement <- if null formalArgs - then return body - else do - actualArgs <- takeMacroArguments - defaultedArgs <- defaultMacroArgs (map snd formalArgs) actualArgs - return $ substituteArgs body (map fst formalArgs) defaultedArgs - -- save our current state - currInput <- alexGetInput - currToks <- gets lsToks - modify $ \s -> s { lsToks = [] } - -- lex the macro expansion, preserving the file and line - alexSetInput (AlexPn 0 l 0, ' ', [], replacement) - alexMonadScan - -- re-tag and save tokens from the macro expansion - newToks <- gets lsToks - currFile <- getCurrentFile - let loc = "macro expansion of " ++ directive ++ " at " ++ currFile - let pos = Position loc l (c - length directive - 1) - let reTag (Token a b _) = Token a b pos - let boundary = Token MacroBoundary "" (Position "" 0 0) - let boundedToks = boundary : (map reTag newToks) ++ boundary : currToks - modify $ \s -> s { lsToks = boundedToks } - -- continue lexing after the macro - alexSetInput currInput - alexMonadScan - --- remove characters from the input until the pattern is reached -removeUntil :: String -> Action -removeUntil pattern _ _ = loop - where - patternLen = length pattern - wantNewline = pattern == "\n" - loop = do - (pos, _, _, str) <- alexGetInput - let found = (null str && wantNewline) - || pattern == take patternLen str - let nextPos = alexMove pos (head str) - let afterPos = if wantNewline - then alexMove pos '\n' - else foldl alexMove pos pattern - let (newPos, newStr) = if found - then (afterPos, drop patternLen str) - else (nextPos, drop 1 str) - if not found && null str - then lexicalError $ "Reached EOF while looking for: " ++ - show pattern - else do - alexSetInput (newPos, ' ', [], newStr) - if found - then alexMonadScan - else loop - -push :: Token -> AlexUserState -> AlexUserState -push t s = s { lsToks = t : (lsToks s) } - -tok :: TokenName -> Action -tok tokId (pos, _, _, input) len = do +tok :: TokenName -> AlexInput -> Int -> Alex () +tok tokId (_, _, _, input) len = do let tokStr = take len input - tokPos <- toTokPos pos - condStack <- gets lsCondStack - () <- if any (/= CurrentlyTrue) condStack - then return () - else do - specStack <- gets lsSpecStack - if null specStack || Set.notMember tokId (head specStack) - then modify (push $ Token tokId tokStr tokPos) - else modify (push $ Token Id_simple ('_' : tokStr) tokPos) + tokPos <- getPosition (len - 1) + let t = Token tokId tokStr tokPos + modify $ \s -> s { lsToks = t : (lsToks s) } alexMonadScan } diff --git a/src/Language/SystemVerilog/Parser/Parse.y b/src/Language/SystemVerilog/Parser/Parse.y index 36e958f..ea7e68e 100644 --- a/src/Language/SystemVerilog/Parser/Parse.y +++ b/src/Language/SystemVerilog/Parser/Parse.y @@ -297,7 +297,15 @@ systemIdentifier { Token Id_system _ _ } number { Token Lit_number _ _ } string { Token Lit_string _ _ } time { Token Lit_time _ _ } -directive { Token Spe_Directive _ _ } + +"`celldefine" { Token Dir_celldefine _ _ } +"`endcelldefine" { Token Dir_endcelldefine _ _ } +"`unconnected_drive" { Token Dir_unconnected_drive _ _ } +"`nounconnected_drive" { Token Dir_nounconnected_drive _ _ } +"`default_nettype" { Token Dir_default_nettype _ _ } +"`resetall" { Token Dir_resetall _ _ } +"`begin_keywords" { Token Dir_begin_keywords _ _ } +"`end_keywords" { Token Dir_end_keywords _ _ } "(" { Token Sym_paren_l _ _ } ")" { Token Sym_paren_r _ _ } @@ -797,7 +805,18 @@ TimeunitsDeclaration :: { [PackageItem] } | "timeprecision" Time ";" { [] } Directive :: { String } - : directive { tokenString $1 } + : "`celldefine" { tokenString $1 } + | "`endcelldefine" { tokenString $1 } + | "`unconnected_drive" Drive { tokenString $1 ++ " " ++ $2 } + | "`nounconnected_drive" { tokenString $1 } + | "`default_nettype" DefaultNetType { tokenString $1 ++ " " ++ $2 } + | "`resetall" { tokenString $1 } +Drive :: { String } + : "pull0" { tokenString $1 } + | "pull1" { tokenString $1 } +DefaultNetType :: { String } + : NetType { show $1 } + | Identifier { $1 } PackageImportItems :: { [(Identifier, Maybe Identifier)] } : PackageImportItem { [$1] } diff --git a/src/Language/SystemVerilog/Parser/Preprocess.hs b/src/Language/SystemVerilog/Parser/Preprocess.hs new file mode 100644 index 0000000..41725bf --- /dev/null +++ b/src/Language/SystemVerilog/Parser/Preprocess.hs @@ -0,0 +1,673 @@ +{- sv2v + - Author: Zachary Snow + - + - SystemVerilog Preprocessor + - + - This preprocessor handles all preprocessor directives and produces an output + - stream that is tagged with the effective source position of resulting + - characters. + -} +module Language.SystemVerilog.Parser.Preprocess + ( preprocess + , Env + ) where + +import Control.Monad.Except +import Control.Monad.State +import Data.Char (ord) +import Data.List (span, elemIndex, dropWhileEnd, splitAt, tails, isPrefixOf, findIndex) +import Data.Maybe (isJust, fromJust) +import System.Directory (findFile) +import System.FilePath (dropFileName) +import qualified Data.Map.Strict as Map + +import Language.SystemVerilog.Parser.Tokens (Position(..)) + +type Env = Map.Map String (String, [(String, Maybe String)]) + +type PPS = StateT PP (ExceptT String IO) + +data PP = PP + { ppInput :: String -- current input string + , ppOutput :: [(Char, Position)] -- preprocessor output (in reverse) + , ppPosition :: Position -- current file position + , ppFilePath :: FilePath -- currently active filename + , ppEnv :: Env -- active macro definitions + , ppCondStack :: [Cond] -- if-else cascade state + , ppIncludePaths :: [FilePath] -- folders to search for includes + } deriving (Eq, Show) + +-- keeps track of the state of an if-else cascade level +data Cond + = CurrentlyTrue + | PreviouslyTrue + | NeverTrue + deriving (Eq, Show) + +-- preprocessor entrypoint +preprocess :: [String] -> Env -> FilePath -> IO (Either String ([(Char, Position)], Env)) +preprocess includePaths env path = do + contents <- + if path == "-" + then getContents + else loadFile path + let initialState = PP contents [] (Position path 1 1) path env [] includePaths + result <- runExceptT $ execStateT preprocessInput initialState + return $ case result of + Left msg -> Left msg + Right finalState -> + if not $ null $ ppCondStack finalState then + Left $ path ++ ": unfinished conditional directives: " ++ + (show $ length $ ppCondStack finalState) + else + Right (output, env') + where + output = reverse $ ppOutput finalState + env' = ppEnv finalState + +-- read in the given file +loadFile :: FilePath -> IO String +loadFile path = do + contents <- readFile path + return $ normalize contents + where + -- removes carriage returns before newlines + normalize :: String -> String + normalize ('\r' : '\n' : rest) = '\n' : (normalize rest) + normalize (ch : chs) = ch : (normalize chs) + normalize [] = [] + +-- find the given file for inclusion +includeSearch :: FilePath -> PPS FilePath +includeSearch file = do + base <- getFilePath + includePaths <- gets ppIncludePaths + let directories = dropFileName base : includePaths + result <- liftIO $ findFile directories file + case result of + Just path -> return path + Nothing -> lexicalError $ "Could not find file " ++ show file ++ + ", included from " ++ show base + +lexicalError :: String -> PPS a +lexicalError msg = do + pos <- getPosition + lift $ throwError $ show pos ++ ": Lexical error: " ++ msg + +-- input accessors +setInput :: String -> PPS () +setInput x = modify $ \s -> s { ppInput = x } +getInput :: PPS String +getInput = gets ppInput +-- output accessors +setOutput :: [(Char, Position)] -> PPS () +setOutput x = modify $ \s -> s { ppOutput = x } +getOutput :: PPS [(Char, Position)] +getOutput = gets ppOutput +-- position accessors +getPosition :: PPS Position +getPosition = gets ppPosition +setPosition :: Position -> PPS () +setPosition x = modify $ \s -> s { ppPosition = x } +-- file path accessors +getFilePath :: PPS FilePath +getFilePath = gets ppFilePath +setFilePath :: String -> PPS () +setFilePath x = modify $ \s -> s { ppFilePath = x } +-- environment accessors +getEnv :: PPS Env +getEnv = gets ppEnv +setEnv :: Env -> PPS () +setEnv x = modify $ \s -> s { ppEnv = x } +-- cond stack accessors +getCondStack :: PPS [Cond] +getCondStack = gets ppCondStack +setCondStack :: [Cond] -> PPS () +setCondStack x = modify $ \s -> s { ppCondStack = x } +-- combined input and position accessors +setBuffer :: (String, Position) -> PPS () +setBuffer (x, p) = do + setInput x + setPosition p +getBuffer :: PPS (String, Position) +getBuffer = do + x <- getInput + p <- getPosition + return (x, p) + +isIdentChar :: Char -> Bool +isIdentChar ch = + ('a' <= ch && ch <= 'z') || + ('A' <= ch && ch <= 'Z') || + ('0' <= ch && ch <= '9') || + (ch == '_') || (ch == '$') + +-- reads an identifier from the front of the input +takeIdentifier :: PPS String +takeIdentifier = do + str <- getInput + let (ident, rest) = span isIdentChar str + advancePositions ident + setInput rest + return ident + +-- read tokens after the name until the first (un-escaped) newline +takeUntilNewline :: PPS String +takeUntilNewline = do + str <- getInput + case str of + [] -> return "" + '\n' : _ -> do + return "" + '/' : '/' : _ -> do + remainder <- takeThrough '\n' + case last $ init remainder of + '\\' -> takeUntilNewline >>= return . (' ' :) + _ -> return "" + '\\' : '\n' : rest -> do + advancePosition '\\' + advancePosition '\n' + setInput rest + takeUntilNewline >>= return . (' ' :) + ch : rest -> do + advancePosition ch + setInput rest + takeUntilNewline >>= return . (ch :) + +-- select characters up to and including the given character +takeThrough :: Char -> PPS String +takeThrough goal = do + str <- getInput + if null str + then lexicalError $ + "unexpected end of input, looking for " ++ (show goal) + else do + ch <- takeChar + if ch == goal + then return [ch] + else do + rest <- takeThrough goal + return $ ch : rest + +-- pop one character from the input stream +takeChar :: PPS Char +takeChar = do + str <- getInput + (ch, chs) <- + if null str + then lexicalError "unexpected end of input" + else return (head str, tail str) + advancePosition ch + setInput chs + return ch + +-- removes and returns a quoted string such as or "foo.bar" +takeQuotedString :: PPS String +takeQuotedString = do + dropSpaces + ch <- takeChar + end <- + case ch of + '"' -> return '"' + '<' -> return '>' + _ -> lexicalError $ "bad beginning of include arg: " ++ (show ch) + rest <- takeThrough end + let res = ch : rest + return res + +-- removes and returns a decimal number +takeNumber :: PPS Int +takeNumber = do + dropSpaces + leadCh <- peekChar + if '0' <= leadCh && leadCh <= '9' + then step 0 + else lexicalError $ "expected number, but found unexpected char: " + ++ show leadCh + where + step number = do + ch <- peekChar + if ch == ' ' || ch == '\n' then + return number + else if '0' <= ch && ch <= '9' then do + _ <- takeChar + let digit = ord ch - ord '0' + step $ number * 10 + digit + else + lexicalError $ "unexpected char while reading number: " + ++ show ch + +peekChar :: PPS Char +peekChar = do + str <- getInput + if null str + then lexicalError "unexpected end of input" + else return $ head str + +takeMacroDefinition :: PPS (String, [(String, Maybe String)]) +takeMacroDefinition = do + leadCh <- peekChar + if leadCh /= '(' + then do + dropSpaces + body <- takeUntilNewline + return (body, []) + else do + args <- takeMacroArguments + body <- takeUntilNewline + argsWithDefaults <- mapM splitArg args + if null args + then lexicalError "macros cannot have 0 args" + else return (body, argsWithDefaults) + where + splitArg :: String -> PPS (String, Maybe String) + splitArg [] = lexicalError "macro defn. empty argument" + splitArg str = do + let (name, rest) = span isIdentChar str + if null name || not (all isIdentChar name) then + lexicalError $ "invalid macro arg name: " ++ show name + else if null rest then + return (name, Nothing) + else do + let trimmed = dropWhile isWhitespaceChar rest + let leadCh = head trimmed + if leadCh /= '=' + then lexicalError $ "bad char after arg name: " ++ (show leadCh) + else return (name, Just $ tail trimmed) + +-- commas and right parens are forbidden outside matched pairs of: (), [], {}, +-- "", except to delimit arguments or end the list of arguments; see 22.5.1 +takeMacroArguments :: PPS [String] +takeMacroArguments = do + dropWhitespace + leadCh <- takeChar + if leadCh == '(' + then argLoop + else lexicalError $ "expected begining of macro arguments, but found " + ++ show leadCh + where + argLoop :: PPS [String] + argLoop = do + dropWhitespace + (arg, isEnd) <- loop "" [] + let arg' = dropWhileEnd isWhitespaceChar arg + if isEnd + then return [arg'] + else do + rest <- argLoop + return $ arg' : rest + loop :: String -> [Char] -> PPS (String, Bool) + loop curr stack = do + ch <- takeChar + case (stack, ch) of + ( s,'\\') -> do + ch2 <- takeChar + loop (curr ++ [ch, ch2]) s + ([ ], ',') -> return (curr, False) + ([ ], ')') -> return (curr, True) + + ('"' : s, '"') -> loop (curr ++ [ch]) s + ( s, '"') -> loop (curr ++ [ch]) ('"' : s) + ('[' : s, ']') -> loop (curr ++ [ch]) s + ( s, '[') -> loop (curr ++ [ch]) ('[' : s) + ('(' : s, ')') -> loop (curr ++ [ch]) s + ( s, '(') -> loop (curr ++ [ch]) ('(' : s) + ('{' : s, '}') -> loop (curr ++ [ch]) s + ( s, '{') -> loop (curr ++ [ch]) ('{' : s) + + ( s,'\n') -> loop (curr ++ [' ']) s + ( s, _ ) -> loop (curr ++ [ch ]) s + +findUnescapedQuote :: String -> (String, String) +findUnescapedQuote [] = ([], []) +findUnescapedQuote ('`' : '\\' : '`' : '"' : rest) = ('\\' : '"' : start, end) + where (start, end) = findUnescapedQuote rest +findUnescapedQuote ('\\' : '"' : rest) = ('\\' : '"' : start, end) + where (start, end) = findUnescapedQuote rest +findUnescapedQuote ('"' : rest) = ("\"", rest) +findUnescapedQuote ('`' : '"' : rest) = ("\"", rest) +findUnescapedQuote (ch : rest) = (ch : start, end) + where (start, end) = findUnescapedQuote rest + +-- substitute in the arguments for a macro expansion +substituteArgs :: String -> [String] -> [String] -> String +substituteArgs "" _ _ = "" +substituteArgs ('`' : '`' : body) names args = + substituteArgs body names args +substituteArgs ('"' : body) names args = + '"' : start ++ substituteArgs rest names args + where (start, rest) = findUnescapedQuote body +substituteArgs ('\\' : '"' : body) names args = + '\\' : '"' : substituteArgs body names args +substituteArgs ('`' : '"' : body) names args = + '"' : substituteArgs (init start) names args + ++ '"' : substituteArgs rest names args + where (start, rest) = findUnescapedQuote body +substituteArgs body names args = + case span isIdentChar body of + ([], _) -> head body : substituteArgs (tail body) names args + (ident, rest) -> + case elemIndex ident names of + Nothing -> ident ++ substituteArgs rest names args + Just idx -> (args !! idx) ++ substituteArgs rest names args + +defaultMacroArgs :: [Maybe String] -> [String] -> PPS [String] +defaultMacroArgs [] [] = return [] +defaultMacroArgs [] _ = lexicalError "too many macro arguments given" +defaultMacroArgs defaults [] = do + if all isJust defaults + then return $ map fromJust defaults + else lexicalError "too few macro arguments given" +defaultMacroArgs (f : fs) (a : as) = do + let arg = if a == "" && isJust f + then fromJust f + else a + args <- defaultMacroArgs fs as + return $ arg : args + +-- drop spaces in the input until a non-space is reached or EOF +dropSpaces :: PPS () +dropSpaces = do + str <- getInput + if null str then + return () + else do + let ch : rest = str + if ch == '\t' || ch == ' ' then do + advancePosition ch + setInput rest + dropSpaces + else + return () + +isWhitespaceChar :: Char -> Bool +isWhitespaceChar ch = elem ch [' ', '\t', '\n'] + +-- drop all leading whitespace in the input +dropWhitespace :: PPS () +dropWhitespace = do + str <- getInput + case str of + ch : chs -> + if isWhitespaceChar ch + then do + advancePosition ch + setInput chs + dropWhitespace + else return () + [] -> return () + +-- directives that must always be processed even if the current code block is +-- being excluded; we have to process conditions so we can match them up with +-- their ending tag, even if they're being skipped +unskippableDirectives :: [String] +unskippableDirectives = ["else", "elsif", "endif", "ifdef", "ifndef"] + +-- list of all of the supported directive names; used to prevent defining macros +-- with illegal names +directives :: [String] +directives = + [ "timescale" + , "celldefine" + , "endcelldefine" + , "unconnected_drive" + , "nounconnected_drive" + , "default_nettype" + , "pragma" + , "resetall" + , "begin_keywords" + , "end_keywords" + , "__FILE__" + , "__LINE__" + , "line" + , "include" + , "ifdef" + , "ifndef" + , "else" + , "elsif" + , "endif" + , "define" + , "undef" + , "undefineall" + ] + +-- primary preprocessor loop +preprocessInput :: PPS () +preprocessInput = do + str <- getInput + case str of + '/' : '/' : _ -> removeThrough "\n" + '/' : '*' : _ -> removeThrough "*/" + '`' : _ -> handleDirective + ch : chs -> do + pos <- getPosition + advancePosition ch + setInput chs + condStack <- getCondStack + if any (/= CurrentlyTrue) condStack + then return () + else pushChar ch pos + [] -> return () + if str == [] + then return () + else preprocessInput + +handleDirective :: PPS () +handleDirective = do + directivePos <- getPosition + '`' <- takeChar + directive <- takeIdentifier + + -- helper for directives which are not operated on + let passThrough = do + pushChar '`' directivePos + _ <- mapM (flip pushChar directivePos) directive + return () + + env <- getEnv + condStack <- getCondStack + if any (/= CurrentlyTrue) condStack + && not (elem directive unskippableDirectives) + then return () + else case directive of + + "timescale" -> removeThrough "\n" + + "celldefine" -> passThrough + "endcelldefine" -> passThrough + + "unconnected_drive" -> passThrough + "nounconnected_drive" -> passThrough + + "default_nettype" -> passThrough + "pragma" -> do + leadCh <- peekChar + if leadCh == '\n' + then lexicalError "pragma directive cannot be empty" + else removeThrough "\n" + "resetall" -> passThrough + + "begin_keywords" -> passThrough + "end_keywords" -> passThrough + + "__FILE__" -> do + currFile <- getFilePath + insertChars directivePos (show currFile) + "__LINE__" -> do + Position _ currLine _ <- getPosition + insertChars directivePos (show currLine) + + "line" -> do + lineLookahead + lineNumber <- takeNumber + quotedFilename <- takeQuotedString + levelNumber <- takeNumber + let filename = init $ tail quotedFilename + setFilePath filename + let newPos = Position filename lineNumber 0 + setPosition newPos + if 0 <= levelNumber && levelNumber <= 2 + then return () + else lexicalError "line directive invalid level number" + + "include" -> do + lineLookahead + quotedFilename <- takeQuotedString + fileFollow <- getFilePath + bufFollow <- getBuffer + -- find and load the included file + let filename = init $ tail quotedFilename + includePath <- includeSearch filename + includeContent <- liftIO $ loadFile includePath + -- pre-process the included file + setFilePath includePath + setBuffer (includeContent, Position includePath 1 1) + preprocessInput + -- resume processing the original file + setFilePath fileFollow + setBuffer bufFollow + + "ifdef" -> do + dropSpaces + name <- takeIdentifier + let newCond = if Map.member name env + then CurrentlyTrue + else NeverTrue + setCondStack $ newCond : condStack + "ifndef" -> do + dropSpaces + name <- takeIdentifier + let newCond = if Map.notMember name env + then CurrentlyTrue + else NeverTrue + setCondStack $ newCond : condStack + "else" -> do + let newCond = if head condStack == NeverTrue + then CurrentlyTrue + else NeverTrue + setCondStack $ newCond : tail condStack + "elsif" -> do + dropSpaces + name <- takeIdentifier + let currCond = head condStack + let newCond = + if currCond /= NeverTrue then + PreviouslyTrue + else if Map.member name env then + CurrentlyTrue + else + NeverTrue + setCondStack $ newCond : tail condStack + "endif" -> do + setCondStack $ tail condStack + + "define" -> do + dropSpaces + name <- do + str <- takeIdentifier + if elem str directives + then lexicalError $ "illegal macro name: " ++ str + else return str + defn <- do + str <- getInput + if null str + then return ("", []) + else takeMacroDefinition + setEnv $ Map.insert name defn env + "undef" -> do + dropSpaces + name <- takeIdentifier + setEnv $ Map.delete name env + "undefineall" -> do + setEnv Map.empty + + _ -> do + case Map.lookup directive env of + Nothing -> lexicalError $ "Undefined macro: " ++ directive + Just (body, formalArgs) -> do + replacement <- if null formalArgs + then return body + else do + actualArgs <- takeMacroArguments + defaultedArgs <- defaultMacroArgs (map snd formalArgs) actualArgs + return $ substituteArgs body (map fst formalArgs) defaultedArgs + -- save our current state + currFile <- getFilePath + bufFollow <- getBuffer + -- lex the macro expansion, preserving the file and line + let Position _ l c = snd bufFollow + let loc = "macro expansion of " ++ directive ++ " at " ++ currFile + let pos = Position loc l (c - length directive - 1) + setBuffer (replacement, pos) + preprocessInput + -- return to the rest of the input + setBuffer bufFollow + +-- inserts the given string into the output at the given position +insertChars :: Position -> String -> PPS () +insertChars pos str = do + bufFollow <- getBuffer + setBuffer (str, pos) + preprocessInput + setBuffer bufFollow + +-- pre-pre-processes the current line, such that macros can be used in +-- directives +lineLookahead :: PPS () +lineLookahead = do + line <- takeUntilNewline + -- save the state + outputOrig <- gets ppOutput + condStackOrig <- getCondStack + inputOrig <- getInput + -- process the line + setOutput [] + setCondStack [] + setInput line + preprocessInput + outputAfter <- getOutput + -- add in the new characters + let newChars = reverse $ map fst outputAfter + setInput $ newChars ++ inputOrig + -- restore the previous state + setOutput outputOrig + setCondStack condStackOrig + +-- update the position in the preprocessor state according to the movement of +-- the given character +advancePosition :: Char -> PPS () +advancePosition '\n' = do + Position f l _ <- getPosition + setPosition $ Position f (l + 1) 1 +advancePosition _ = do + Position f l c <- getPosition + setPosition $ Position f l (c + 1) + +-- advances position for multiple characters +advancePositions :: String -> PPS () +advancePositions str = do + _ <- mapM advancePosition str + return () + +-- adds a character (and its position) to the output state +pushChar :: Char -> Position -> PPS () +pushChar c p = do + output <- getOutput + setOutput $ (c, p) : output + +-- search for a pattern in the input and remove remove characters up to and +-- including the first occurrence of the pattern +removeThrough :: String -> PPS () +removeThrough pattern = do + str <- getInput + case findIndex (isPrefixOf pattern) (tails str) of + Nothing -> + if pattern == "\n" + then setInput "" + else lexicalError $ "Reached EOF while looking for: " + ++ show pattern + Just patternIdx -> do + let chars = patternIdx + length pattern + let (dropped, rest) = splitAt chars str + advancePositions dropped + setInput rest diff --git a/src/Language/SystemVerilog/Parser/Tokens.hs b/src/Language/SystemVerilog/Parser/Tokens.hs index 5d30f1a..20d3f45 100644 --- a/src/Language/SystemVerilog/Parser/Tokens.hs +++ b/src/Language/SystemVerilog/Parser/Tokens.hs @@ -28,7 +28,7 @@ tokenPosition :: Token -> Position tokenPosition (Token _ _ pos) = pos pattern TokenEOF :: Token -pattern TokenEOF = Token MacroBoundary "" (Position "" 0 0) +pattern TokenEOF = Token Unknown "" (Position "" 0 0) data Position = Position String Int Int @@ -391,7 +391,13 @@ data TokenName | Sym_amp_amp_amp | Sym_lt_lt_lt_eq | Sym_gt_gt_gt_eq - | Spe_Directive + | Dir_celldefine + | Dir_endcelldefine + | Dir_unconnected_drive + | Dir_nounconnected_drive + | Dir_default_nettype + | Dir_resetall + | Dir_begin_keywords + | Dir_end_keywords | Unknown - | MacroBoundary deriving (Show, Eq, Ord) diff --git a/sv2v.cabal b/sv2v.cabal index eace236..e957fad 100644 --- a/sv2v.cabal +++ b/sv2v.cabal @@ -53,6 +53,7 @@ executable sv2v Language.SystemVerilog.Parser.Lex Language.SystemVerilog.Parser.Parse Language.SystemVerilog.Parser.ParseDecl + Language.SystemVerilog.Parser.Preprocess Language.SystemVerilog.Parser.Tokens -- Conversion modules Convert diff --git a/test/lex/macro_boundary.sv b/test/lex/macro_boundary.sv index dd7ef36..8b4a7f2 100644 --- a/test/lex/macro_boundary.sv +++ b/test/lex/macro_boundary.sv @@ -1,7 +1,23 @@ `define SIZE 4 `define NESTED_SIZE `SIZE `define NAME op -module t`NAME; - initial $display(`SIZE'ha); - initial $display(`NESTED_SIZE'ha); +`define FOO ha +`define BAR 'ha +`define MULTI 1, 2, 5 +`define DULE dule +mo`DULE t`NAME; + initial $display("%b", `SIZE'ha); + initial $display("%b", `NESTED_SIZE'ha); + initial $display("%b", 10'h`NESTED_SIZE); + initial $display("%b", 10`BAR); + initial $display("%b", 10`SIZE); + initial $display("%b %b %b", `MULTI'ha); + + initial begin : block_name + reg [4:0] foo; + foo <= #1 `SIZE; + $display("%b", foo); + #2; + $display("%b", foo); + end endmodule