{
{- sv2v
 - Author: Zachary Snow <zach@zachjs.com>
 - Original Lexer Author: Tom Hawkins <tomahawkins@gmail.com>
 -
 - Combined source lexing and preprocessing
 -
 - These procedures are combined so that we can simultaneously process macros in
 - a sane way (something analogous to character-by-character) and have our
 - lexemes properly tagged with source file positions.
 -
 - The scariest piece of this module is the use of `unsafePerformIO`. We want to
 - be able to search for and read files whenever we see an include directive.
 - Trying to thread the IO Monad through alex's interface would be very
 - convoluted. The operations performed are not effectful, and are type safe.
 -
 - It may be possible to separate the preprocessor from the lexer by having a
 - preprocessor which produces location annotations. This could improve error
 - messaging and remove the include file and macro boundary hacks.
 -}

{-# OPTIONS_GHC -fno-warn-unused-imports #-}
-- The above pragma gets rid of annoying warning caused by alex 3.2.4. This has
-- been fixed on their development branch, so this can be removed once they roll
-- a new release. (no new release as of 3/29/2018)

module Language.SystemVerilog.Parser.Lex
    ( lexFile
    , Env
    ) where

import System.FilePath (dropFileName)
import System.Directory (findFile)
import System.IO.Unsafe (unsafePerformIO)
import Text.Read (readMaybe)
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import Data.List (span, elemIndex, dropWhileEnd)
import Data.Maybe (isJust, fromJust)

import Language.SystemVerilog.Parser.Keywords (specMap)
import Language.SystemVerilog.Parser.Tokens
}

%wrapper "monadUserState"

-- Numbers

@nonZeroDecimalDigit = [1-9]
@decimalDigit = [0-9]
@xDigit       = [xX]
@zDigit       = [zZ\?]
@binaryDigit  = @xDigit | @zDigit | [0-1]
@octalDigit   = @xDigit | @zDigit | [0-7]
@hexDigit     = @xDigit | @zDigit | [0-9a-fA-F]

@decimalBase = "'" [sS]? [dD]
@binaryBase  = "'" [sS]? [bB]
@octalBase   = "'" [sS]? [oO]
@hexBase     = "'" [sS]? [hH]

@nonZeroUnsignedNumber = @nonZeroDecimalDigit ("_" | @decimalDigit)*
@unsignedNumber        = @decimalDigit        ("_" | @decimalDigit)*
@binaryValue           = @binaryDigit         ("_" | @binaryDigit )*
@octalValue            = @octalDigit          ("_" | @octalDigit  )*
@hexValue              = @hexDigit            ("_" | @hexDigit    )*

@exp = [eE]
@sign = [\-\+]
@fixedPointNumber = @unsignedNumber "." @unsignedNumber
@realNumber
    = @fixedPointNumber
    | @unsignedNumber ("." @unsignedNumber)? @exp @sign? @unsignedNumber

@size = @nonZeroUnsignedNumber " "?

@binaryNumber = @size? @binaryBase " "? @binaryValue
@octalNumber  = @size? @octalBase  " "? @octalValue
@hexNumber    = @size? @hexBase    " "? @hexValue

@unbasedUnsizedLiteral = "'" ( 0 | 1 | x | X | z | Z )

@decimalNumber
    = @unsignedNumber
    | @size? @decimalBase " "? @unsignedNumber
    | @size? @decimalBase " "? @xDigit "_"*
    | @size? @decimalBase " "? @zDigit "_"*
@integralNumber
    = @decimalNumber
    | @octalNumber
    | @binaryNumber
    | @hexNumber
    | @unbasedUnsizedLiteral
@number
    = @integralNumber
    | @realNumber

-- Strings

@string = \" (\\\"|\\\r?\n|[^\"\r\n])* \"

-- Times

@timeUnit = s | ms | us | ns | ps | fs
@time
    = @unsignedNumber @timeUnit
    | @fixedPointNumber @timeUnit

-- Identifiers

@escapedIdentifier = "\" ($printable # $white)+ $white
@simpleIdentifier  = [a-zA-Z_] [a-zA-Z0-9_\$]*
@systemIdentifier = "$" [a-zA-Z0-9_\$]+

-- Comments

@commentBlock = "/*"
@commentLine = "//"

-- Directives

@directive = "`" @simpleIdentifier

-- Whitespace

@newline = \n
@escapedNewline = \\\n
@whitespace = ($white # \n) | @escapedNewline

tokens :-

    "$bits"                { tok KW_dollar_bits                }
    "$dimensions"          { tok KW_dollar_dimensions          }
    "$unpacked_dimensions" { tok KW_dollar_unpacked_dimensions }
    "$left"                { tok KW_dollar_left                }
    "$right"               { tok KW_dollar_right               }
    "$low"                 { tok KW_dollar_low                 }
    "$high"                { tok KW_dollar_high                }
    "$increment"           { tok KW_dollar_increment           }
    "$size"                { tok KW_dollar_size                }

    "accept_on"        { tok KW_accept_on    }
    "alias"            { tok KW_alias        }
    "always"           { tok KW_always       }
    "always_comb"      { tok KW_always_comb  }
    "always_ff"        { tok KW_always_ff    }
    "always_latch"     { tok KW_always_latch }
    "and"              { tok KW_and          }
    "assert"           { tok KW_assert       }
    "assign"           { tok KW_assign       }
    "assume"           { tok KW_assume       }
    "automatic"        { tok KW_automatic    }
    "before"           { tok KW_before       }
    "begin"            { tok KW_begin        }
    "bind"             { tok KW_bind         }
    "bins"             { tok KW_bins         }
    "binsof"           { tok KW_binsof       }
    "bit"              { tok KW_bit          }
    "break"            { tok KW_break        }
    "buf"              { tok KW_buf          }
    "bufif0"           { tok KW_bufif0       }
    "bufif1"           { tok KW_bufif1       }
    "byte"             { tok KW_byte         }
    "case"             { tok KW_case         }
    "casex"            { tok KW_casex        }
    "casez"            { tok KW_casez        }
    "cell"             { tok KW_cell         }
    "chandle"          { tok KW_chandle      }
    "checker"          { tok KW_checker      }
    "class"            { tok KW_class        }
    "clocking"         { tok KW_clocking     }
    "cmos"             { tok KW_cmos         }
    "config"           { tok KW_config       }
    "const"            { tok KW_const        }
    "constraint"       { tok KW_constraint   }
    "context"          { tok KW_context      }
    "continue"         { tok KW_continue     }
    "cover"            { tok KW_cover        }
    "covergroup"       { tok KW_covergroup   }
    "coverpoint"       { tok KW_coverpoint   }
    "cross"            { tok KW_cross        }
    "deassign"         { tok KW_deassign     }
    "default"          { tok KW_default      }
    "defparam"         { tok KW_defparam     }
    "design"           { tok KW_design       }
    "disable"          { tok KW_disable      }
    "dist"             { tok KW_dist         }
    "do"               { tok KW_do           }
    "edge"             { tok KW_edge         }
    "else"             { tok KW_else         }
    "end"              { tok KW_end          }
    "endcase"          { tok KW_endcase      }
    "endchecker"       { tok KW_endchecker   }
    "endclass"         { tok KW_endclass     }
    "endclocking"      { tok KW_endclocking  }
    "endconfig"        { tok KW_endconfig    }
    "endfunction"      { tok KW_endfunction  }
    "endgenerate"      { tok KW_endgenerate  }
    "endgroup"         { tok KW_endgroup     }
    "endinterface"     { tok KW_endinterface }
    "endmodule"        { tok KW_endmodule    }
    "endpackage"       { tok KW_endpackage   }
    "endprimitive"     { tok KW_endprimitive }
    "endprogram"       { tok KW_endprogram   }
    "endproperty"      { tok KW_endproperty  }
    "endspecify"       { tok KW_endspecify   }
    "endsequence"      { tok KW_endsequence  }
    "endtable"         { tok KW_endtable     }
    "endtask"          { tok KW_endtask      }
    "enum"             { tok KW_enum         }
    "event"            { tok KW_event        }
    "eventually"       { tok KW_eventually   }
    "expect"           { tok KW_expect       }
    "export"           { tok KW_export       }
    "extends"          { tok KW_extends      }
    "extern"           { tok KW_extern       }
    "final"            { tok KW_final        }
    "first_match"      { tok KW_first_match  }
    "for"              { tok KW_for          }
    "force"            { tok KW_force        }
    "foreach"          { tok KW_foreach      }
    "forever"          { tok KW_forever      }
    "fork"             { tok KW_fork         }
    "forkjoin"         { tok KW_forkjoin     }
    "function"         { tok KW_function     }
    "generate"         { tok KW_generate     }
    "genvar"           { tok KW_genvar       }
    "global"           { tok KW_global       }
    "highz0"           { tok KW_highz0       }
    "highz1"           { tok KW_highz1       }
    "if"               { tok KW_if           }
    "iff"              { tok KW_iff          }
    "ifnone"           { tok KW_ifnone       }
    "ignore_bins"      { tok KW_ignore_bins  }
    "illegal_bins"     { tok KW_illegal_bins }
    "implements"       { tok KW_implements   }
    "implies"          { tok KW_implies      }
    "import"           { tok KW_import       }
    "incdir"           { tok KW_incdir       }
    "include"          { tok KW_include      }
    "initial"          { tok KW_initial      }
    "inout"            { tok KW_inout        }
    "input"            { tok KW_input        }
    "inside"           { tok KW_inside       }
    "instance"         { tok KW_instance     }
    "int"              { tok KW_int          }
    "integer"          { tok KW_integer      }
    "interconnect"     { tok KW_interconnect }
    "interface"        { tok KW_interface    }
    "intersect"        { tok KW_intersect    }
    "join"             { tok KW_join         }
    "join_any"         { tok KW_join_any     }
    "join_none"        { tok KW_join_none    }
    "large"            { tok KW_large        }
    "let"              { tok KW_let          }
    "liblist"          { tok KW_liblist      }
    "library"          { tok KW_library      }
    "local"            { tok KW_local        }
    "localparam"       { tok KW_localparam   }
    "logic"            { tok KW_logic        }
    "longint"          { tok KW_longint      }
    "macromodule"      { tok KW_macromodule  }
    "matches"          { tok KW_matches      }
    "medium"           { tok KW_medium       }
    "modport"          { tok KW_modport      }
    "module"           { tok KW_module       }
    "nand"             { tok KW_nand         }
    "negedge"          { tok KW_negedge      }
    "nettype"          { tok KW_nettype      }
    "new"              { tok KW_new          }
    "nexttime"         { tok KW_nexttime     }
    "nmos"             { tok KW_nmos         }
    "nor"              { tok KW_nor          }
    "noshowcancelled"  { tok KW_noshowcancelled }
    "not"              { tok KW_not          }
    "notif0"           { tok KW_notif0       }
    "notif1"           { tok KW_notif1       }
    "null"             { tok KW_null         }
    "or"               { tok KW_or           }
    "output"           { tok KW_output       }
    "package"          { tok KW_package      }
    "packed"           { tok KW_packed       }
    "parameter"        { tok KW_parameter    }
    "pmos"             { tok KW_pmos         }
    "posedge"          { tok KW_posedge      }
    "primitive"        { tok KW_primitive    }
    "priority"         { tok KW_priority     }
    "program"          { tok KW_program      }
    "property"         { tok KW_property     }
    "protected"        { tok KW_protected    }
    "pull0"            { tok KW_pull0        }
    "pull1"            { tok KW_pull1        }
    "pulldown"         { tok KW_pulldown     }
    "pullup"           { tok KW_pullup       }
    "pulsestyle_ondetect" { tok KW_pulsestyle_ondetect }
    "pulsestyle_onevent"  { tok KW_pulsestyle_onevent }
    "pure"             { tok KW_pure         }
    "rand"             { tok KW_rand         }
    "randc"            { tok KW_randc        }
    "randcase"         { tok KW_randcase     }
    "randsequence"     { tok KW_randsequence }
    "rcmos"            { tok KW_rcmos        }
    "real"             { tok KW_real         }
    "realtime"         { tok KW_realtime     }
    "ref"              { tok KW_ref          }
    "reg"              { tok KW_reg          }
    "reject_on"        { tok KW_reject_on    }
    "release"          { tok KW_release      }
    "repeat"           { tok KW_repeat       }
    "restrict"         { tok KW_restrict     }
    "return"           { tok KW_return       }
    "rnmos"            { tok KW_rnmos        }
    "rpmos"            { tok KW_rpmos        }
    "rtran"            { tok KW_rtran        }
    "rtranif0"         { tok KW_rtranif0     }
    "rtranif1"         { tok KW_rtranif1     }
    "s_always"         { tok KW_s_always     }
    "s_eventually"     { tok KW_s_eventually }
    "s_nexttime"       { tok KW_s_nexttime   }
    "s_until"          { tok KW_s_until      }
    "s_until_with"     { tok KW_s_until_with }
    "scalared"         { tok KW_scalared     }
    "sequence"         { tok KW_sequence     }
    "shortint"         { tok KW_shortint     }
    "shortreal"        { tok KW_shortreal    }
    "showcancelled"    { tok KW_showcancelled }
    "signed"           { tok KW_signed       }
    "small"            { tok KW_small        }
    "soft"             { tok KW_soft         }
    "solve"            { tok KW_solve        }
    "specify"          { tok KW_specify      }
    "specparam"        { tok KW_specparam    }
    "static"           { tok KW_static       }
    "string"           { tok KW_string       }
    "strong"           { tok KW_strong       }
    "strong0"          { tok KW_strong0      }
    "strong1"          { tok KW_strong1      }
    "struct"           { tok KW_struct       }
    "super"            { tok KW_super        }
    "supply0"          { tok KW_supply0      }
    "supply1"          { tok KW_supply1      }
    "sync_accept_on"   { tok KW_sync_accept_on }
    "sync_reject_on"   { tok KW_sync_reject_on }
    "table"            { tok KW_table        }
    "tagged"           { tok KW_tagged       }
    "task"             { tok KW_task         }
    "this"             { tok KW_this         }
    "throughout"       { tok KW_throughout   }
    "time"             { tok KW_time         }
    "timeprecision"    { tok KW_timeprecision }
    "timeunit"         { tok KW_timeunit     }
    "tran"             { tok KW_tran         }
    "tranif0"          { tok KW_tranif0      }
    "tranif1"          { tok KW_tranif1      }
    "tri"              { tok KW_tri          }
    "tri0"             { tok KW_tri0         }
    "tri1"             { tok KW_tri1         }
    "triand"           { tok KW_triand       }
    "trior"            { tok KW_trior        }
    "trireg"           { tok KW_trireg       }
    "type"             { tok KW_type         }
    "typedef"          { tok KW_typedef      }
    "union"            { tok KW_union        }
    "unique"           { tok KW_unique       }
    "unique0"          { tok KW_unique0      }
    "unsigned"         { tok KW_unsigned     }
    "until"            { tok KW_until        }
    "until_with"       { tok KW_until_with   }
    "untyped"          { tok KW_untyped      }
    "use"              { tok KW_use          }
    "uwire"            { tok KW_uwire        }
    "var"              { tok KW_var          }
    "vectored"         { tok KW_vectored     }
    "virtual"          { tok KW_virtual      }
    "void"             { tok KW_void         }
    "wait"             { tok KW_wait         }
    "wait_order"       { tok KW_wait_order   }
    "wand"             { tok KW_wand         }
    "weak"             { tok KW_weak         }
    "weak0"            { tok KW_weak0        }
    "weak1"            { tok KW_weak1        }
    "while"            { tok KW_while        }
    "wildcard"         { tok KW_wildcard     }
    "wire"             { tok KW_wire         }
    "with"             { tok KW_with         }
    "within"           { tok KW_within       }
    "wor"              { tok KW_wor          }
    "xnor"             { tok KW_xnor         }
    "xor"              { tok KW_xor          }

    @simpleIdentifier  { tok Id_simple  }
    @escapedIdentifier { tok Id_escaped }
    @systemIdentifier  { tok Id_system  }

    @number            { tok Lit_number }
    @string            { tok Lit_string }
    @time              { tok Lit_time }

    "("                { tok Sym_paren_l }
    ")"                { tok Sym_paren_r }
    "["                { tok Sym_brack_l }
    "]"                { tok Sym_brack_r }
    "{"                { tok Sym_brace_l }
    "}"                { tok Sym_brace_r }
    "~"                { tok Sym_tildy }
    "!"                { tok Sym_bang }
    "@"                { tok Sym_at }
    "#"                { tok Sym_pound }
    "%"                { tok Sym_percent }
    "^"                { tok Sym_hat }
    "&"                { tok Sym_amp }
    "|"                { tok Sym_bar }
    "*"                { tok Sym_aster }
    "."                { tok Sym_dot }
    ","                { tok Sym_comma }
    ":"                { tok Sym_colon }
    ";"                { tok Sym_semi }
    "="                { tok Sym_eq }
    "<"                { tok Sym_lt }
    ">"                { tok Sym_gt }
    "+"                { tok Sym_plus }
    "-"                { tok Sym_dash }
    "?"                { tok Sym_question }
    "/"                { tok Sym_slash }
    "$"                { tok Sym_dollar }
    "'"                { tok Sym_s_quote }

    "~&"               { tok Sym_tildy_amp }
    "~|"               { tok Sym_tildy_bar }
    "~^"               { tok Sym_tildy_hat }
    "^~"               { tok Sym_hat_tildy }
    "=="               { tok Sym_eq_eq }
    "!="               { tok Sym_bang_eq }
    "&&"               { tok Sym_amp_amp }
    "||"               { tok Sym_bar_bar }
    "**"               { tok Sym_aster_aster }
    "<="               { tok Sym_lt_eq }
    ">="               { tok Sym_gt_eq }
    ">>"               { tok Sym_gt_gt }
    "<<"               { tok Sym_lt_lt }
    "++"               { tok Sym_plus_plus }
    "--"               { tok Sym_dash_dash }
    "+="               { tok Sym_plus_eq }
    "-="               { tok Sym_dash_eq }
    "*="               { tok Sym_aster_eq }
    "/="               { tok Sym_slash_eq }
    "%="               { tok Sym_percent_eq }
    "&="               { tok Sym_amp_eq }
    "|="               { tok Sym_bar_eq }
    "^="               { tok Sym_hat_eq }
    "+:"               { tok Sym_plus_colon }
    "-:"               { tok Sym_dash_colon }
    "::"               { tok Sym_colon_colon }
    ".*"               { tok Sym_dot_aster }
    "->"               { tok Sym_dash_gt }
    ":="               { tok Sym_colon_eq }
    ":/"               { tok Sym_colon_slash }
    "##"               { tok Sym_pound_pound }
    "[*"               { tok Sym_brack_l_aster }
    "[="               { tok Sym_brack_l_eq }
    "=>"               { tok Sym_eq_gt }
    "@*"               { tok Sym_at_aster }
    "(*"               { tok Sym_paren_l_aster }
    "*)"               { tok Sym_aster_paren_r }
    "*>"               { tok Sym_aster_gt }

    "==="              { tok Sym_eq_eq_eq }
    "!=="              { tok Sym_bang_eq_eq }
    "==?"              { tok Sym_eq_eq_question }
    "!=?"              { tok Sym_bang_eq_question }
    ">>>"              { tok Sym_gt_gt_gt }
    "<<<"              { tok Sym_lt_lt_lt }
    "<<="              { tok Sym_lt_lt_eq }
    ">>="              { tok Sym_gt_gt_eq }
    "<->"              { tok Sym_lt_dash_gt }
    "|->"              { tok Sym_bar_dash_gt }
    "|=>"              { tok Sym_bar_eq_gt }
    "[->"              { tok Sym_brack_l_dash_gt }
    "#-#"              { tok Sym_pound_dash_pound }
    "#=#"              { tok Sym_pound_eq_pound }
    "@@("              { tok Sym_at_at_paren_l }
    "(*)"              { tok Sym_paren_l_aster_paren_r }
    "->>"              { tok Sym_dash_gt_gt }
    "&&&"              { tok Sym_amp_amp_amp }

    "<<<="             { tok Sym_lt_lt_lt_eq }
    ">>>="             { tok Sym_gt_gt_gt_eq }

    @directive         { handleDirective }
    @commentLine       { removeUntil "\n" }
    @commentBlock      { removeUntil "*/" }

    $white             ;

    .                  { tok Unknown }

{

-- our actions don't return any data
type Action = AlexInput -> Int -> Alex ()

-- keeps track of the state of an if-else cascade level
data Cond
    = CurrentlyTrue
    | PreviouslyTrue
    | NeverTrue
    deriving (Eq, Show)

-- map from macro to definition, plus arguments
type Env = Map.Map String (String, [(String, Maybe String)])

-- our custom lexer state
data AlexUserState = LS
    { lsToks         :: [Token] -- tokens read so far, *in reverse order* for efficiency
    , lsCurrFile     :: FilePath -- currently active filename
    , lsEnv          :: Env -- active macro definitions
    , lsCondStack    :: [Cond] -- if-else cascade state
    , lsIncludePaths :: [FilePath] -- folders to search for includes
    , lsSpecStack    :: [Set.Set TokenName] -- stack of non-keyword token names
    } deriving (Eq, Show)

-- this initial user state does not contain the initial filename, environment,
-- or include paths; alex requires that this be defined; we override it before
-- we begin the actual lexing procedure
alexInitUserState :: AlexUserState
alexInitUserState = LS [] "" Map.empty [] [] []

-- public-facing lexer entrypoint
lexFile :: [String] -> Env -> FilePath -> IO (Either String ([Token], Env))
lexFile includePaths env path = do
    str <-
        if path == "-"
            then getContents
            else readFile path >>= return . normalize
    let result = runAlex str $ setEnv >> alexMonadScan >> get
    return $ case result of
        Left msg -> Left msg
        Right finalState ->
            if not $ null $ lsCondStack finalState then
                Left $ path ++ ": unfinished conditional directives: " ++
                    (show $ length $ lsCondStack finalState)
            else if not $ null $ lsSpecStack finalState then
                Left $ path ++ ": unterminated begin_keywords blocks: " ++
                    (show $ length $ lsSpecStack finalState)
            else
                Right (finalToks, lsEnv finalState)
            where
                finalToks = coalesce $ combineBoundaries $
                    reverse $ lsToks finalState
    where
        setEnv = do
            modify $ \s -> s
                { lsEnv = env
                , lsIncludePaths = includePaths
                , lsCurrFile = path
                }

-- combines identifiers and numbers that cross macro boundaries
coalesce :: [Token] -> [Token]
coalesce [] = []
coalesce (Token MacroBoundary _ _ : rest) = coalesce rest
coalesce (Token t1 str1 pn1 : Token MacroBoundary _ _ : Token t2 str2 pn2 : rest) =
    case (t1, t2, immediatelyFollows) of
        (Lit_number, Lit_number, _) ->
            Token t1 (str1 ++ str2) pn1 : (coalesce rest)
        (Id_simple, Id_simple, True) ->
            Token t1 (str1 ++ str2) pn1 : (coalesce rest)
        _ ->
            Token t1 str1 pn1 : (coalesce $ Token t2 str2 pn2 : rest)
    where
        Position _ l1 c1 = pn1
        Position _ l2 c2 = pn2
        apn1 = AlexPn 0 l1 c1
        apn2 = AlexPn (length str1) l2 c2
        immediatelyFollows = apn2 == foldl alexMove apn1 str1
coalesce (x : xs) = x : coalesce xs

combineBoundaries :: [Token] -> [Token]
combineBoundaries [] = []
combineBoundaries (Token MacroBoundary s p : Token MacroBoundary _ _ : rest) =
    combineBoundaries $ Token MacroBoundary s p : rest
combineBoundaries (x : xs) = x : combineBoundaries xs

-- invoked by alexMonadScan
alexEOF :: Alex ()
alexEOF = return ()

-- raises an alexError with the current file position appended
lexicalError :: String -> Alex a
lexicalError msg = do
    (pn, _, _, _) <- alexGetInput
    pos <- toTokPos pn
    alexError $ show pos ++ ": Lexical error: " ++ msg

-- get the current user state
get :: Alex AlexUserState
get = Alex $ \s -> Right (s, alex_ust s)

-- get the current user state and apply a function to it
gets :: (AlexUserState -> a) -> Alex a
gets f = get >>= return . f

-- apply a transformation to the current user state
modify :: (AlexUserState -> AlexUserState) -> Alex ()
modify f = Alex func
    where func s = Right (s { alex_ust = new }, ())
            where new = f (alex_ust s)

-- helpers specifically accessing the current file state
getCurrentFile :: Alex String
getCurrentFile = gets lsCurrFile
setCurrentFile :: String -> Alex ()
setCurrentFile x = modify $ \s -> s { lsCurrFile = x }

-- find the given file for inclusion
includeSearch :: FilePath -> Alex FilePath
includeSearch file = do
    base <- getCurrentFile
    includePaths <- gets lsIncludePaths
    let directories = dropFileName base : includePaths
    let result = unsafePerformIO $ findFile directories file
    case result of
        Just path -> return path
        Nothing -> lexicalError $ "Could not find file " ++ show file ++
                        ", included from " ++ show base

-- read in the given file
loadFile :: FilePath -> Alex String
loadFile = return . normalize . unsafePerformIO . readFile

-- removes carriage returns before newlines
normalize :: String -> String
normalize ('\r' : '\n' : rest) = '\n' : (normalize rest)
normalize (ch : chs) = ch : (normalize chs)
normalize [] = []

isIdentChar :: Char -> Bool
isIdentChar ch =
    ('a' <= ch && ch <= 'z') ||
    ('A' <= ch && ch <= 'Z') ||
    ('0' <= ch && ch <= '9') ||
    (ch == '_') || (ch == '$')

takeString :: Alex String
takeString = do
    (pos, _, _, str) <- alexGetInput
    let (x, rest) = span isIdentChar str
    let lastChar = if null x then ' ' else last x
    alexSetInput (foldl alexMove pos x, lastChar, [], rest)
    return x

toTokPos :: AlexPosn -> Alex Position
toTokPos (AlexPn _ l c) = do
    file <- getCurrentFile
    return $ Position file l c

-- read tokens after the name until the first (un-escaped) newline
takeUntilNewline :: Alex String
takeUntilNewline = do
    (pos, _, _, str) <- alexGetInput
    case str of
        []                 -> return ""
        '\n' :        _    -> do
            return ""
        '/' : '/' : _ -> do
            remainder <- takeThrough '\n'
            case last $ init remainder of
                '\\' -> takeUntilNewline >>= return . (' ' :)
                _ -> return ""
        '\\' : '\n' : rest -> do
            let newPos = alexMove (alexMove pos '\\') '\n'
            alexSetInput (newPos, '\n', [], rest)
            takeUntilNewline >>= return . (' ' :)
        ch   :        rest -> do
            let newPos = alexMove pos ch
            alexSetInput (newPos, ch, [], rest)
            takeUntilNewline >>= return . (ch :)

-- select characters up to and including the given character
takeThrough :: Char -> Alex String
takeThrough goal = do
    (_, _, _, str) <- alexGetInput
    if null str
        then lexicalError $
                "unexpected end of input, looking for " ++ (show goal)
        else do
            ch <- takeChar
            if ch == goal
                then return [ch]
                else do
                    rest <- takeThrough goal
                    return $ ch : rest

-- pop one character from the input stream
takeChar :: Alex Char
takeChar = do
    (pos, _, _, str) <- alexGetInput
    (ch, chs) <-
        if null str
            then lexicalError "unexpected end of input"
            else return (head str, tail str)
    let newPos = alexMove pos ch
    alexSetInput (newPos, ch, [], chs)
    return ch

-- drop spaces in the input until a non-space is reached or EOF
dropSpaces :: Alex ()
dropSpaces = do
    (pos, _, _, str) <- alexGetInput
    if null str then
        return ()
    else do
        let ch : rest = str
        if ch == '\t' || ch == ' ' then do
            alexSetInput (alexMove pos ch, ch, [], tail str)
            dropSpaces
        else
            return ()

isWhitespaceChar :: Char -> Bool
isWhitespaceChar ch = elem ch [' ', '\t', '\n']

-- drop all leading whitespace in the input
dropWhitespace :: Alex ()
dropWhitespace = do
    (pos, _, _, str) <- alexGetInput
    case str of
        ch : chs ->
            if isWhitespaceChar ch
                then do
                    alexSetInput (alexMove pos ch, ch, [], chs)
                    dropWhitespace
                else return()
        [] -> return ()

-- lex the remainder of the current line into tokens and return them, rather
-- than storing them in the lexer state
tokenizeLine :: Alex [Token]
tokenizeLine = do
    -- read in the rest of the current line
    str <- takeUntilNewline
    dropWhitespace
    -- save the current lexer state
    currInput <- alexGetInput
    currFile <- getCurrentFile
    currToks <- gets lsToks
    -- parse the line into tokens (which includes macro processing)
    modify $ \s -> s { lsToks = [] }
    let newInput = (alexStartPos, ' ', [], str)
    alexSetInput newInput
    alexMonadScan
    toks <- gets lsToks
    -- return to the previous state
    alexSetInput currInput
    setCurrentFile currFile
    modify $ \s -> s { lsToks = currToks }
    -- remove macro boundary tokens and put the tokens in order
    let isntMacroBoundary = \(Token t _ _ ) -> t /= MacroBoundary
    let toks' = filter isntMacroBoundary toks
    return $ reverse toks'

-- removes and returns a decimal number
takeNumber :: Alex Int
takeNumber = do
    dropSpaces
    leadCh <- peekChar
    if '0' <= leadCh && leadCh <= '9'
        then step 0
        else lexicalError $ "expected number, but found unexpected char: "
                ++ show leadCh
    where
        step number = do
            ch <- takeChar
            if ch == ' ' || ch == '\n' then
                return number
            else if '0' <= ch && ch <= '9' then do
                let digit = ord ch - ord '0'
                step $ number * 10 + digit
            else
                lexicalError $ "unexpected char while reading number: "
                    ++ show ch

peekChar :: Alex Char
peekChar = do
    (_, _, _, str) <- alexGetInput
    if null str
        then lexicalError "unexpected end of input"
        else return $head str

takeMacroDefinition :: Alex (String, [(String, Maybe String)])
takeMacroDefinition = do
    leadCh <- peekChar
    if leadCh /= '('
        then do
            body <- takeUntilNewline
            return (body, [])
        else do
            args <- takeMacroArguments
            body <- takeUntilNewline
            argsWithDefaults <- mapM splitArg args
            if null args
                then lexicalError "macros cannot have 0 args"
                else return (body, argsWithDefaults)
    where
        splitArg :: String -> Alex (String, Maybe String)
        splitArg [] = lexicalError "macro defn. empty argument"
        splitArg str = do
            let (name, rest) = span isIdentChar str
            if null name || not (all isIdentChar name) then
                lexicalError $ "invalid macro arg name: " ++ show name
            else if null rest then
                return (name, Nothing)
            else do
                let trimmed = dropWhile isWhitespaceChar rest
                let leadCh = head trimmed
                if leadCh /= '='
                then lexicalError $ "bad char after arg name: " ++ (show leadCh)
                else return (name, Just $ tail trimmed)

-- commas and right parens are forbidden outside matched pairs of: (), [], {},
-- "", except to delimit arguments or end the list of arguments; see 22.5.1
takeMacroArguments :: Alex [String]
takeMacroArguments = do
    dropWhitespace
    leadCh <- takeChar
    if leadCh == '('
        then argLoop
        else lexicalError $ "expected begining of macro arguments, but found "
                ++ show leadCh
    where
        argLoop :: Alex [String]
        argLoop = do
            dropWhitespace
            (arg, isEnd) <- loop "" []
            let arg' = dropWhileEnd isWhitespaceChar arg
            if isEnd
                then return [arg']
                else do
                    rest <- argLoop
                    return $ arg' : rest
        loop :: String -> [Char] -> Alex (String, Bool)
        loop curr stack = do
            ch <- takeChar
            case (stack, ch) of
                (      s,'\\') -> do
                    ch2 <- takeChar
                    loop (curr ++ [ch, ch2]) s
                ([     ], ',') -> return (curr, False)
                ([     ], ')') -> return (curr, True)

                ('"' : s, '"') -> loop (curr ++ [ch]) s
                (      s, '"') -> loop (curr ++ [ch]) ('"' : s)
                ('[' : s, ']') -> loop (curr ++ [ch]) s
                (      s, '[') -> loop (curr ++ [ch]) ('[' : s)
                ('(' : s, ')') -> loop (curr ++ [ch]) s
                (      s, '(') -> loop (curr ++ [ch]) ('(' : s)
                ('{' : s, '}') -> loop (curr ++ [ch]) s
                (      s, '{') -> loop (curr ++ [ch]) ('{' : s)

                (      s,'\n') -> loop (curr ++ [' ']) s
                (      s, _  ) -> loop (curr ++ [ch ]) s

findUnescapedQuote :: String -> (String, String)
findUnescapedQuote [] = ([], [])
findUnescapedQuote ('`' : '\\' : '`' : '"' : rest) = ('\\' : '"' : start, end)
    where (start, end) = findUnescapedQuote rest
findUnescapedQuote ('\\' : '"' : rest) = ('\\' : '"' : start, end)
    where (start, end) = findUnescapedQuote rest
findUnescapedQuote ('"' : rest) = ("\"", rest)
findUnescapedQuote ('`' : '"' : rest) = ("\"", rest)
findUnescapedQuote (ch : rest) = (ch : start, end)
    where (start, end) = findUnescapedQuote rest

-- substitute in the arguments for a macro expension
substituteArgs :: String -> [String] -> [String] -> String
substituteArgs "" _ _ = ""
substituteArgs ('`' : '`' : body) names args =
    substituteArgs body names args
substituteArgs ('"' : body) names args =
    '"' : start ++ substituteArgs rest names args
    where (start, rest) = findUnescapedQuote body
substituteArgs ('\\' : '"' : body) names args =
    '\\' : '"' : substituteArgs body names args
substituteArgs ('`' : '"' : body) names args =
    '"' : substituteArgs (init start) names args
    ++ '"' : substituteArgs rest names args
    where (start, rest) = findUnescapedQuote body
substituteArgs body names args =
    case span isIdentChar body of
        ([], _) -> head body : substituteArgs (tail body) names args
        (ident, rest) ->
            case elemIndex ident names of
                Nothing -> ident ++ substituteArgs rest names args
                Just idx -> (args !! idx) ++ substituteArgs rest names args

defaultMacroArgs :: [Maybe String] -> [String] -> Alex [String]
defaultMacroArgs [] [] = return []
defaultMacroArgs [] _ = lexicalError "too many macro arguments given"
defaultMacroArgs defaults [] = do
    if all isJust defaults
        then return $ map fromJust defaults
        else lexicalError "too few macro arguments given"
defaultMacroArgs (f : fs) (a : as) = do
    let arg = if a == "" && isJust f
            then fromJust f
            else a
    args <- defaultMacroArgs fs as
    return $ arg : args

-- directives that must always be processed even if the current code block is
-- being excluded; we have to process conditions so we can match them up with
-- their ending tag, even if they're being skipped
unskippableDirectives :: [String]
unskippableDirectives = ["else", "elsif", "endif", "ifdef", "ifndef"]

handleDirective :: Action
handleDirective (posOrig, _, _, strOrig) len = do
    let thisTokenStr = take len strOrig
    let directive = tail $ thisTokenStr
    let newPos = foldl alexMove posOrig thisTokenStr
    alexSetInput (newPos, last thisTokenStr, [], drop len strOrig)

    env <- gets lsEnv
    tempInput <- alexGetInput
    let dropUntilNewline = removeUntil "\n" tempInput 0
    let passThrough = do
            rest <- takeUntilNewline
            let str = '`' : directive ++ rest
            tok Spe_Directive (posOrig, ' ', [], strOrig) (length str)

    condStack <- gets lsCondStack
    if any (/= CurrentlyTrue) condStack
        && not (elem directive unskippableDirectives)
    then alexMonadScan
    else case directive of

        "timescale" -> dropUntilNewline

        "celldefine" -> passThrough
        "endcelldefine" -> passThrough

        "unconnected_drive" -> passThrough
        "nounconnected_drive" -> passThrough

        "default_nettype" -> passThrough
        "pragma" -> do
            leadCh <- peekChar
            if leadCh == '\n' || leadCh == '\r'
                then lexicalError "pragma directive cannot be empty"
                else passThrough
        "resetall" -> passThrough

        "begin_keywords" -> do
            toks <- tokenizeLine
            quotedSpec <- case toks of
                [Token Lit_string str _] -> return str
                _ -> lexicalError $ "unexpected tokens following `begin_keywords: " ++ show toks
            let spec = tail $ init quotedSpec
            case Map.lookup spec specMap of
                Nothing ->
                    lexicalError $ "invalid keyword set name: " ++ show spec
                Just set -> do
                    specStack <- gets lsSpecStack
                    modify $ \s -> s { lsSpecStack = set : specStack }
                    dropWhitespace
                    alexMonadScan
        "end_keywords" -> do
            specStack <- gets lsSpecStack
            if null specStack
                then
                    lexicalError "unexpected end_keywords before begin_keywords"
                else do
                    modify $ \s -> s { lsSpecStack = tail specStack }
                    dropWhitespace
                    alexMonadScan

        "__FILE__" -> do
            tokPos <- toTokPos posOrig
            currFile <- gets lsCurrFile
            let tokStr = show currFile
            modify $ push $ Token Lit_string tokStr tokPos
            alexMonadScan
        "__LINE__" -> do
            tokPos <- toTokPos posOrig
            let Position _ currLine _ = tokPos
            let tokStr = show currLine
            modify $ push $ Token Lit_number tokStr tokPos
            alexMonadScan

        "line" -> do
            toks <- tokenizeLine
            (lineNumber, quotedFilename, levelNumber) <-
                case toks of
                    [ Token Lit_number lineStr _,
                      Token Lit_string filename _,
                      Token Lit_number levelStr _] -> do
                        let Just line = readMaybe lineStr :: Maybe Int
                        let Just level = readMaybe levelStr :: Maybe Int
                        return (line, filename, level)
                    _ -> lexicalError $ "unexpected tokens following `begin_keywords: " ++ show toks
            let filename = init $ tail quotedFilename
            setCurrentFile filename
            (AlexPn f _ c, prev, _, str) <- alexGetInput
            alexSetInput (AlexPn f (lineNumber + 1) c, prev, [], str)
            if 0 <= levelNumber && levelNumber <= 2
                then alexMonadScan
                else lexicalError "line directive invalid level number"

        "include" -> do
            toks <- tokenizeLine
            quotedFilename <- case toks of
                [Token Lit_string str _] -> return str
                _ -> lexicalError $ "unexpected tokens following `include: " ++ show toks
            inputFollow <- alexGetInput
            fileFollow <- getCurrentFile
            -- process the included file
            let filename = init $ tail quotedFilename
            path <- includeSearch filename
            content <- loadFile path
            let inputIncluded = (alexStartPos, ' ', [], content)
            setCurrentFile path
            alexSetInput inputIncluded
            alexMonadScan
            -- resume processing the original file
            setCurrentFile fileFollow
            alexSetInput inputFollow
            alexMonadScan

        "ifdef" -> do
            dropSpaces
            name <- takeString
            let newCond = if Map.member name env
                    then CurrentlyTrue
                    else NeverTrue
            modify $ \s -> s { lsCondStack = newCond : condStack }
            alexMonadScan
        "ifndef" -> do
            dropSpaces
            name <- takeString
            let newCond = if Map.notMember name env
                    then CurrentlyTrue
                    else NeverTrue
            modify $ \s -> s { lsCondStack = newCond : condStack }
            alexMonadScan
        "else" -> do
            let newCond = if head condStack == NeverTrue
                    then CurrentlyTrue
                    else NeverTrue
            modify $ \s -> s { lsCondStack = newCond : tail condStack }
            alexMonadScan
        "elsif" -> do
            dropSpaces
            name <- takeString
            let currCond = head condStack
            let newCond =
                    if currCond /= NeverTrue then
                        PreviouslyTrue
                    else if Map.member name env then
                        CurrentlyTrue
                    else
                        NeverTrue
            modify $ \s -> s { lsCondStack = newCond : tail condStack }
            alexMonadScan
        "endif" -> do
            modify $ \s -> s { lsCondStack = tail condStack }
            alexMonadScan

        "define" -> do
            dropSpaces
            name <- takeString
            defn <- takeMacroDefinition
            modify $ \s -> s { lsEnv = Map.insert name defn env }
            alexMonadScan
        "undef" -> do
            dropSpaces
            name <- takeString
            modify $ \s -> s { lsEnv = Map.delete name env }
            alexMonadScan
        "undefineall" -> do
            modify $ \s -> s { lsEnv = Map.empty }
            alexMonadScan

        _ -> do
            case Map.lookup directive env of
                Nothing -> lexicalError $ "Undefined macro: " ++ directive
                Just (body, formalArgs) -> do
                    (AlexPn _ l c, _, _, _) <- alexGetInput
                    replacement <- if null formalArgs
                        then return body
                        else do
                            actualArgs <- takeMacroArguments
                            defaultedArgs <- defaultMacroArgs (map snd formalArgs) actualArgs
                            return $ substituteArgs body (map fst formalArgs) defaultedArgs
                    -- save our current state
                    currInput <- alexGetInput
                    currToks <- gets lsToks
                    modify $ \s -> s { lsToks = [] }
                    -- lex the macro expansion, preserving the file and line
                    alexSetInput (AlexPn 0 l 0, ' ', [], replacement)
                    alexMonadScan
                    -- re-tag and save tokens from the macro expansion
                    newToks <- gets lsToks
                    currFile <- getCurrentFile
                    let loc = "macro expansion of " ++ directive ++ " at " ++ currFile
                    let pos = Position loc l (c - length directive - 1)
                    let reTag (Token a b _) = Token a b pos
                    let boundary = Token MacroBoundary "" (Position "" 0 0)
                    let boundedToks = boundary : (map reTag newToks) ++ boundary : currToks
                    modify $ \s -> s { lsToks = boundedToks }
                    -- continue lexing after the macro
                    alexSetInput currInput
                    alexMonadScan

-- remove characters from the input until the pattern is reached
removeUntil :: String -> Action
removeUntil pattern _ _ = loop
    where
        patternLen = length pattern
        wantNewline = pattern == "\n"
        loop = do
            (pos, _, _, str) <- alexGetInput
            let found = (null str && wantNewline)
                     || pattern == take patternLen str
            let nextPos = alexMove pos (head str)
            let afterPos = if wantNewline
                    then alexMove pos '\n'
                    else foldl alexMove pos pattern
            let (newPos, newStr) = if found
                    then (afterPos, drop patternLen str)
                    else (nextPos, drop 1 str)
            if not found && null str
                then lexicalError $ "Reached EOF while looking for: " ++
                        show pattern
                else do
                    alexSetInput (newPos, ' ', [], newStr)
                    if found
                        then alexMonadScan
                        else loop

push :: Token -> AlexUserState -> AlexUserState
push t s = s { lsToks = t : (lsToks s) }

tok :: TokenName -> Action
tok tokId (pos, _, _, input) len = do
    let tokStr = take len input
    tokPos <- toTokPos pos
    condStack <- gets lsCondStack
    () <- if any (/= CurrentlyTrue) condStack
        then return ()
        else do
            specStack <- gets lsSpecStack
            if null specStack || Set.notMember tokId (head specStack)
                then modify (push $ Token tokId tokStr tokPos)
                else modify (push $ Token Id_simple ('_' : tokStr) tokPos)
    alexMonadScan
}