mirror of https://github.com/zachjs/sv2v.git
531 lines
21 KiB
Plaintext
531 lines
21 KiB
Plaintext
{
|
|
{- sv2v
|
|
- Author: Zachary Snow <zach@zachjs.com>
|
|
- Original Lexer Author: Tom Hawkins <tomahawkins@gmail.com>
|
|
- vim: filetype=haskell
|
|
-
|
|
- SystemVerilog Lexer
|
|
-
|
|
- All preprocessor directives are handled separately by the preprocessor. The
|
|
- `begin_keywords` and `end_keywords` lexer directives are handled here.
|
|
-}
|
|
|
|
module Language.SystemVerilog.Parser.Lex
|
|
( lexStr
|
|
) where
|
|
|
|
import Control.Monad.Except
|
|
import qualified Data.Map.Strict as Map
|
|
import qualified Data.Set as Set
|
|
import qualified Data.Vector as Vector
|
|
|
|
import Language.SystemVerilog.Parser.Keywords (specMap)
|
|
import Language.SystemVerilog.Parser.Preprocess (Contents)
|
|
import Language.SystemVerilog.Parser.Tokens
|
|
}
|
|
|
|
%wrapper "posn"
|
|
|
|
-- Numbers
|
|
|
|
-- Plain decimal digit classes, plus the x (x/X) and z (z/Z/?) digit classes.
@nonZeroDecimalDigit = [1-9]
|
|
@decimalDigit = [0-9]
|
|
@xDigit = [xX]
|
|
@zDigit = [zZ\?]
|
|
-- Digits allowed inside based literals: the radix's own digits plus any
-- x or z digit.
@binaryDigit = @xDigit | @zDigit | [0-1]
|
|
@octalDigit = @xDigit | @zDigit | [0-7]
|
|
@hexDigit = @xDigit | @zDigit | [0-9a-fA-F]
|
|
|
|
-- Base specifiers: an apostrophe, an optional s/S, then the radix letter in
-- either case.
@decimalBase = "'" [sS]? [dD]
|
|
@binaryBase = "'" [sS]? [bB]
|
|
@octalBase = "'" [sS]? [oO]
|
|
@hexBase = "'" [sS]? [hH]
|
|
|
|
-- Digit sequences. The first character must be a digit of the relevant class;
-- underscores may appear anywhere after it.
@nonZeroUnsignedNumber = @nonZeroDecimalDigit ("_" | @decimalDigit)*
|
|
@unsignedNumber = @decimalDigit ("_" | @decimalDigit)*
|
|
@binaryValue = @binaryDigit ("_" | @binaryDigit )*
|
|
@octalValue = @octalDigit ("_" | @octalDigit )*
|
|
@hexValue = @hexDigit ("_" | @hexDigit )*
|
|
|
|
-- Real literals: either a fixed-point number (digits on both sides of the
-- dot), or a number with an optional fractional part followed by a mandatory
-- exponent with an optional sign.
@exp = [eE]
|
|
@sign = [\-\+]
|
|
@fixedPointNumber = @unsignedNumber "." @unsignedNumber
|
|
@realNumber
|
|
= @fixedPointNumber
|
|
| @unsignedNumber ("." @unsignedNumber)? @exp @sign? @unsignedNumber
|
|
|
|
-- An optional size prefix: a non-zero unsigned number together with any
-- whitespace that follows it.
@size = @nonZeroUnsignedNumber $white*
|
|
|
|
-- Based literals: optional size, base specifier, optional whitespace, value.
-- The whole sequence, whitespace included, is matched as a single lexeme.
@binaryNumber = @size? @binaryBase $white* @binaryValue
|
|
@octalNumber = @size? @octalBase $white* @octalValue
|
|
@hexNumber = @size? @hexBase $white* @hexValue
|
|
|
|
-- An apostrophe immediately followed by a single 0, 1, x, X, z, or Z.
@unbasedUnsizedLiteral = "'" ( 0 | 1 | x | X | z | Z )
|
|
|
|
-- Decimal literals: a bare unsigned number, or an optionally sized decimal
-- base followed by either an unsigned number or a single x/z digit with
-- optional trailing underscores.
@decimalNumber
|
|
= @unsignedNumber
|
|
| @size? @decimalBase $white* @unsignedNumber
|
|
| @size? @decimalBase $white* @xDigit "_"*
|
|
| @size? @decimalBase $white* @zDigit "_"*
|
|
-- Every integral literal form; this is the macro the Lit_number rule uses.
@integralNumber
|
|
= @decimalNumber
|
|
| @octalNumber
|
|
| @binaryNumber
|
|
| @hexNumber
|
|
| @unbasedUnsizedLiteral
|
|
|
|
-- Strings
|
|
|
|
-- A double-quoted string. Its body may contain an escaped quote, a
-- backslash-newline continuation (with an optional CR before the LF), or any
-- character other than a quote, CR, or LF. The quotes are part of the lexeme.
-- NOTE(review): the catch-all class also accepts a lone backslash, so under
-- longest-match a string ending in an escaped backslash that is followed by
-- another quote on the same line may be lexed past its intended end; verify.
@string = \" (\\\"|\\\r?\n|[^\"\r\n])* \"
|
|
|
|
-- Times
|
|
|
|
-- Time literals: an unsigned or fixed-point number immediately followed by
-- one of the unit suffixes below, with no whitespace between the two.
@timeUnit = s | ms | us | ns | ps | fs
|
|
@time
|
|
= @unsignedNumber @timeUnit
|
|
| @fixedPointNumber @timeUnit
|
|
|
|
-- Identifiers
|
|
|
|
-- Escaped identifier: a backslash, one or more printable non-whitespace
-- characters, and the single whitespace character that terminates it. The
-- terminator is part of the lexeme.
@escapedIdentifier = "\" ($printable # $white)+ $white
|
|
-- Simple identifier: a letter or underscore, then letters, digits,
-- underscores, or dollar signs.
@simpleIdentifier = [a-zA-Z_] [a-zA-Z0-9_\$]*
|
|
-- System identifier: a dollar sign followed by at least one identifier
-- character.
@systemIdentifier = "$" [a-zA-Z0-9_\$]+
|
|
|
|
-- Whitespace
|
|
|
|
-- A line feed, a backslash-escaped line feed, and "inline" whitespace (any
-- whitespace other than a line feed, or an escaped line feed).
-- NOTE(review): no rule in this file references these three macros; the
-- rules skip whitespace using $white directly. Confirm that they are
-- leftovers before removing them.
@newline = \n
|
|
@escapedNewline = \\\n
|
|
@whitespace = ($white # \n) | @escapedNewline
|
|
|
|
-- Lexer rules. Alex takes the longest match and, among matches of equal
-- length, the rule listed first. The keyword rules are therefore listed
-- before the identifier rules so that an exact keyword wins over
-- @simpleIdentifier or @systemIdentifier, while a longer identifier that
-- merely starts with a keyword still lexes as an identifier.
tokens :-
|
|
|
|
"$bits" { tok KW_dollar_bits }
|
|
"$dimensions" { tok KW_dollar_dimensions }
|
|
"$unpacked_dimensions" { tok KW_dollar_unpacked_dimensions }
|
|
"$left" { tok KW_dollar_left }
|
|
"$right" { tok KW_dollar_right }
|
|
"$low" { tok KW_dollar_low }
|
|
"$high" { tok KW_dollar_high }
|
|
"$increment" { tok KW_dollar_increment }
|
|
"$size" { tok KW_dollar_size }
|
|
"$info" { tok KW_dollar_info }
|
|
"$warning" { tok KW_dollar_warning }
|
|
"$error" { tok KW_dollar_error }
|
|
"$fatal" { tok KW_dollar_fatal }
|
|
|
|
"accept_on" { tok KW_accept_on }
|
|
"alias" { tok KW_alias }
|
|
"always" { tok KW_always }
|
|
"always_comb" { tok KW_always_comb }
|
|
"always_ff" { tok KW_always_ff }
|
|
"always_latch" { tok KW_always_latch }
|
|
"and" { tok KW_and }
|
|
"assert" { tok KW_assert }
|
|
"assign" { tok KW_assign }
|
|
"assume" { tok KW_assume }
|
|
"automatic" { tok KW_automatic }
|
|
"before" { tok KW_before }
|
|
"begin" { tok KW_begin }
|
|
"bind" { tok KW_bind }
|
|
"bins" { tok KW_bins }
|
|
"binsof" { tok KW_binsof }
|
|
"bit" { tok KW_bit }
|
|
"break" { tok KW_break }
|
|
"buf" { tok KW_buf }
|
|
"bufif0" { tok KW_bufif0 }
|
|
"bufif1" { tok KW_bufif1 }
|
|
"byte" { tok KW_byte }
|
|
"case" { tok KW_case }
|
|
"casex" { tok KW_casex }
|
|
"casez" { tok KW_casez }
|
|
"cell" { tok KW_cell }
|
|
"chandle" { tok KW_chandle }
|
|
"checker" { tok KW_checker }
|
|
"class" { tok KW_class }
|
|
"clocking" { tok KW_clocking }
|
|
"cmos" { tok KW_cmos }
|
|
"config" { tok KW_config }
|
|
"const" { tok KW_const }
|
|
"constraint" { tok KW_constraint }
|
|
"context" { tok KW_context }
|
|
"continue" { tok KW_continue }
|
|
"cover" { tok KW_cover }
|
|
"covergroup" { tok KW_covergroup }
|
|
"coverpoint" { tok KW_coverpoint }
|
|
"cross" { tok KW_cross }
|
|
"deassign" { tok KW_deassign }
|
|
"default" { tok KW_default }
|
|
"defparam" { tok KW_defparam }
|
|
"design" { tok KW_design }
|
|
"disable" { tok KW_disable }
|
|
"dist" { tok KW_dist }
|
|
"do" { tok KW_do }
|
|
"edge" { tok KW_edge }
|
|
"else" { tok KW_else }
|
|
"end" { tok KW_end }
|
|
"endcase" { tok KW_endcase }
|
|
"endchecker" { tok KW_endchecker }
|
|
"endclass" { tok KW_endclass }
|
|
"endclocking" { tok KW_endclocking }
|
|
"endconfig" { tok KW_endconfig }
|
|
"endfunction" { tok KW_endfunction }
|
|
"endgenerate" { tok KW_endgenerate }
|
|
"endgroup" { tok KW_endgroup }
|
|
"endinterface" { tok KW_endinterface }
|
|
"endmodule" { tok KW_endmodule }
|
|
"endpackage" { tok KW_endpackage }
|
|
"endprimitive" { tok KW_endprimitive }
|
|
"endprogram" { tok KW_endprogram }
|
|
"endproperty" { tok KW_endproperty }
|
|
"endspecify" { tok KW_endspecify }
|
|
"endsequence" { tok KW_endsequence }
|
|
"endtable" { tok KW_endtable }
|
|
"endtask" { tok KW_endtask }
|
|
"enum" { tok KW_enum }
|
|
"event" { tok KW_event }
|
|
"eventually" { tok KW_eventually }
|
|
"expect" { tok KW_expect }
|
|
"export" { tok KW_export }
|
|
"extends" { tok KW_extends }
|
|
"extern" { tok KW_extern }
|
|
"final" { tok KW_final }
|
|
"first_match" { tok KW_first_match }
|
|
"for" { tok KW_for }
|
|
"force" { tok KW_force }
|
|
"foreach" { tok KW_foreach }
|
|
"forever" { tok KW_forever }
|
|
"fork" { tok KW_fork }
|
|
"forkjoin" { tok KW_forkjoin }
|
|
"function" { tok KW_function }
|
|
"generate" { tok KW_generate }
|
|
"genvar" { tok KW_genvar }
|
|
"global" { tok KW_global }
|
|
"highz0" { tok KW_highz0 }
|
|
"highz1" { tok KW_highz1 }
|
|
"if" { tok KW_if }
|
|
"iff" { tok KW_iff }
|
|
"ifnone" { tok KW_ifnone }
|
|
"ignore_bins" { tok KW_ignore_bins }
|
|
"illegal_bins" { tok KW_illegal_bins }
|
|
"implements" { tok KW_implements }
|
|
"implies" { tok KW_implies }
|
|
"import" { tok KW_import }
|
|
"incdir" { tok KW_incdir }
|
|
"include" { tok KW_include }
|
|
"initial" { tok KW_initial }
|
|
"inout" { tok KW_inout }
|
|
"input" { tok KW_input }
|
|
"inside" { tok KW_inside }
|
|
"instance" { tok KW_instance }
|
|
"int" { tok KW_int }
|
|
"integer" { tok KW_integer }
|
|
"interconnect" { tok KW_interconnect }
|
|
"interface" { tok KW_interface }
|
|
"intersect" { tok KW_intersect }
|
|
"join" { tok KW_join }
|
|
"join_any" { tok KW_join_any }
|
|
"join_none" { tok KW_join_none }
|
|
"large" { tok KW_large }
|
|
"let" { tok KW_let }
|
|
"liblist" { tok KW_liblist }
|
|
"library" { tok KW_library }
|
|
"local" { tok KW_local }
|
|
"localparam" { tok KW_localparam }
|
|
"logic" { tok KW_logic }
|
|
"longint" { tok KW_longint }
|
|
"macromodule" { tok KW_macromodule }
|
|
"matches" { tok KW_matches }
|
|
"medium" { tok KW_medium }
|
|
"modport" { tok KW_modport }
|
|
"module" { tok KW_module }
|
|
"nand" { tok KW_nand }
|
|
"negedge" { tok KW_negedge }
|
|
"nettype" { tok KW_nettype }
|
|
"new" { tok KW_new }
|
|
"nexttime" { tok KW_nexttime }
|
|
"nmos" { tok KW_nmos }
|
|
"nor" { tok KW_nor }
|
|
"noshowcancelled" { tok KW_noshowcancelled }
|
|
"not" { tok KW_not }
|
|
"notif0" { tok KW_notif0 }
|
|
"notif1" { tok KW_notif1 }
|
|
"null" { tok KW_null }
|
|
"or" { tok KW_or }
|
|
"output" { tok KW_output }
|
|
"package" { tok KW_package }
|
|
"packed" { tok KW_packed }
|
|
"parameter" { tok KW_parameter }
|
|
"pmos" { tok KW_pmos }
|
|
"posedge" { tok KW_posedge }
|
|
"primitive" { tok KW_primitive }
|
|
"priority" { tok KW_priority }
|
|
"program" { tok KW_program }
|
|
"property" { tok KW_property }
|
|
"protected" { tok KW_protected }
|
|
"pull0" { tok KW_pull0 }
|
|
"pull1" { tok KW_pull1 }
|
|
"pulldown" { tok KW_pulldown }
|
|
"pullup" { tok KW_pullup }
|
|
"pulsestyle_ondetect" { tok KW_pulsestyle_ondetect }
|
|
"pulsestyle_onevent" { tok KW_pulsestyle_onevent }
|
|
"pure" { tok KW_pure }
|
|
"rand" { tok KW_rand }
|
|
"randc" { tok KW_randc }
|
|
"randcase" { tok KW_randcase }
|
|
"randsequence" { tok KW_randsequence }
|
|
"rcmos" { tok KW_rcmos }
|
|
"real" { tok KW_real }
|
|
"realtime" { tok KW_realtime }
|
|
"ref" { tok KW_ref }
|
|
"reg" { tok KW_reg }
|
|
"reject_on" { tok KW_reject_on }
|
|
"release" { tok KW_release }
|
|
"repeat" { tok KW_repeat }
|
|
"restrict" { tok KW_restrict }
|
|
"return" { tok KW_return }
|
|
"rnmos" { tok KW_rnmos }
|
|
"rpmos" { tok KW_rpmos }
|
|
"rtran" { tok KW_rtran }
|
|
"rtranif0" { tok KW_rtranif0 }
|
|
"rtranif1" { tok KW_rtranif1 }
|
|
"s_always" { tok KW_s_always }
|
|
"s_eventually" { tok KW_s_eventually }
|
|
"s_nexttime" { tok KW_s_nexttime }
|
|
"s_until" { tok KW_s_until }
|
|
"s_until_with" { tok KW_s_until_with }
|
|
"scalared" { tok KW_scalared }
|
|
"sequence" { tok KW_sequence }
|
|
"shortint" { tok KW_shortint }
|
|
"shortreal" { tok KW_shortreal }
|
|
"showcancelled" { tok KW_showcancelled }
|
|
"signed" { tok KW_signed }
|
|
"small" { tok KW_small }
|
|
"soft" { tok KW_soft }
|
|
"solve" { tok KW_solve }
|
|
"specify" { tok KW_specify }
|
|
"specparam" { tok KW_specparam }
|
|
"static" { tok KW_static }
|
|
"string" { tok KW_string }
|
|
"strong" { tok KW_strong }
|
|
"strong0" { tok KW_strong0 }
|
|
"strong1" { tok KW_strong1 }
|
|
"struct" { tok KW_struct }
|
|
"super" { tok KW_super }
|
|
"supply0" { tok KW_supply0 }
|
|
"supply1" { tok KW_supply1 }
|
|
"sync_accept_on" { tok KW_sync_accept_on }
|
|
"sync_reject_on" { tok KW_sync_reject_on }
|
|
"table" { tok KW_table }
|
|
"tagged" { tok KW_tagged }
|
|
"task" { tok KW_task }
|
|
"this" { tok KW_this }
|
|
"throughout" { tok KW_throughout }
|
|
"time" { tok KW_time }
|
|
"timeprecision" { tok KW_timeprecision }
|
|
"timeunit" { tok KW_timeunit }
|
|
"tran" { tok KW_tran }
|
|
"tranif0" { tok KW_tranif0 }
|
|
"tranif1" { tok KW_tranif1 }
|
|
"tri" { tok KW_tri }
|
|
"tri0" { tok KW_tri0 }
|
|
"tri1" { tok KW_tri1 }
|
|
"triand" { tok KW_triand }
|
|
"trior" { tok KW_trior }
|
|
"trireg" { tok KW_trireg }
|
|
"type" { tok KW_type }
|
|
"typedef" { tok KW_typedef }
|
|
"union" { tok KW_union }
|
|
"unique" { tok KW_unique }
|
|
"unique0" { tok KW_unique0 }
|
|
"unsigned" { tok KW_unsigned }
|
|
"until" { tok KW_until }
|
|
"until_with" { tok KW_until_with }
|
|
"untyped" { tok KW_untyped }
|
|
"use" { tok KW_use }
|
|
"uwire" { tok KW_uwire }
|
|
"var" { tok KW_var }
|
|
"vectored" { tok KW_vectored }
|
|
"virtual" { tok KW_virtual }
|
|
"void" { tok KW_void }
|
|
"wait" { tok KW_wait }
|
|
"wait_order" { tok KW_wait_order }
|
|
"wand" { tok KW_wand }
|
|
"weak" { tok KW_weak }
|
|
"weak0" { tok KW_weak0 }
|
|
"weak1" { tok KW_weak1 }
|
|
"while" { tok KW_while }
|
|
"wildcard" { tok KW_wildcard }
|
|
"wire" { tok KW_wire }
|
|
"with" { tok KW_with }
|
|
"within" { tok KW_within }
|
|
"wor" { tok KW_wor }
|
|
"xnor" { tok KW_xnor }
|
|
"xor" { tok KW_xor }
|
|
|
|
@simpleIdentifier { tok Id_simple }
|
|
@escapedIdentifier { tok Id_escaped }
|
|
@systemIdentifier { tok Id_system }
|
|
|
|
@realNumber { tok Lit_real }
|
|
@integralNumber { tok Lit_number }
|
|
@string { tok Lit_string }
|
|
@time { tok Lit_time }
|
|
|
|
"(" { tok Sym_paren_l }
|
|
")" { tok Sym_paren_r }
|
|
"[" { tok Sym_brack_l }
|
|
"]" { tok Sym_brack_r }
|
|
"{" { tok Sym_brace_l }
|
|
"}" { tok Sym_brace_r }
|
|
"~" { tok Sym_tildy }
|
|
"!" { tok Sym_bang }
|
|
"@" { tok Sym_at }
|
|
"#" { tok Sym_pound }
|
|
"%" { tok Sym_percent }
|
|
"^" { tok Sym_hat }
|
|
"&" { tok Sym_amp }
|
|
"|" { tok Sym_bar }
|
|
"*" { tok Sym_aster }
|
|
"." { tok Sym_dot }
|
|
"," { tok Sym_comma }
|
|
":" { tok Sym_colon }
|
|
";" { tok Sym_semi }
|
|
"=" { tok Sym_eq }
|
|
"<" { tok Sym_lt }
|
|
">" { tok Sym_gt }
|
|
"+" { tok Sym_plus }
|
|
"-" { tok Sym_dash }
|
|
"?" { tok Sym_question }
|
|
"/" { tok Sym_slash }
|
|
"$" { tok Sym_dollar }
|
|
"'" { tok Sym_s_quote }
|
|
|
|
"~&" { tok Sym_tildy_amp }
|
|
"~|" { tok Sym_tildy_bar }
|
|
"~^" { tok Sym_tildy_hat }
|
|
"^~" { tok Sym_hat_tildy }
|
|
"==" { tok Sym_eq_eq }
|
|
"!=" { tok Sym_bang_eq }
|
|
"&&" { tok Sym_amp_amp }
|
|
"||" { tok Sym_bar_bar }
|
|
"**" { tok Sym_aster_aster }
|
|
"<=" { tok Sym_lt_eq }
|
|
">=" { tok Sym_gt_eq }
|
|
">>" { tok Sym_gt_gt }
|
|
"<<" { tok Sym_lt_lt }
|
|
"++" { tok Sym_plus_plus }
|
|
"--" { tok Sym_dash_dash }
|
|
"+=" { tok Sym_plus_eq }
|
|
"-=" { tok Sym_dash_eq }
|
|
"*=" { tok Sym_aster_eq }
|
|
"/=" { tok Sym_slash_eq }
|
|
"%=" { tok Sym_percent_eq }
|
|
"&=" { tok Sym_amp_eq }
|
|
"|=" { tok Sym_bar_eq }
|
|
"^=" { tok Sym_hat_eq }
|
|
"+:" { tok Sym_plus_colon }
|
|
"-:" { tok Sym_dash_colon }
|
|
"::" { tok Sym_colon_colon }
|
|
".*" { tok Sym_dot_aster }
|
|
"->" { tok Sym_dash_gt }
|
|
":=" { tok Sym_colon_eq }
|
|
":/" { tok Sym_colon_slash }
|
|
"##" { tok Sym_pound_pound }
|
|
"[*" { tok Sym_brack_l_aster }
|
|
"[=" { tok Sym_brack_l_eq }
|
|
"=>" { tok Sym_eq_gt }
|
|
"@*" { tok Sym_at_aster }
|
|
"(*" { tok Sym_paren_l_aster }
|
|
"*)" { tok Sym_aster_paren_r }
|
|
"*>" { tok Sym_aster_gt }
|
|
|
|
"===" { tok Sym_eq_eq_eq }
|
|
"!==" { tok Sym_bang_eq_eq }
|
|
"==?" { tok Sym_eq_eq_question }
|
|
"!=?" { tok Sym_bang_eq_question }
|
|
">>>" { tok Sym_gt_gt_gt }
|
|
"<<<" { tok Sym_lt_lt_lt }
|
|
"<<=" { tok Sym_lt_lt_eq }
|
|
">>=" { tok Sym_gt_gt_eq }
|
|
"<->" { tok Sym_lt_dash_gt }
|
|
"|->" { tok Sym_bar_dash_gt }
|
|
"|=>" { tok Sym_bar_eq_gt }
|
|
"[->" { tok Sym_brack_l_dash_gt }
|
|
"#-#" { tok Sym_pound_dash_pound }
|
|
"#=#" { tok Sym_pound_eq_pound }
|
|
"@@(" { tok Sym_at_at_paren_l }
|
|
"(*)" { tok Sym_paren_l_aster_paren_r }
|
|
"->>" { tok Sym_dash_gt_gt }
|
|
"&&&" { tok Sym_amp_amp_amp }
|
|
|
|
"<<<=" { tok Sym_lt_lt_lt_eq }
|
|
">>>=" { tok Sym_gt_gt_gt_eq }
|
|
|
|
"`celldefine" { tok Dir_celldefine }
|
|
"`endcelldefine" { tok Dir_endcelldefine }
|
|
"`unconnected_drive" { tok Dir_unconnected_drive }
|
|
"`nounconnected_drive" { tok Dir_nounconnected_drive }
|
|
"`default_nettype" { tok Dir_default_nettype }
|
|
"`resetall" { tok Dir_resetall }
|
|
"`begin_keywords" { tok Dir_begin_keywords }
|
|
"`end_keywords" { tok Dir_end_keywords }
|
|
|
|
-- Whitespace between tokens is skipped and produces no token.
$white ;
|
|
|
|
-- Any other single character becomes an Unknown token, which postProcess
-- reports as an "unknown token" error carrying the character's position.
. { tok Unknown }
|
|
|
|
{
|
|
-- lexer entrypoint
--
-- The input is a list of (character, position) pairs. Only the characters are
-- fed to the Alex scanner. Each raw token it yields is a function still
-- waiting for the position table, which is supplied here so the token can
-- look up its own source position. The positioned tokens then go through the
-- begin_keywords/end_keywords pass, which may fail with an error message.
lexStr :: Contents -> Except String [Token]
lexStr contents = postProcess [] located
    where
        -- raw tokens completed with the shared position table
        located = map ($ positionTable) scanned
        scanned = alexScanTokens $ map fst contents
        -- indexed by absolute character offset (see `tok`)
        positionTable = Vector.fromList $ map snd contents
|
|
|
|
-- process begin/end keywords directives
--
-- Walks the positioned token stream while keeping a stack of keyword sets,
-- one per open begin_keywords block, innermost first. Each set is looked up
-- in specMap by the (unquoted) string that follows the directive, and names
-- the tokens that are to be demoted to plain identifiers inside that block.
--
-- Effects on the stream:
--   * begin_keywords (with its string) and end_keywords tokens are consumed
--     and do not appear in the output;
--   * the whitespace character that terminates an escaped identifier is
--     replaced by a single space;
--   * inside a block, any token in the innermost set becomes an Id_simple
--     whose text is the original text prefixed with an underscore.
--
-- Errors (reported via throwError, prefixed with the position where known):
-- unknown tokens, begin_keywords without a following string or with an
-- unrecognized set name, unmatched end_keywords, and blocks still open at the
-- end of input.
postProcess :: [Set.Set TokenName] -> [Token] -> Except String [Token]
postProcess [] [] = return []
postProcess stack [] =
    throwError $ "unterminated begin_keywords blocks: " ++ show stack
postProcess stack (Token Dir_begin_keywords _ pos : ts) =
    case ts of
        Token Lit_string quotedSpec _ : ts' ->
            case Map.lookup spec specMap of
                Just set -> postProcess (set : stack) ts'
                Nothing -> throwError $ show pos
                    ++ ": invalid keyword set name: " ++ show spec
            -- strip the surrounding double quotes matched by @string
            where spec = drop 1 $ dropLast quotedSpec
        _ -> throwError $ show pos ++ ": begin_keywords not followed by string"
postProcess stack (Token Dir_end_keywords _ pos : ts) =
    case stack of
        _ : stack' -> postProcess stack' ts
        [] -> throwError $ show pos ++ ": unmatched end_keywords"
postProcess stack (Token Id_escaped str pos : ts) =
    fmap (normalized :) $ postProcess stack ts
    where
        -- the lexeme ends with whichever whitespace character terminated the
        -- identifier; normalize that terminator to a single space
        normalized = Token Id_escaped (dropLast str ++ " ") pos
postProcess _ (Token Unknown str pos : _) =
    throwError $ show pos ++ ": unknown token '" ++ str ++ "'"
postProcess stack (t : ts) =
    fmap (t' :) $ postProcess stack ts
    where
        -- only the innermost open block decides whether a token is demoted;
        -- outside of any block every token passes through untouched
        t' = case (stack, t) of
            (excluded : _, Token tokId str pos)
                | Set.member tokId excluded -> Token Id_simple ('_' : str) pos
            _ -> t

-- Total counterpart of `init`: drops the final element, and maps the empty
-- list to itself instead of failing.
dropLast :: [a] -> [a]
dropLast xs = take (length xs - 1) xs
|
|
|
|
-- Rule action shared by every token rule. Alex provides the match position
-- and the matched text; the first field of the Alex position is used as an
-- index into the position table to recover the token's source position. The
-- table is supplied last, by lexStr, once scanning has produced the raw
-- tokens.
tok :: TokenName -> AlexPosn -> String -> Vector.Vector Position -> Token
tok name (AlexPn offset _ _) text table =
    Token name text (table Vector.! offset)
|
|
}
|