1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 1996-2018. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20%% Yacc like LALR-1 parser generator for Erlang. 21%% Ref: Aho & Johnson: "LR Parsing", ACM Computing Surveys, vol. 6:2, 1974. 22%% Auxiliary files: yeccgramm.yrl, yeccparser.erl, yeccpre.hrl, yeccscan.erl. 23%% 24 25-module(yecc). 26 27-export([compile/3, file/1, file/2, format_error/1]). 28 29%% Kept for compatibility with R10B. 30-export([yecc/2, yecc/3, yecc/4]). 31 32-import(lists, [append/1, append/2, concat/1, delete/2, filter/2, 33 flatmap/2, foldl/3, foldr/3, foreach/2, keydelete/3, 34 keysort/2, last/1, map/2, member/2, reverse/1, 35 sort/1, usort/1]). 36 37-include("erl_compile.hrl"). 38-include("ms_transform.hrl"). 39 40-record(yecc, { 41 infile, 42 outfile, 43 includefile, 44 includefile_version, 45 module, 46 encoding = none, 47 options = [], 48 verbose = false, 49 file_attrs = true, 50 errors = [], 51 warnings = [], 52 conflicts_done = false, 53 shift_reduce = [], 54 reduce_reduce = [], 55 n_states = 0, 56 inport, 57 outport, 58 line, 59 60 parse_actions, 61 symbol_tab, 62 inv_symbol_tab, 63 state_tab, 64 prec_tab, 65 goto_tab, 66 67 terminals = [], 68 nonterminals = [], 69 all_symbols = [], 70 prec = [], 71 rules_list = [], 72 rules, % a tuple of rules_list 73 rule_pointer2rule, 74 rootsymbol = [], 75 endsymbol = [], 76 expect_shift_reduce = [], 77 expect_n_states = [], 78 header = [], 79 erlang_code = none 80 }). 81 82-record(rule, { 83 n, % rule n in the grammar file 84 anno, 85 symbols, % the names of symbols 86 tokens, 87 is_guard, % the action is a guard (not used) 88 is_well_formed % can be parsed (without macro expansion) 89 }). 90 91-record(reduce, { 92 rule_nmbr, 93 head, 94 nmbr_of_daughters, 95 prec, 96 unused % assure that #reduce{} comes before #shift{} when sorting 97 }). 98 99-record(shift, { 100 state, 101 pos, 102 prec, 103 rule_nmbr 104 }). 105 106-record(user_code, {state, terminal, funname, action}). 107 108-record(symbol, {anno = none, name}). 109 110%% ACCEPT is neither an atom nor a non-terminal. 111-define(ACCEPT, {}). 112 113%% During the phase 'compute_states' terminals in lookahead sets are 114%% coded as integers; sets of terminals are integer bit masks. This is 115%% for efficiency only. '$empty' is always given the mask 1. The 116%% behaviour can be turned off by un-defining SYMBOLS_AS_CODES (useful 117%% when debugging). 118 119%% Non-terminals are also given integer codes, starting with -1. The 120%% absolut value of the code is used for indexing a tuple of lists of 121%% rules. 122 123-define(SYMBOLS_AS_CODES, true). 124 125-ifdef(SYMBOLS_AS_CODES). 126-define(EMPTY, 0). 127-else. 128-define(EMPTY, '$empty'). 129-endif. 130 131%%% 132%%% Exported functions 133%%% 134 135%%% Interface to erl_compile. 136 137compile(Input0, Output0, 138 #options{warning = WarnLevel, verbose=Verbose, includes=Includes, 139 specific=Specific}) -> 140 Input = shorten_filename(Input0), 141 Output = shorten_filename(Output0), 142 Includefile = lists:sublist(Includes, 1), 143 Werror = proplists:get_bool(warnings_as_errors, Specific), 144 Opts = [{parserfile,Output}, {includefile,Includefile}, {verbose,Verbose}, 145 {report_errors, true}, {report_warnings, WarnLevel > 0}, 146 {warnings_as_errors, Werror}], 147 case file(Input, Opts) of 148 {ok, _OutFile} -> 149 ok; 150 error -> 151 error 152 end. 153 154format_error(bad_declaration) -> 155 io_lib:fwrite("unknown or bad declaration, ignored", []); 156format_error({bad_expect, SymName}) -> 157 io_lib:fwrite("argument ~ts of Expect is not an integer", 158 [format_symbol(SymName)]); 159format_error({bad_rootsymbol, SymName}) -> 160 io_lib:fwrite("rootsymbol ~ts is not a nonterminal", 161 [format_symbol(SymName)]); 162format_error({bad_states, SymName}) -> 163 io_lib:fwrite("argument ~ts of States is not an integer", 164 [format_symbol(SymName)]); 165format_error({conflict, Conflict}) -> 166 format_conflict(Conflict); 167format_error({conflicts, SR, RR}) -> 168 io_lib:fwrite("conflicts: ~w shift/reduce, ~w reduce/reduce", [SR, RR]); 169format_error({duplicate_declaration, Tag}) -> 170 io_lib:fwrite("duplicate declaration of ~s", [atom_to_list(Tag)]); 171format_error({duplicate_nonterminal, Nonterminal}) -> 172 io_lib:fwrite("duplicate non-terminals ~ts", 173 [format_symbol(Nonterminal)]); 174format_error({duplicate_precedence, Op}) -> 175 io_lib:fwrite("duplicate precedence operator ~ts", 176 [format_symbol(Op)]); 177format_error({duplicate_terminal, Terminal}) -> 178 io_lib:fwrite("duplicate terminal ~ts", 179 [format_symbol(Terminal)]); 180format_error({endsymbol_is_nonterminal, Symbol}) -> 181 io_lib:fwrite("endsymbol ~ts is a nonterminal", 182 [format_symbol(Symbol)]); 183format_error({endsymbol_is_terminal, Symbol}) -> 184 io_lib:fwrite("endsymbol ~ts is a terminal", 185 [format_symbol(Symbol)]); 186format_error({error, Module, Error}) -> 187 Module:format_error(Error); 188format_error({file_error, Reason}) -> 189 io_lib:fwrite("~ts",[file:format_error(Reason)]); 190format_error(illegal_empty) -> 191 io_lib:fwrite("illegal use of empty symbol", []); 192format_error({internal_error, Error}) -> 193 io_lib:fwrite("internal yecc error: ~w", [Error]); 194format_error({missing_syntax_rule, Nonterminal}) -> 195 io_lib:fwrite("no syntax rule for non-terminal symbol ~ts", 196 [format_symbol(Nonterminal)]); 197format_error({n_states, Exp, N}) -> 198 io_lib:fwrite("expected ~w states, but got ~p states", [Exp, N]); 199format_error(no_grammar_rules) -> 200 io_lib:fwrite("grammar rules are missing", []); 201format_error(nonterminals_missing) -> 202 io_lib:fwrite("Nonterminals is missing", []); 203format_error({precedence_op_is_endsymbol, SymName}) -> 204 io_lib:fwrite("precedence operator ~ts is endsymbol", 205 [format_symbol(SymName)]); 206format_error({precedence_op_is_unknown, SymName}) -> 207 io_lib:fwrite("unknown precedence operator ~ts", 208 [format_symbol(SymName)]); 209format_error({reserved, N}) -> 210 io_lib:fwrite("the use of ~w should be avoided", [N]); 211format_error({symbol_terminal_and_nonterminal, SymName}) -> 212 io_lib:fwrite("symbol ~ts is both a terminal and nonterminal", 213 [format_symbol(SymName)]); 214format_error(rootsymbol_missing) -> 215 io_lib:fwrite("Rootsymbol is missing", []); 216format_error(terminals_missing) -> 217 io_lib:fwrite("Terminals is missing", []); 218format_error({undefined_nonterminal, Symbol}) -> 219 io_lib:fwrite("undefined nonterminal: ~ts", [format_symbol(Symbol)]); 220format_error({undefined_pseudo_variable, Atom}) -> 221 io_lib:fwrite("undefined pseudo variable ~w", [Atom]); 222format_error({undefined_symbol, SymName}) -> 223 io_lib:fwrite("undefined rhs symbol ~ts", [format_symbol(SymName)]); 224format_error({unused_nonterminal, Nonterminal}) -> 225 io_lib:fwrite("non-terminal symbol ~ts not used", 226 [format_symbol(Nonterminal)]); 227format_error({unused_terminal, Terminal}) -> 228 io_lib:fwrite("terminal symbol ~ts not used", 229 [format_symbol(Terminal)]); 230format_error({bad_symbol, String}) -> 231 io_lib:fwrite("bad symbol ~ts", [String]); 232format_error(cannot_parse) -> 233 io_lib:fwrite("cannot parse; possibly encoding mismatch", []). 234 235file(File) -> 236 file(File, [report_errors, report_warnings]). 237 238file(File, Options) -> 239 case is_filename(File) of 240 no -> erlang:error(badarg, [File, Options]); 241 _ -> ok 242 end, 243 case options(Options) of 244 badarg -> 245 erlang:error(badarg, [File, Options]); 246 OptionValues -> 247 Self = self(), 248 Flag = process_flag(trap_exit, false), 249 Pid = spawn_link(fun() -> infile(Self, File, OptionValues) end), 250 receive 251 {Pid, Rep} -> 252 receive after 1 -> ok end, 253 process_flag(trap_exit, Flag), 254 Rep 255 end 256 end. 257 258%% Kept for backward compatibility. 259yecc(Infile, Outfile) -> 260 yecc(Infile, Outfile, false, []). 261 262yecc(Infile, Outfile, Verbose) -> 263 yecc(Infile, Outfile, Verbose, []). 264 265yecc(Infilex, Outfilex, Verbose, Includefilex) -> 266 _ = statistics(runtime), 267 case file(Infilex, [{parserfile, Outfilex}, 268 {verbose, Verbose}, 269 {report, true}, 270 {includefile, Includefilex}]) of 271 {ok, _File} -> 272 statistics(runtime); 273 error -> 274 exit(error) 275 end. 276 277%%% 278%%% Local functions 279%%% 280 281options(Options0) when is_list(Options0) -> 282 try 283 Options = flatmap(fun(return) -> short_option(return, true); 284 (report) -> short_option(report, true); 285 ({return,T}) -> short_option(return, T); 286 ({report,T}) -> short_option(report, T); 287 (T) -> [T] 288 end, Options0), 289 options(Options, [file_attributes, includefile, parserfile, 290 report_errors, report_warnings, warnings_as_errors, 291 return_errors, return_warnings, time, verbose], []) 292 catch error: _ -> badarg 293 end; 294options(Option) -> 295 options([Option]). 296 297short_option(return, T) -> 298 [{return_errors,T}, {return_warnings,T}]; 299short_option(report, T) -> 300 [{report_errors,T}, {report_warnings,T}]. 301 302options(Options0, [Key | Keys], L) when is_list(Options0) -> 303 Options = case member(Key, Options0) of 304 true -> 305 [atom_option(Key) | delete(Key, Options0)]; 306 false -> 307 Options0 308 end, 309 V = case lists:keyfind(Key, 1, Options) of 310 {Key, Filename0} when Key =:= includefile; 311 Key =:= parserfile -> 312 case is_filename(Filename0) of 313 no -> 314 badarg; 315 Filename -> 316 {ok, [{Key, Filename}]} 317 end; 318 {Key, Bool} = KB when is_boolean(Bool) -> 319 {ok, [KB]}; 320 {Key, _} -> 321 badarg; 322 false -> 323 {ok, [{Key, default_option(Key)}]} 324 end, 325 case V of 326 badarg -> 327 badarg; 328 {ok, KeyValueL} -> 329 NewOptions = keydelete(Key, 1, Options), 330 options(NewOptions, Keys, KeyValueL ++ L) 331 end; 332options([], [], L) -> 333 foldl(fun({_,false}, A) -> A; 334 ({Tag,true}, A) -> [Tag | A]; 335 (F, A) -> [F | A] 336 end, [], L); 337options(_Options, _, _L) -> 338 badarg. 339 340default_option(file_attributes) -> true; 341default_option(includefile) -> []; 342default_option(parserfile) -> []; 343default_option(report_errors) -> true; 344default_option(report_warnings) -> true; 345default_option(warnings_as_errors) -> false; 346default_option(return_errors) -> false; 347default_option(return_warnings) -> false; 348default_option(time) -> false; 349default_option(verbose) -> false. 350 351atom_option(file_attributes) -> {file_attributes, true}; 352atom_option(report_errors) -> {report_errors, true}; 353atom_option(report_warnings) -> {report_warnings, true}; 354atom_option(warnings_as_errors) -> {warnings_as_errors,true}; 355atom_option(return_errors) -> {return_errors, true}; 356atom_option(return_warnings) -> {return_warnings, true}; 357atom_option(time) -> {time, true}; 358atom_option(verbose) -> {verbose, true}; 359atom_option(Key) -> Key. 360 361is_filename(T) -> 362 try filename:flatten(T) 363 catch error: _ -> no 364 end. 365 366shorten_filename(Name0) -> 367 {ok,Cwd} = file:get_cwd(), 368 case string:prefix(Name0, Cwd) of 369 nomatch -> Name0; 370 Rest -> 371 case unicode:characters_to_list(Rest) of 372 "/"++N -> N; 373 N -> N 374 end 375 end. 376 377start(Infilex, Options) -> 378 Infile = assure_extension(Infilex, ".yrl"), 379 {_, Outfilex0} = lists:keyfind(parserfile, 1, Options), 380 {_, Includefilex} = lists:keyfind(includefile, 1, Options), 381 Outfilex = case Outfilex0 of 382 [] -> filename:rootname(Infilex, ".yrl"); 383 _ -> Outfilex0 384 end, 385 Includefile = case Includefilex of 386 [] -> []; 387 _ -> assure_extension(Includefilex,".hrl") 388 end, 389 IncludefileVersion = includefile_version(Includefile), 390 Outfile = assure_extension(Outfilex, ".erl"), 391 Module = list_to_atom(filename:basename(Outfile, ".erl")), 392 #yecc{infile = Infile, 393 outfile = Outfile, 394 includefile = Includefile, 395 includefile_version = IncludefileVersion, 396 module = Module, 397 options = Options, 398 verbose = member(verbose, Options), 399 file_attrs = member(file_attributes, Options)}. 400 401assure_extension(File, Ext) -> 402 concat([strip_extension(File, Ext), Ext]). 403 404%% Assumes File is a filename. 405strip_extension(File, Ext) -> 406 case filename:extension(File) of 407 Ext -> filename:rootname(File); 408 _Other -> File 409 end. 410 411infile(Parent, Infilex, Options) -> 412 St0 = start(Infilex, Options), 413 St = case file:open(St0#yecc.infile, [read, read_ahead]) of 414 {ok, Inport} -> 415 try 416 Encoding = epp:set_encoding(Inport), 417 St1 = St0#yecc{inport = Inport, encoding = Encoding}, 418 outfile(St1) 419 after 420 ok = file:close(Inport) 421 end; 422 {error, Reason} -> 423 add_error(St0#yecc.infile, none, {file_error, Reason}, St0) 424 end, 425 case {St#yecc.errors, werror(St)} of 426 {[], false} -> ok; 427 _ -> _ = file:delete(St#yecc.outfile), ok 428 end, 429 Parent ! {self(), yecc_ret(St)}. 430 431werror(St) -> 432 St#yecc.warnings =/= [] 433 andalso member(warnings_as_errors, St#yecc.options). 434 435outfile(St0) -> 436 case file:open(St0#yecc.outfile, [write, delayed_write]) of 437 {ok, Outport} -> 438 try 439 %% Set the same encoding as infile: 440 set_encoding(St0, Outport), 441 generate(St0#yecc{outport = Outport, line = 1}) 442 catch 443 throw: St1 -> 444 St1; 445 exit: Reason -> 446 add_error({internal_error, Reason}, St0) 447 after 448 ok = file:close(Outport) 449 end; 450 {error, Reason} -> 451 add_error(St0#yecc.outfile, none, {file_error, Reason}, St0) 452 end. 453 454os_process_size() -> 455 case os:type() of 456 {unix, sunos} -> 457 Size = os:cmd("ps -o vsz -p " ++ os:getpid() ++ " | tail -1"), 458 list_to_integer(nonl(Size)); 459 _ -> 460 0 461 end. 462 463nonl([$\n]) -> []; 464nonl([]) -> []; 465nonl([H|T]) -> [H|nonl(T)]. 466 467timeit(Name, Fun, St0) -> 468 Time = runtime, 469 %% Time = wall_clock, 470 {Before, _} = statistics(Time), 471 St = Fun(St0), 472 {After, _} = statistics(Time), 473 Mem0 = erts_debug:flat_size(St)*erlang:system_info(wordsize), 474 Mem = lists:flatten(io_lib:format("~.1f kB", [Mem0/1024])), 475 Sz = lists:flatten(io_lib:format("~.1f MB", [os_process_size()/1024])), 476 io:fwrite(" ~-30w: ~10.2f s ~12s ~10s\n", 477 [Name, (After-Before)/1000, Mem, Sz]), 478 St. 479 480-define(PASS(P), {P, fun P/1}). 481 482generate(St0) -> 483 St1 = output_encoding_comment(St0), 484 Passes = [?PASS(parse_grammar), ?PASS(check_grammar), 485 ?PASS(states_and_goto_table), ?PASS(parse_actions), 486 ?PASS(action_conflicts), ?PASS(write_file)], 487 F = case member(time, St1#yecc.options) of 488 true -> 489 io:fwrite(<<"Generating parser from grammar in ~ts\n">>, 490 [format_filename(St1#yecc.infile, St1)]), 491 fun timeit/3; 492 false -> 493 fun(_Name, Fn, St) -> Fn(St) end 494 end, 495 Fun = fun({Name, Fun}, St) -> 496 St2 = F(Name, Fun, St), 497 if 498 St2#yecc.errors =:= [] -> St2; 499 true -> throw(St2) 500 end 501 end, 502 foldl(Fun, St1, Passes). 503 504parse_grammar(St) -> 505 parse_grammar(St#yecc.inport, 1, St). 506 507parse_grammar(Inport, Line, St) -> 508 {NextLine, Grammar} = read_grammar(Inport, St, Line), 509 parse_grammar(Grammar, Inport, NextLine, St). 510 511parse_grammar(eof, _Inport, _NextLine, St) -> 512 St; 513parse_grammar({#symbol{name = 'Header'}, Ss}, Inport, NextLine, St0) -> 514 St1 = St0#yecc{header = [S || {string,_,S} <- Ss]}, 515 parse_grammar(Inport, NextLine, St1); 516parse_grammar({#symbol{name = 'Erlang'}, [#symbol{name = code}]}, _Inport, 517 NextLine, St) -> 518 St#yecc{erlang_code = NextLine}; 519parse_grammar(Grammar, Inport, NextLine, St0) -> 520 St = parse_grammar(Grammar, St0), 521 parse_grammar(Inport, NextLine, St). 522 523parse_grammar({error,ErrorLine,Error}, St) -> 524 add_error(erl_anno:new(ErrorLine), Error, St); 525parse_grammar({rule, Rule, Tokens}, St0) -> 526 NmbrOfDaughters = case Rule of 527 [_, #symbol{name = '$empty'}] -> 0; 528 _ -> length(Rule) - 1 529 end, 530 {IsGuard, IsWellFormed} = check_action(Tokens), 531 {Tokens1, St} = subst_pseudo_vars(Tokens, 532 NmbrOfDaughters, 533 St0), 534 RuleDef = #rule{symbols = Rule, 535 tokens = Tokens1, 536 is_guard = IsGuard, 537 is_well_formed = IsWellFormed}, 538 St#yecc{rules_list = [RuleDef | St#yecc.rules_list]}; 539parse_grammar({prec, Prec}, St) -> 540 St#yecc{prec = Prec ++ St#yecc.prec}; 541parse_grammar({#symbol{}, [{string,Anno,String}]}, St) -> 542 add_error(Anno, {bad_symbol, String}, St); 543parse_grammar({#symbol{anno = Anno, name = Name}, Symbols}, St) -> 544 CF = fun(I) -> 545 case element(I, St) of 546 [] -> 547 setelement(I, St, Symbols); 548 _ -> 549 add_error(Anno, {duplicate_declaration, Name}, St) 550 end 551 end, 552 OneSymbol = length(Symbols) =:= 1, 553 case Name of 554 'Nonterminals' -> CF(#yecc.nonterminals); 555 'Terminals' -> CF(#yecc.terminals); 556 'Rootsymbol' when OneSymbol -> CF(#yecc.rootsymbol); 557 'Endsymbol' when OneSymbol -> CF(#yecc.endsymbol); 558 'Expect' when OneSymbol -> CF(#yecc.expect_shift_reduce); 559 'States' when OneSymbol -> CF(#yecc.expect_n_states); % undocumented 560 _ -> add_warning(Anno, bad_declaration, St) 561 end. 562 563read_grammar(Inport, St, Line) -> 564 case yeccscan:scan(Inport, '', Line) of 565 {eof, NextLine} -> 566 {NextLine, eof}; 567 {error, {ErrorLine, Mod, What}, NextLine} -> 568 {NextLine, {error, ErrorLine, {error, Mod, What}}}; 569 {error, terminated} -> 570 throw(St); 571 {error, _} -> 572 File = St#yecc.infile, 573 throw(add_error(File, none, cannot_parse, St)); 574 {ok, Input, NextLine} -> 575 {NextLine, case yeccparser:parse(Input) of 576 {error, {ErrorLine, Mod, Message}} -> 577 {error, ErrorLine, {error, Mod, Message}}; 578 {ok, {rule, Rule, {erlang_code, Tokens}}} -> 579 {rule, Rule, Tokens}; 580 {ok, {#symbol{name=P}, 581 [#symbol{name=I} | OpL]}=Ss} -> 582 A = precedence(P), 583 if 584 A =/= unknown, 585 is_integer(I), 586 OpL =/= [] -> 587 Ps = [{Op, I , A} || Op <- OpL], 588 {prec, Ps}; 589 true -> 590 Ss 591 end; 592 {ok, Ss} -> 593 Ss 594 end} 595 end. 596 597precedence('Left') -> left; 598precedence('Right') -> right; 599precedence('Unary') -> unary; 600precedence('Nonassoc') -> nonassoc; 601precedence(_) -> unknown. 602 603%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 604 605check_grammar(St0) -> 606 Empty = #symbol{anno = none, name = '$empty'}, 607 AllSymbols = St0#yecc.nonterminals ++ St0#yecc.terminals ++ [Empty], 608 St1 = St0#yecc{all_symbols = AllSymbols}, 609 Cs = [fun check_nonterminals/1, fun check_terminals/1, 610 fun check_rootsymbol/1, fun check_endsymbol/1, 611 fun check_expect/1, fun check_states/1, 612 fun check_precedences/1, fun check_rules/1], 613 foldl(fun(F, St) -> F(St) end, St1, Cs). 614 615check_nonterminals(St) -> 616 case St#yecc.nonterminals of 617 [] -> 618 add_error(nonterminals_missing, St); 619 Nonterminals -> 620 {Unique, Dups} = duplicates(names(Nonterminals)), 621 St1 = add_warnings(Dups, duplicate_nonterminal, St), 622 St2 = check_reserved(Unique, St1), 623 St2#yecc{nonterminals = [?ACCEPT | Unique]} 624 end. 625 626check_terminals(St0) -> 627 case St0#yecc.terminals of 628 [] -> 629 add_error(terminals_missing, St0); 630 Terminals -> 631 {Unique, Dups} = duplicates(names(Terminals)), 632 St1 = add_warnings(Dups, duplicate_terminal, St0), 633 Common = intersect(St1#yecc.nonterminals, Unique), 634 St2 = add_errors(Common, symbol_terminal_and_nonterminal, St1), 635 St3 = check_reserved(Unique, St2), 636 St3#yecc{terminals = ['$empty' | Unique]} 637 end. 638 639check_reserved(Names, St) -> 640 add_errors(intersect(Names, ['$empty', '$end', '$undefined']), 641 reserved, St). 642 643check_rootsymbol(St) -> 644 case St#yecc.rootsymbol of 645 [] -> 646 add_error(rootsymbol_missing, St); 647 [#symbol{anno = Anno, name = SymName}] -> 648 case kind_of_symbol(St, SymName) of 649 nonterminal -> 650 St#yecc{rootsymbol = SymName}; 651 _ -> 652 add_error(Anno, {bad_rootsymbol, SymName}, St) 653 end 654 end. 655 656check_endsymbol(St) -> 657 case St#yecc.endsymbol of 658 [] -> 659 St#yecc{endsymbol = '$end'}; 660 [#symbol{anno = Anno, name = SymName}] -> 661 case kind_of_symbol(St, SymName) of 662 nonterminal -> 663 add_error(Anno, {endsymbol_is_nonterminal, SymName}, St); 664 terminal -> 665 add_error(Anno, {endsymbol_is_terminal, SymName}, St); 666 _ -> 667 St#yecc{endsymbol = SymName} 668 end 669 end. 670 671check_expect(St0) -> 672 case St0#yecc.expect_shift_reduce of 673 [] -> 674 St0#yecc{expect_shift_reduce = 0}; 675 [#symbol{name = Expect}] when is_integer(Expect) -> 676 St0#yecc{expect_shift_reduce = Expect}; 677 [#symbol{anno = Anno, name = Name}] -> 678 St1 = add_error(Anno, {bad_expect, Name}, St0), 679 St1#yecc{expect_shift_reduce = 0} 680 end. 681 682check_states(St) -> 683 case St#yecc.expect_n_states of 684 [] -> 685 St; 686 [#symbol{name = NStates}] when is_integer(NStates) -> 687 St#yecc{expect_n_states = NStates}; 688 [#symbol{anno = Anno, name = Name}] -> 689 add_error(Anno, {bad_states, Name}, St) 690 end. 691 692check_precedences(St0) -> 693 {St1, _} = 694 foldr(fun({#symbol{anno = Anno, name = Op},_I,_A}, {St,Ps}) -> 695 case member(Op, Ps) of 696 true -> 697 {add_error(Anno, {duplicate_precedence,Op}, St), 698 Ps}; 699 false -> 700 {St, [Op | Ps]} 701 end 702 end, {St0,[]}, St0#yecc.prec), 703 foldl(fun({#symbol{anno = Anno, name = Op},I,A}, St) -> 704 case kind_of_symbol(St, Op) of 705 endsymbol -> 706 add_error(Anno,{precedence_op_is_endsymbol,Op}, St); 707 unknown -> 708 add_error(Anno, {precedence_op_is_unknown, Op}, St); 709 _ -> 710 St#yecc{prec = [{Op,I,A} | St#yecc.prec]} 711 end 712 end, St1#yecc{prec = []}, St1#yecc.prec). 713 714check_rule(Rule0, {St0,Rules}) -> 715 Symbols = Rule0#rule.symbols, 716 #symbol{anno = HeadAnno, name = Head} = hd(Symbols), 717 case member(Head, St0#yecc.nonterminals) of 718 false -> 719 {add_error(HeadAnno, {undefined_nonterminal, Head}, St0), Rules}; 720 true -> 721 St = check_rhs(tl(Symbols), St0), 722 Rule = Rule0#rule{anno = HeadAnno, symbols = names(Symbols)}, 723 {St, [Rule | Rules]} 724 end. 725 726check_rules(St0) -> 727 {St,Rules0} = foldl(fun check_rule/2, {St0,[]}, St0#yecc.rules_list), 728 case St#yecc.rules_list of 729 [] -> 730 add_error(no_grammar_rules, St); 731 _ -> 732 Rule = #rule{anno = none, 733 symbols = [?ACCEPT, St#yecc.rootsymbol], 734 tokens = []}, 735 Rules1 = [Rule | Rules0], 736 Rules = map(fun({R,I}) -> R#rule{n = I} end, count(0, Rules1)), 737 St#yecc{rules_list = Rules, rules = list_to_tuple(Rules)} 738 end. 739 740duplicates(List) -> 741 Unique = usort(List), 742 {Unique, List -- Unique}. 743 744names(Symbols) -> 745 map(fun(Symbol) -> Symbol#symbol.name end, Symbols). 746 747symbol_anno(Name, St) -> 748 #symbol{anno = Anno} = symbol_find(Name, St#yecc.all_symbols), 749 Anno. 750 751symbol_member(Symbol, Symbols) -> 752 symbol_find(Symbol#symbol.name, Symbols) =/= false. 753 754symbol_find(Name, Symbols) -> 755 lists:keyfind(Name, #symbol.name, Symbols). 756 757states_and_goto_table(St0) -> 758 St1 = create_symbol_table(St0), 759 St = compute_states(St1), 760 create_precedence_table(St). 761 762parse_actions(St) -> 763 _ = erase(), % the pd is used when decoding lookahead sets 764 ParseActions = compute_parse_actions(St#yecc.n_states, St, []), 765 _ = erase(), 766 St#yecc{parse_actions = ParseActions, state_tab = []}. 767 768action_conflicts(St0) -> 769 St = find_action_conflicts(St0), 770 St#yecc{conflicts_done = true}. 771 772-record(state_info, {reduce_only, state_repr, comment}). 773 774write_file(St0) -> 775 #yecc{parse_actions = ParseActions, goto_tab = GotoTab} = St0, 776 Sorted = sort_parse_actions(ParseActions), 777 StateReprs = find_identical_shift_states(Sorted), 778 StateInfo = collect_some_state_info(Sorted, StateReprs), 779 StateJumps = find_partial_shift_states(Sorted, StateReprs), 780 UserCodeActions = find_user_code(Sorted, St0), 781 #yecc{infile = Infile, outfile = Outfile, 782 inport = Inport, outport = Outport, 783 nonterminals = Nonterminals} = St0, 784 {St10, N_lines, LastErlangCodeLine} = 785 output_prelude(Outport, Inport, St0), 786 St20 = St10#yecc{line = St10#yecc.line + N_lines}, 787 St25 = nl(St20), 788 St30 = output_file_directive(St25, Outfile, St25#yecc.line), 789 St40 = nl(St30), 790 St50 = output_actions(St40, StateJumps, StateInfo), 791 Go0 = [{Symbol,{From,To}} || {{From,Symbol},To} <- ets:tab2list(GotoTab)], 792 Go = family_with_domain(Go0, Nonterminals), 793 St60 = output_goto(St50, Go, StateInfo), 794 St70 = output_inlined(St60, UserCodeActions, Infile), 795 St = nl(St70), 796 case LastErlangCodeLine of 797 %% Just in case warnings or errors are emitted after the last 798 %% line of the file. 799 {last_erlang_code_line, Last_line} -> 800 output_file_directive(St, Infile, Last_line); 801 no_erlang_code -> 802 St 803 end. 804 805yecc_ret(St0) -> 806 St = check_expected(St0), 807 report_errors(St), 808 report_warnings(St), 809 Es = pack_errors(St#yecc.errors), 810 Ws = pack_warnings(St#yecc.warnings), 811 Werror = werror(St), 812 if 813 Werror -> 814 do_error_return(St, Es, Ws); 815 Es =:= [] -> 816 case member(return_warnings, St#yecc.options) of 817 true -> {ok, St#yecc.outfile, Ws}; 818 false -> {ok, St#yecc.outfile} 819 end; 820 true -> 821 do_error_return(St, Es, Ws) 822 end. 823 824do_error_return(St, Es, Ws) -> 825 case member(return_errors, St#yecc.options) of 826 true -> {error, Es, Ws}; 827 false -> error 828 end. 829 830check_expected(St0) -> 831 #yecc{shift_reduce = SR, reduce_reduce = RR, expect_shift_reduce = ExpSR, 832 n_states = NStates0, expect_n_states = ExpStates, 833 conflicts_done = Done} = St0, 834 N_RR = length(usort(RR)), 835 N_SR = length(usort(SR)), 836 St1 = if 837 not Done -> 838 St0; 839 N_SR =:= ExpSR, N_RR =:= 0 -> 840 St0; 841 true -> 842 add_warning(none, {conflicts, N_SR, N_RR}, St0) 843 end, 844 NStates = NStates0 + 1, 845 if 846 (not Done) or (ExpStates =:= []) or (NStates =:= ExpStates) -> 847 St1; 848 true -> 849 add_warning(none, {n_states, ExpStates, NStates}, St1) 850 end. 851 852pack_errors([{File,_} | _] = Es) -> 853 [{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}]; 854pack_errors([]) -> 855 []. 856 857pack_warnings([{File,_} | _] = Ws) -> 858 [{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}]; 859pack_warnings([]) -> 860 []. 861 862report_errors(St) -> 863 case member(report_errors, St#yecc.options) of 864 true -> 865 foreach(fun({File,{none,Mod,E}}) -> 866 io:fwrite(<<"~ts: ~ts\n">>, 867 [File,Mod:format_error(E)]); 868 ({File,{Line,Mod,E}}) -> 869 io:fwrite(<<"~ts:~w: ~ts\n">>, 870 [File,Line,Mod:format_error(E)]) 871 end, sort(St#yecc.errors)); 872 false -> 873 ok 874 end. 875 876report_warnings(St) -> 877 Werror = member(warnings_as_errors, St#yecc.options), 878 Prefix = case Werror of 879 true -> ""; 880 false -> "Warning: " 881 end, 882 ReportWerror = Werror andalso member(report_errors, St#yecc.options), 883 case member(report_warnings, St#yecc.options) orelse ReportWerror of 884 true -> 885 foreach(fun({File,{none,Mod,W}}) -> 886 io:fwrite(<<"~ts: ~s~ts\n">>, 887 [File,Prefix, 888 Mod:format_error(W)]); 889 ({File,{Line,Mod,W}}) -> 890 io:fwrite(<<"~ts:~w: ~s~ts\n">>, 891 [File,Line,Prefix, 892 Mod:format_error(W)]) 893 end, sort(St#yecc.warnings)); 894 false -> 895 ok 896 end. 897 898add_error(E, St) -> 899 add_error(none, E, St). 900 901add_error(Anno, E, St) -> 902 add_error(St#yecc.infile, Anno, E, St). 903 904add_error(File, Anno, E, St) -> 905 Loc = location(Anno), 906 St#yecc{errors = [{File,{Loc,?MODULE,E}}|St#yecc.errors]}. 907 908add_errors(SymNames, E0, St0) -> 909 foldl(fun(SymName, St) -> 910 add_error(symbol_anno(SymName, St), {E0, SymName}, St) 911 end, St0, SymNames). 912 913add_warning(Anno, W, St) -> 914 Loc = location(Anno), 915 St#yecc{warnings = [{St#yecc.infile,{Loc,?MODULE,W}}|St#yecc.warnings]}. 916 917add_warnings(SymNames, W0, St0) -> 918 foldl(fun(SymName, St) -> 919 add_warning(symbol_anno(SymName, St), {W0, SymName}, St) 920 end, St0, SymNames). 921 922check_rhs([#symbol{name = '$empty'}], St) -> 923 St; 924check_rhs(Rhs, St0) -> 925 case symbol_find('$empty', Rhs) of 926 #symbol{anno = Anno} -> 927 add_error(Anno, illegal_empty, St0); 928 false -> 929 foldl(fun(Sym, St) -> 930 case symbol_member(Sym, St#yecc.all_symbols) of 931 true -> 932 St; 933 false -> 934 E = {undefined_symbol,Sym#symbol.name}, 935 add_error(Sym#symbol.anno, E, St) 936 end 937 end, St0, Rhs) 938 end. 939 940check_action(Tokens) -> 941 case erl_parse:parse_exprs(add_roberts_dot(Tokens, erl_anno:new(0))) of 942 {error, _Error} -> 943 {false, false}; 944 {ok, [Expr | Exprs]} -> 945 IsGuard = Exprs =:= [] andalso erl_lint:is_guard_test(Expr), 946 {IsGuard, true} 947 end. 948 949add_roberts_dot([], Anno) -> 950 [{'dot', Anno}]; 951add_roberts_dot([{'dot', Anno} | _], _) -> 952 [{'dot', Anno}]; 953add_roberts_dot([Token | Tokens], _) -> 954 [Token | add_roberts_dot(Tokens, element(2, Token))]. 955 956subst_pseudo_vars([], _, St) -> 957 {[], St}; 958subst_pseudo_vars([H0 | T0], NmbrOfDaughters, St0) -> 959 {H, St1} = subst_pseudo_vars(H0, NmbrOfDaughters, St0), 960 {T, St} = subst_pseudo_vars(T0, NmbrOfDaughters, St1), 961 {[H | T], St}; 962subst_pseudo_vars({atom, Anno, Atom}, NmbrOfDaughters, St0) -> 963 case atom_to_list(Atom) of 964 [$$ | Rest] -> 965 try list_to_integer(Rest) of 966 N when N > 0, N =< NmbrOfDaughters -> 967 {{var, Anno, list_to_atom(append("__", Rest))}, St0}; 968 _ -> 969 St = add_error(Anno, 970 {undefined_pseudo_variable, Atom}, 971 St0), 972 {{atom, Anno, '$undefined'}, St} 973 catch 974 error: _ -> {{atom, Anno, Atom}, St0} 975 end; 976 _ -> 977 {{atom, Anno, Atom}, St0} 978 end; 979subst_pseudo_vars(Tuple, NmbrOfDaughters, St0) when is_tuple(Tuple) -> 980 {L, St} = subst_pseudo_vars(tuple_to_list(Tuple), NmbrOfDaughters, St0), 981 {list_to_tuple(L), St}; 982subst_pseudo_vars(Something_else, _, St) -> 983 {Something_else, St}. 984 985kind_of_symbol(St, SymName) -> 986 case member(SymName, St#yecc.nonterminals) of 987 false -> 988 case member(SymName, St#yecc.terminals) of 989 false -> 990 case St#yecc.endsymbol of 991 SymName -> 992 endsymbol; 993 _ -> 994 unknown 995 end; 996 true -> 997 terminal 998 end; 999 true -> 1000 nonterminal 1001 end. 1002 1003%%%%%%%%%%%%%%%%%%%%%%%%%%%% 1004% Computing parse states and goto table from grammar. 1005% Start item: {0, [Endsymbol]} <-> 1006% (['ACCEPT' '.', Rootsymbol], {'$'}) in Aho & Johnson 1007% where '$end' is the default end of input symbol of the 1008% scanner if no 'Endsymbol' has been declared in the syntax file. 1009 1010-record(tabs, { 1011 symbols, % ETS-set, keypos 1: {SymbolName, SymbolCode} 1012 inv_symbols, % ETS-set, keypos 2: {SymbolName, SymbolCode} 1013 state_id, % ETS-bag, keypos 1: {StateId, StateNum} 1014 % StateId is not unique for a state. 1015 rp_rhs, % rule pointer -> the remaining rhs symbols 1016 rp_info, % rule pointer -> expanding rules and lookahead 1017 goto % ETS-bag, keypos 1: first 1018 % {{FromStateNum, Symbol, ToStateNum}}, then 1019 % {{FromStateNum, Symbol}, ToStateNum} 1020 }). 1021 1022-record(item, { % what states are made of 1023 rule_pointer, 1024 look_ahead, 1025 rhs 1026 }). 1027 1028compute_states(St0) -> 1029 SymbolTab = St0#yecc.symbol_tab, 1030 CodedRules = map(fun(#rule{symbols = Syms} = R) -> 1031 R#rule{symbols = code_symbols(Syms, SymbolTab)} 1032 end, St0#yecc.rules_list), 1033 CodedNonterminals = code_symbols(St0#yecc.nonterminals, SymbolTab), 1034 %% Only coded in this phase; StC is thrown away. 1035 StC = St0#yecc{rules_list = CodedRules, 1036 rules = list_to_tuple(CodedRules), 1037 nonterminals = CodedNonterminals}, 1038 {RuleIndex, RulePointer2Rule} = 1039 make_rule_index(StC, St0#yecc.rules_list), 1040 StateTab0 = {}, 1041 StateIdTab = ets:new(yecc_state_id, [set]), 1042 GotoTab = ets:new(yecc_goto, [bag]), 1043 RulePointerRhs = make_rhs_index(StC#yecc.rules_list), 1044 RulePointerInfo = make_rule_pointer_info(StC, RulePointerRhs, RuleIndex), 1045 1046 Tables = #tabs{symbols = SymbolTab, 1047 state_id = StateIdTab, 1048 rp_rhs = RulePointerRhs, 1049 rp_info = RulePointerInfo, 1050 goto = GotoTab}, 1051 1052 _ = erase(), 1053 EndsymCode = code_terminal(StC#yecc.endsymbol, StC#yecc.symbol_tab), 1054 {StateId, State0} = compute_state([{EndsymCode, 1}], Tables), 1055 1056 StateNum0 = first_state(), 1057 FirstState = {StateNum0, State0}, 1058 StateTab1 = insert_state(Tables, StateTab0, FirstState, StateId), 1059 {StateTab, N} = 1060 compute_states1([{StateNum0, get_current_symbols(State0)}], 1061 FirstState, StateTab1, Tables), 1062 true = ets:delete(StateIdTab), 1063 St = St0#yecc{state_tab = StateTab, goto_tab = GotoTab, n_states = N, 1064 rule_pointer2rule = RulePointer2Rule}, 1065 decode_goto(GotoTab, St#yecc.inv_symbol_tab), 1066 check_usage(St). 1067 1068first_state() -> 1069 0. 1070 1071decode_goto(GotoTab, InvSymTab) -> 1072 G = ets:tab2list(GotoTab), 1073 ets:delete_all_objects(GotoTab), 1074 ets:insert(GotoTab, 1075 map(fun({{From, Sym, Next}}) -> 1076 {{From, decode_symbol(Sym, InvSymTab)}, Next} 1077 end, G)). 1078 1079check_usage(St0) -> 1080 SelSyms = ets:fun2ms(fun({{_,Sym},_}) -> Sym end), 1081 UsedSymbols = ets:select(St0#yecc.goto_tab, SelSyms), 1082 Syms = ordsets:from_list([?ACCEPT, '$empty' | UsedSymbols]), 1083 NonTerms = ordsets:from_list(St0#yecc.nonterminals), 1084 UnusedNonTerms = ordsets:to_list(ordsets:subtract(NonTerms, Syms)), 1085 St1 = add_warnings(UnusedNonTerms, unused_nonterminal, St0), 1086 Terms = ordsets:from_list(St0#yecc.terminals), 1087 St2 = add_warnings(ordsets:to_list(ordsets:subtract(Terms, Syms)), 1088 unused_terminal, St1), 1089 DefinedNonTerminals = map(fun(#rule{symbols = [Name | _]}) -> 1090 Name 1091 end, St2#yecc.rules_list), 1092 DefNonTerms = ordsets:from_list(DefinedNonTerminals), 1093 UndefNonTerms = ordsets:subtract(NonTerms, DefNonTerms), 1094 add_errors(ordsets:to_list(ordsets:subtract(UndefNonTerms, 1095 UnusedNonTerms)), 1096 missing_syntax_rule, St2). 1097 1098%% States are sometimes big, should not be copied to ETS tables. 1099%% Here an "extendible" tuple is used. 1100lookup_state(StateTab, N) -> 1101 element(N+1, StateTab). 1102 1103insert_state(#tabs{state_id = StateIdTab}, StateTab0, State, StateId) -> 1104 {N, _Items} = State, 1105 insert_state_id(StateIdTab, N, StateId), 1106 StateTab = if 1107 tuple_size(StateTab0) > N -> 1108 StateTab0; 1109 true -> 1110 list_to_tuple(tuple_to_list(StateTab0) ++ 1111 lists:duplicate(round(1 + N * 1.5), [])) 1112 end, 1113 setelement(N+1, StateTab, State). 1114 1115insert_state_id(StateIdTab, N, StateId) -> 1116 true = ets:insert(StateIdTab, {StateId, N}). 1117 1118compute_states1([], {N, _}=_CurrState, StateTab0, _Tables) -> 1119 {StateTab0, N}; 1120compute_states1([{N, Symbols} | Try], CurrState, StateTab, Tables) -> 1121 {_N, S} = lookup_state(StateTab, N), 1122 Seeds = state_seeds(S, Symbols), 1123 compute_states2(Seeds, N, Try, CurrState, StateTab, Tables). 1124 1125compute_states2([], _N, Try, CurrState, StateTab, Tables) -> 1126 compute_states1(Try, CurrState, StateTab, Tables); 1127compute_states2([{Sym,Seed} | Seeds], N, Try, CurrState, StateTab, Tables) -> 1128 {StateId, NewState} = compute_state(Seed, Tables), 1129 case check_states(NewState, StateId, StateTab, Tables) of 1130 add -> 1131 {M, _} = CurrState, 1132 %% io:fwrite(<<"Adding state ~w\n">>, [M + 1]), 1133 CurrentSymbols = get_current_symbols(NewState), 1134 Next = M + 1, 1135 NextState = {Next, NewState}, 1136 NewStateTab = insert_state(Tables, StateTab, NextState, StateId), 1137 insert_goto(Tables, N, Sym, Next), 1138 compute_states2(Seeds, N, [{Next, CurrentSymbols} | Try], 1139 NextState, NewStateTab, Tables); 1140 {old, M} -> 1141 %% io:fwrite(<<"Identical to old state ~w\n">>, [M]), 1142 insert_goto(Tables, N, Sym, M), 1143 compute_states2(Seeds, N, Try, CurrState, StateTab, Tables); 1144 {merge, M, NewCurrent} -> 1145 %% io:fwrite(<<"Merging with state ~w\n">>, [M]), 1146 Try1 = case lists:keyfind(M, 1, Try) of 1147 false -> 1148 [{M, NewCurrent} | Try]; 1149 {_, OldCurrent} -> 1150 case ordsets:is_subset(NewCurrent, OldCurrent) of 1151 true -> 1152 Try; 1153 false -> 1154 [{M, ordsets:union(NewCurrent, OldCurrent)} 1155 | keydelete(M, 1, Try)] 1156 end 1157 end, 1158 NewStateTab = merge_states(NewState, StateTab, Tables, M,StateId), 1159 insert_goto(Tables, N, Sym, M), 1160 compute_states2(Seeds, N, Try1, CurrState, NewStateTab, Tables) 1161 end. 1162 1163insert_goto(Tables, From, Sym, To) -> 1164 true = ets:insert(Tables#tabs.goto, {{From, Sym, To}}). 1165 1166%% Create an ets table for faster lookups. 1167create_symbol_table(St) -> 1168 #yecc{terminals = Terminals, endsymbol = Endsymbol} = St, 1169 SymbolTab = ets:new(yecc_symbols, [{keypos,1}]), 1170 %% '$empty' is always assigned 0 1171 Ts = ['$empty', Endsymbol | delete('$empty', Terminals)], 1172 TsC = count(0, Ts), 1173 NTsC = map(fun({NT,I}) -> {NT,-I} end, count(1, St#yecc.nonterminals)), 1174 Cs = TsC++NTsC, 1175 true = ets:insert(SymbolTab, Cs), 1176 1177 InvSymTable = ets:new(yecc_inverted_terminals, [{keypos,2}]), 1178 true = ets:insert(InvSymTable, Cs), 1179 1180 St#yecc{symbol_tab = SymbolTab, inv_symbol_tab = InvSymTable}. 1181 1182get_current_symbols(State) -> 1183 usort(get_current_symbols1(State, [])). 1184 1185get_current_symbols1([], Syms) -> 1186 Syms; 1187get_current_symbols1([#item{rhs = Rhs} | Items], Syms) -> 1188 case Rhs of 1189 [] -> 1190 get_current_symbols1(Items, Syms); 1191 [Symbol | _] -> 1192 get_current_symbols1(Items, [Symbol | Syms]) 1193 end. 1194 1195state_seeds(Items, Symbols) -> 1196 L = [{S,{LA,RP + 1}} || #item{rule_pointer = RP, look_ahead = LA, 1197 rhs = [S | _]} <- Items], 1198 state_seeds1(keysort(1, L), Symbols). 1199 1200state_seeds1(_L, []) -> 1201 []; 1202state_seeds1(L, [Symbol | Symbols]) -> 1203 state_seeds(L, Symbol, Symbols, []). 1204 1205state_seeds([{Symbol, Item} | L], Symbol, Symbols, Is) -> 1206 state_seeds(L, Symbol, Symbols, [Item | Is]); 1207state_seeds([{S, _Item} | L], Symbol, Symbols, Is) when S < Symbol -> 1208 state_seeds(L, Symbol, Symbols, Is); 1209state_seeds(L, Symbol, Symbols, Is) -> 1210 [{Symbol, Is} | state_seeds1(L, Symbols)]. 1211 1212compute_state(Seed, Tables) -> 1213 RpInfo = Tables#tabs.rp_info, 1214 foreach(fun({LA, RulePointer}) -> put(RulePointer, LA) end, Seed), 1215 foreach(fun({LA, RP}) -> compute_closure(LA, RP, RpInfo) end, Seed), 1216 Closure = keysort(1, erase()), 1217 state_items(Closure, [], [], Tables#tabs.rp_rhs). 1218 1219%% Collects a uniqe id for the state (all rule pointers). 1220state_items([{RP, LA} | L], Is, Id, RpRhs) -> 1221 I = #item{rule_pointer = RP, look_ahead = LA, rhs = element(RP, RpRhs)}, 1222 state_items(L, [I | Is], [RP | Id], RpRhs); 1223state_items(_, Is, Id, _RpRhs) -> 1224 {Id, Is}. 1225 1226-compile({inline,[compute_closure/3]}). 1227compute_closure(Lookahead, RulePointer, RpInfo) -> 1228 case element(RulePointer, RpInfo) of 1229 []=Void -> % no followers, or terminal 1230 Void; 1231 {no_union, ExpandingRules, NewLookahead} -> 1232 compute_closure1(ExpandingRules, NewLookahead, RpInfo); 1233 {union, ExpandingRules, Lookahead0} -> 1234 NewLookahead = set_union(Lookahead0, Lookahead), 1235 compute_closure1(ExpandingRules, NewLookahead, RpInfo); 1236 ExpandingRules -> 1237 compute_closure1(ExpandingRules, Lookahead, RpInfo) 1238 end. 1239 1240compute_closure1([RulePointer | Tail], NewLookahead, RpInfo) -> 1241 compute_closure1(Tail, NewLookahead, RpInfo), 1242 case get(RulePointer) of 1243 undefined -> % New 1244 put(RulePointer, NewLookahead), 1245 compute_closure(NewLookahead, RulePointer, RpInfo); 1246 Lookahead2 -> 1247 Lookahead = set_union(Lookahead2, NewLookahead), 1248 if 1249 Lookahead =:= Lookahead2 -> % Old 1250 Lookahead2; % void() 1251 true -> % Merge 1252 put(RulePointer, Lookahead), 1253 compute_closure(NewLookahead, RulePointer, RpInfo) 1254 end 1255 end; 1256compute_closure1(Nil, _, _RpInfo) -> 1257 Nil. 1258 1259%% Check if some old state is a superset of our NewState 1260check_states(NewState, StateId, StateTab, #tabs{state_id = StateIdTab}) -> 1261 try ets:lookup_element(StateIdTab, StateId, 2) of 1262 N -> 1263 {_N, OldState} = lookup_state(StateTab, N), 1264 check_state1(NewState, OldState, [], N) 1265 catch error:_ -> add 1266 end. 1267 1268check_state1([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1], 1269 [#item{look_ahead = Lookahead2} | Items2], Symbols, N) -> 1270 case set_is_subset(Lookahead1, Lookahead2) of 1271 true -> 1272 check_state1(Items1, Items2, Symbols, N); 1273 false -> 1274 case Rhs of 1275 [] -> 1276 check_state2(Items1, Items2, Symbols, N); 1277 [Symbol | _] -> 1278 check_state2(Items1, Items2, [Symbol | Symbols], N) 1279 end 1280 end; 1281check_state1([], [], _Symbols, N) -> 1282 {old, N}. 1283 1284check_state2([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1], 1285 [#item{look_ahead = Lookahead2} | Items2], Symbols, N) -> 1286 case set_is_subset(Lookahead1, Lookahead2) of 1287 true -> 1288 check_state2(Items1, Items2, Symbols, N); 1289 false -> 1290 case Rhs of 1291 [] -> 1292 check_state2(Items1, Items2, Symbols, N); 1293 [Symbol | _] -> 1294 check_state2(Items1, Items2, [Symbol | Symbols], N) 1295 end 1296 end; 1297check_state2([], [], Symbols, N) -> 1298 {merge, N, usort(Symbols)}. 1299 1300merge_states(NewState, StateTab, Tables, M, StateId) -> 1301 {_M, Old_state} = lookup_state(StateTab, M), 1302 MergedState = merge_states1(NewState, Old_state), 1303 insert_state(Tables, StateTab, {M, MergedState}, StateId). 1304 1305merge_states1([Item1 | Items1], [Item2 | Items2]) -> 1306 LA1 = Item1#item.look_ahead, 1307 LA2 = Item2#item.look_ahead, 1308 if 1309 LA1 =:= LA2 -> 1310 [Item1 | merge_states1(Items1, Items2)]; 1311 true -> 1312 [Item1#item{look_ahead = set_union(LA1, LA2)} 1313 | merge_states1(Items1, Items2)] 1314 end; 1315merge_states1(_, _) -> 1316 []. 1317 1318%% RulePointer -> Rhs. Every position Rhs in has its unique "rule pointer". 1319make_rhs_index(RulesList) -> 1320 Index = flatmap(fun(#rule{symbols = [_Non | Daughters]}) -> 1321 suffixes0(Daughters) 1322 end, RulesList), 1323 list_to_tuple(Index). 1324 1325suffixes0([?EMPTY]) -> 1326 [[], []]; 1327suffixes0(L) -> 1328 suffixes(L). 1329 1330suffixes([]=L) -> 1331 [L]; 1332suffixes([_ | T]=L) -> 1333 [L | suffixes(T)]. 1334 1335%% Setup info about lookahead and expanding rules for each point 1336%% ("rule pointer") in the grammar. 1337make_rule_pointer_info(StC, RpRhs, RuleIndex) -> 1338 SymbolTab = StC#yecc.symbol_tab, 1339 LcTab = make_left_corner_table(StC), 1340 LA_index = map(fun(Syms) -> 1341 rp_info(Syms, SymbolTab, LcTab, RuleIndex) 1342 end, tuple_to_list(RpRhs)), 1343 list_to_tuple(LA_index). 1344 1345rp_info([], _SymbolTab, _LcTab, _RuleIndex) -> 1346 []; 1347rp_info([Category | Followers], SymbolTab, LcTab, RuleIndex) -> 1348 case maps:find(Category, RuleIndex) of 1349 error -> % terminal 1350 []; 1351 {ok, ExpandingRules} when Followers =:= [] -> 1352 ExpandingRules; 1353 {ok, ExpandingRules} -> 1354 case make_lookahead(Followers, SymbolTab, LcTab, set_empty()) of 1355 {empty, LA} -> 1356 {union, ExpandingRules, LA}; 1357 LA -> 1358 {no_union, ExpandingRules, LA} 1359 end 1360 end. 1361 1362%% Lookahead computation is complicated by the possible existence 1363%% of null string rewriting rules, such as A -> '$empty'. 1364make_lookahead([], _, _, LA) -> 1365 {empty, LA}; 1366make_lookahead([Symbol | Symbols], SymbolTab, LcTab, LA) -> 1367 case maps:find(Symbol, LcTab) of 1368 {ok, LeftCorner} -> % nonterminal 1369 case empty_member(LeftCorner) of 1370 true -> 1371 make_lookahead(Symbols, SymbolTab, LcTab, 1372 set_union(empty_delete(LeftCorner), LA)); 1373 false -> 1374 set_union(LeftCorner, LA) 1375 end; 1376 error -> % terminal 1377 set_add(Symbol, LA) 1378 end. 1379 1380%% -> map-of({Nonterminal, [Terminal]}). 1381%% The algorithm FIRST/1 from the Dragon Book. 1382%% Left corner table, all terminals (including '$empty') that can 1383%% begin strings generated by Nonterminal. 1384make_left_corner_table(#yecc{rules_list = RulesList} = St) -> 1385 SymbolTab = left_corner_symbol_table(St), 1386 Rules = map(fun(#rule{symbols = [Lhs | Rhs]}) -> 1387 {Lhs,{Lhs, Rhs}} 1388 end, RulesList), 1389 LeftHandTab = maps:from_list(family(Rules)), 1390 X0 = [{S,H} || {H,{H,Rhs}} <- Rules, 1391 S <- Rhs, 1392 not is_terminal(SymbolTab, S)], 1393 XL = family_with_domain(X0, St#yecc.nonterminals), 1394 X = maps:from_list(XL), 1395 Xref = fun(NT) -> maps:get(NT, X) end, 1396 E = set_empty(), 1397 LC0 = maps:from_list([{H, E} || {H,_} <- XL]), 1398 %% Handle H -> a S, where a is a terminal ('$empty' inclusive). 1399 {Q, LC1} = 1400 foldl(fun({H,{H,[S | _]}}, {Q0, LC}) -> 1401 case ets:lookup(SymbolTab, S) of 1402 [{_,Num}=SymbolAndNum] when Num >= 0 -> 1403 F = set_add_terminal(SymbolAndNum, E), 1404 {[Xref(H) | Q0], upd_first(H, F, LC)}; 1405 _ -> 1406 {Q0, LC} 1407 end 1408 end, {[], LC0}, Rules), 1409 left_corners(Q, LC1, LeftHandTab, SymbolTab, Xref). 1410 1411left_corners(Q0, LC0, LeftHandTab, SymbolTab, Xref) -> 1412 case usort(append(Q0)) of 1413 [] -> 1414 LC0; 1415 Q1 -> 1416 Rs = flatmap(fun(NT) -> maps:get(NT, LeftHandTab) end, Q1), 1417 {LC, Q} = left_corners2(Rs, LC0, [], SymbolTab, Xref), 1418 left_corners(Q, LC, LeftHandTab, SymbolTab, Xref) 1419 end. 1420 1421left_corners2([], LC, Q, _SymbolTab, _Xref) -> 1422 {LC, Q}; 1423left_corners2([{Head,Rhs} | Rs], LC, Q0, SymbolTab, Xref) -> 1424 Ts = left_corner_rhs(Rhs, Head, LC, set_empty(), SymbolTab), 1425 First0 = maps:get(Head, LC), 1426 case set_is_subset(Ts, First0) of 1427 true -> 1428 left_corners2(Rs, LC, Q0, SymbolTab, Xref); 1429 false -> 1430 LC1 = upd_first(Head, Ts, LC), 1431 left_corners2(Rs, LC1, [Xref(Head) | Q0], SymbolTab, Xref) 1432 end. 1433 1434upd_first(NT, Ts, LC) -> 1435 maps:update_with(NT, fun(First) -> set_union(First, Ts) end, LC). 1436 1437left_corner_rhs([S | Ss], Head, LC, Ts, SymbolTab) -> 1438 case ets:lookup(SymbolTab, S) of 1439 [{_,Num}=SymbolAndNum] when Num >= 0 -> 1440 set_add_terminal(SymbolAndNum, Ts); 1441 [_NonTerminalSymbol] -> 1442 First = maps:get(S, LC), 1443 case empty_member(First) of 1444 true -> 1445 NTs = set_union(empty_delete(First), Ts), 1446 left_corner_rhs(Ss, Head, LC, NTs, SymbolTab); 1447 false -> 1448 set_union(First, Ts) 1449 end 1450 end; 1451left_corner_rhs([], _Head, _LC, Ts, _SymbolTab) -> 1452 set_add(?EMPTY, Ts). 1453 1454%% For every non-terminal return a list of "rule pointers" for rules 1455%% expanding the non-terminal. 1456%% Also assigns a unique number to each point in the grammar, "rule pointer". 1457make_rule_index(#yecc{nonterminals = Nonterminals, 1458 rules_list = RulesList}, RulesListNoCodes) -> 1459 {RulesL, _N} = 1460 lists:mapfoldl(fun(#rule{symbols = [Nonterminal | Daughters]}, I) -> 1461 I1 = I + length(Daughters)+1, 1462 {{Nonterminal, I}, I1} 1463 end, 1, RulesList), 1464 IndexedTab = family_with_domain(RulesL, Nonterminals), 1465 1466 Symbol2Rule = [{Foo,R} || #rule{symbols = Symbols}=R <- RulesListNoCodes, 1467 Foo <- Symbols], 1468 Pointer2Rule = [{I, R} || {{_Foo,R},I} <- count(1, Symbol2Rule)], 1469 {maps:from_list(IndexedTab), maps:from_list(Pointer2Rule)}. 1470 1471%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 1472% Computing parse action table from list of states and goto table: 1473 1474compute_parse_actions(N, St, StateActions) -> 1475 case N < first_state() of 1476 true -> 1477 StateActions; 1478 false -> 1479 {N, StateN} = lookup_state(St#yecc.state_tab, N), 1480 %% There can be duplicates in Actions. 1481 Actions = compute_parse_actions1(StateN, N, St), 1482 compute_parse_actions(N - 1, St, [{N, Actions} | StateActions]) 1483 end. 1484 1485compute_parse_actions1([], _, _) -> 1486 []; 1487compute_parse_actions1([#item{rule_pointer = RulePointer, 1488 look_ahead = Lookahead0, 1489 rhs = Rhs} | Items], N, St) -> 1490 case Rhs of 1491 [] -> 1492 Lookahead = decode_terminals(Lookahead0, St#yecc.inv_symbol_tab), 1493 case rule(RulePointer, St) of 1494 {[?ACCEPT | _], _RuleLine, _} -> 1495 [{Lookahead, accept} 1496 | compute_parse_actions1(Items, N, St)]; 1497 %% Head is placed after the daughters when finding the 1498 %% precedence. This is how giving precedence to 1499 %% non-terminals takes effect. 1500 {[Head | Daughters0], _RuleLine, _} -> 1501 Daughters = delete('$empty', Daughters0), 1502 [{Lookahead, 1503 #reduce{rule_nmbr = RulePointer, head = Head, 1504 nmbr_of_daughters = length(Daughters), 1505 prec = get_prec(Daughters ++ [Head], St)}} 1506 | compute_parse_actions1(Items, N, St)] 1507 end; 1508 [Symbol | Daughters] -> 1509 case is_terminal(St#yecc.symbol_tab, Symbol) of 1510 true -> 1511 DecSymbol = decode_symbol(Symbol, St#yecc.inv_symbol_tab), 1512 {[Head | _], _RuleLine, _} = rule(RulePointer, St), 1513 %% A bogus shift-shift conflict can be introduced 1514 %% here if some terminal occurs in different rules 1515 %% which have been given precedence "one level up". 1516 Prec1 = case Daughters of 1517 [] -> get_prec([DecSymbol, Head], St); 1518 _ -> get_prec([DecSymbol], St) 1519 end, 1520 Pos = case Daughters of 1521 [] -> z; 1522 _ -> a 1523 end, 1524 [{[DecSymbol], 1525 #shift{state = goto(N, DecSymbol, St), 1526 pos = Pos, 1527 prec = Prec1, 1528 rule_nmbr = RulePointer}} 1529 | compute_parse_actions1(Items, N, St)]; 1530 false -> 1531 compute_parse_actions1(Items, N, St) 1532 end 1533 end. 1534 1535get_prec(Symbols, St) -> 1536 get_prec1(Symbols, St#yecc.prec_tab, {0, none}). 1537 1538get_prec1([], _, P) -> 1539 P; 1540get_prec1([Symbol | T], PrecTab, P) -> 1541 case ets:lookup(PrecTab, Symbol) of 1542 [] -> 1543 get_prec1(T, PrecTab, P); 1544 [{_, N, Ass}] -> 1545 get_prec1(T, PrecTab, {N, Ass}) 1546 end. 1547 1548create_precedence_table(St) -> 1549 PrecTab = ets:new(yecc_precedences, []), 1550 true = ets:insert(PrecTab, St#yecc.prec), 1551 St#yecc{prec_tab = PrecTab}. 1552 1553-record(cxt, {terminal, state_n, yecc, res}). 1554 1555%% Detects shift-reduce and reduce-reduce conflicts. 1556%% Also removes all but one conflicting action. As a consequence the 1557%% lookahead sets for a state are always disjoint. 1558%% Reduce/reduce conflicts are considered errors. 1559find_action_conflicts(St0) -> 1560 Cxt0 = #cxt{yecc = St0, res = []}, 1561 {#cxt{yecc = St, res = Res}, NewParseActions0} = 1562 foldl(fun({N, Actions0}, {Cxt1, StateActions}) -> 1563 L = [{Terminal, Act} || {Lookahead, Act} <- Actions0, 1564 Terminal <- Lookahead], 1565 {Cxt, Actions} = 1566 foldl(fun({Terminal, As}, {Cxt2,Acts0}) -> 1567 Cxt3 = Cxt2#cxt{terminal = Terminal, 1568 state_n = N}, 1569 {Action, Cxt} = 1570 find_action_conflicts2(As, Cxt3), 1571 {Cxt,[{Action,Terminal} | Acts0]} 1572 end, {Cxt1,[]}, family(L)), 1573 {Cxt,[{N,inverse(family(Actions))} | StateActions]} 1574 end, {Cxt0, []}, St0#yecc.parse_actions), 1575 if 1576 length(Res) > 0, St#yecc.verbose -> 1577 io:fwrite(<<"\n*** Conflicts resolved by operator " 1578 "precedences:\n\n">>), 1579 foreach(fun({Confl, Name}) -> 1580 report_conflict(Confl, St, Name, prec) 1581 end, reverse(Res)), 1582 io:fwrite(<<"*** End of resolved conflicts\n\n">>); 1583 true -> 1584 ok 1585 end, 1586 NewParseActions = reverse(NewParseActions0), 1587 St#yecc{parse_actions = NewParseActions}. 1588 1589find_action_conflicts2([Action], Cxt) -> 1590 {Action, Cxt}; 1591find_action_conflicts2([#shift{state = St, pos = Pos, prec = Prec}, 1592 #shift{state = St}=S | As], 1593 Cxt) when Pos =:= a; Prec =:= {0,none} -> 1594 %% This is a kludge to remove the bogus shift-shift conflict 1595 %% introduced in compute_parse_actions1(). 1596 find_action_conflicts2([S | As], Cxt); 1597find_action_conflicts2([#shift{state = NewState, pos = z}=S1, 1598 #shift{state = NewState}=S2 | _], Cxt) -> 1599 %% This is even worse than last clause. Give up. 1600 Confl = conflict(S1, S2, Cxt), 1601 #cxt{yecc = St0} = Cxt, 1602 St = conflict_error(Confl, St0), 1603 {S1, Cxt#cxt{yecc = St}}; % return any action 1604find_action_conflicts2([#shift{prec = {P1, Ass1}}=S | Rs], Cxt0) -> 1605 {R, Cxt1} = find_reduce_reduce(Rs, Cxt0), 1606 #cxt{res = Res0, yecc = St0} = Cxt1, 1607 #reduce{prec = {P2, Ass2}} = R, 1608 Confl = conflict(R, S, Cxt1), 1609 if 1610 P1 > P2 -> 1611 {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}}; 1612 P2 > P1 -> 1613 {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}}; 1614 Ass1 =:= left, Ass2 =:= left -> 1615 {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}}; 1616 Ass1 =:= right, Ass2 =:= right -> 1617 {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}}; 1618 Ass1 =:= nonassoc, Ass2 =:= nonassoc -> 1619 {nonassoc, Cxt1}; 1620 P1 =:= 0, P2 =:= 0 -> 1621 report_conflict(Confl, St0, shift, default), 1622 St = add_conflict(Confl, St0), 1623 {S, Cxt1#cxt{yecc = St}}; 1624 true -> 1625 St = conflict_error(Confl, St0), 1626 {S, Cxt1#cxt{yecc = St}} % return any action 1627 end; 1628find_action_conflicts2(Rs, Cxt0) -> 1629 find_reduce_reduce(Rs, Cxt0). 1630 1631find_reduce_reduce([R], Cxt) -> 1632 {R, Cxt}; 1633find_reduce_reduce([accept=A, #reduce{}=R | Rs], Cxt0) -> 1634 Confl = conflict(R, A, Cxt0), 1635 St = conflict_error(Confl, Cxt0#cxt.yecc), 1636 Cxt = Cxt0#cxt{yecc = St}, 1637 find_reduce_reduce([R | Rs], Cxt); 1638find_reduce_reduce([#reduce{head = Categ1, prec = {P1, _}}=R1, 1639 #reduce{head = Categ2, prec = {P2, _}}=R2 | Rs], Cxt0) -> 1640 #cxt{res = Res0, yecc = St0} = Cxt0, 1641 Confl = conflict(R1, R2, Cxt0), 1642 {R, Res, St} = 1643 if 1644 P1 > P2 -> 1645 {R1, [{Confl, Categ1} | Res0], St0}; 1646 P2 > P1 -> 1647 {R2, [{Confl, Categ2} | Res0], St0}; 1648 true -> 1649 St1 = conflict_error(Confl, St0), 1650 {R1, Res0, St1} 1651 end, 1652 Cxt = Cxt0#cxt{res = Res, yecc = St}, 1653 find_reduce_reduce([R | Rs], Cxt). 1654 1655%% Since the lookahead sets are disjoint (assured by 1656%% find_action_conflicts), the order between actions can be chosen 1657%% almost arbitrarily. nonassoc has to come last, though (but is later 1658%% discarded!). And shift has to come before reduce. 1659sort_parse_actions([]) -> 1660 []; 1661sort_parse_actions([{N, La_actions} | Tail]) -> 1662 [{N, sort_parse_actions1(La_actions)} | sort_parse_actions(Tail)]. 1663 1664sort_parse_actions1(LaActions) -> 1665 As = filter(fun({_LA, A}) -> A =:= accept end, LaActions), 1666 Ss = filter(fun({_LA, A}) -> is_record(A, shift) end, LaActions), 1667 Rs = filter(fun({_LA, A}) -> is_record(A, reduce) end, LaActions), 1668 Ns = filter(fun({_LA, A}) -> A =:= nonassoc end, LaActions), 1669 As ++ Ss ++ Rs ++ Ns. 1670 1671%% -> {State, StateRepr}. StateRepr has the same set of shift actions 1672%% as State. No code will be output for State if State =/= StateRepr. 1673find_identical_shift_states(StateActions) -> 1674 L1 = [{Actions, State} || {State,Actions} <- StateActions], 1675 {SO, NotSO} = lists:partition(fun({Actions,_States}) -> 1676 shift_actions_only(Actions) 1677 end, family(L1)), 1678 R = [{State, hd(States)} || {_Actions, States} <- SO, State <- States] 1679 ++ 1680 [{State, State} || {_Actions, States} <- NotSO, State <- States], 1681 lists:keysort(1, R). 1682 1683-record(part_data, {name, eq_state, actions, n_actions, states}). 1684 1685%% Replace {SStates,Actions} with {SStates,{Actions,Jump}} where 1686%% Jump describes which clauses that have been extracted from shift 1687%% states so that they can be used from other states. Some space is 1688%% saved. 1689find_partial_shift_states(StateActionsL, StateReprs) -> 1690 L = [{State, Actions} || {{State,Actions}, {State,State}} <- 1691 lists:zip(StateActionsL, StateReprs), 1692 shift_actions_only(Actions)], 1693 StateActions = sofs:family(L, [{state,[action]}]), 1694 StateAction = sofs:family_to_relation(StateActions), 1695 1696 %% Two actions are equal if they occur in the same states: 1697 Parts = sofs:partition(sofs:range(StateActions)), 1698 PartsL = sofs:to_external(Parts), 1699 %% Assign temporary names to the parts of the partition (of actions): 1700 PartNameL = lists:zip(seq1(length(PartsL)), PartsL), 1701 ActPartL = [{Action,PartName} || 1702 {PartName,Actions} <- PartNameL, 1703 Action <- Actions], 1704 ActionPartName = sofs:relation(ActPartL, [{action,partname}]), 1705 StatePartName = sofs:relative_product(StateAction, ActionPartName), 1706 PartInStates = sofs:relation_to_family(sofs:converse(StatePartName)), 1707 1708 %% Parts that equal all actions of a state: 1709 PartActions = sofs:family(PartNameL, [{partname,[action]}]), 1710 PartState = 1711 sofs:relative_product(PartActions, sofs:converse(StateActions)), 1712 PartStates = sofs_family_with_domain(PartState, sofs:domain(PartActions)), 1713 1714 PartDataL = [#part_data{name = Nm, eq_state = EqS, actions = P, 1715 n_actions = length(P), 1716 states = ordsets:from_list(S)} || 1717 {{Nm,P}, {Nm,S}, {Nm,EqS}} <- 1718 lists:zip3(PartNameL, 1719 sofs:to_external(PartInStates), 1720 sofs:to_external(PartStates))], 1721 true = length(PartDataL) =:= length(PartNameL), 1722 Ps = select_parts(PartDataL), 1723 1724 J1 = [{State, Actions, {jump_some,hd(States)}} || 1725 {_W, #part_data{actions = Actions, eq_state = [], 1726 states = States}} <- Ps, 1727 State <- States], 1728 J2 = [{State, Actions, {jump_all,To}} || 1729 {_W, #part_data{actions = Actions, eq_state = EqS, 1730 states = States}} <- Ps, 1731 To <- EqS, 1732 State <- States, 1733 State =/= To], 1734 J = lists:keysort(1, J1 ++ J2), 1735 1736 JumpStates = ordsets:from_list([S || {S,_,_} <- J]), 1737 {JS, NJS} = 1738 sofs:partition(1, sofs:relation(StateActionsL, [{state, actions}]), 1739 sofs:set(JumpStates, [state])), 1740 R = 1741 [{S, {Actions,jump_none}} || {S,Actions} <- sofs:to_external(NJS)] 1742 ++ 1743 [{S, {Actions--Part, {Tag,ToS,Part}}} || 1744 {{S,Actions}, {S,Part,{Tag,ToS}}} <- 1745 lists:zip(sofs:to_external(JS), J)], 1746 true = length(StateActionsL) =:= length(R), 1747 lists:keysort(1, R). 1748 1749%% Very greedy. By no means optimal. 1750select_parts([]) -> 1751 []; 1752select_parts(PartDataL) -> 1753 T1 = [{score(PD), PD} || PD <- PartDataL], 1754 [{W, PD} | Ws] = lists:reverse(lists:keysort(1, T1)), 1755 #part_data{n_actions = NActions, states = S} = PD, 1756 if 1757 W < 8 -> % don't bother 1758 []; 1759 true -> 1760 %% Cannot extract more clauses from the chosen part's states: 1761 NL = [D#part_data{states = NewS} || 1762 {W1, #part_data{states = S0}=D} <- Ws, 1763 W1 > 0, 1764 (NewS = ordsets:subtract(S0, S)) =/= []], 1765 if 1766 length(S) =:= 1; NActions =:= 1 -> 1767 select_parts(NL); 1768 true -> 1769 [{W,PD} | select_parts(NL)] 1770 end 1771 end. 1772 1773%% Does it pay off to extract clauses into a new function? 1774%% Assumptions: 1775%% - a call costs 8 (C = 8); 1776%% - a clause (per action) costs 20 plus 8 (select) (Cl = 28); 1777%% - a new function costs 20 (funinfo) plus 16 (select) (F = 36). 1778%% A is number of actions, S is number of states. 1779%% Special case (the part equals all actions of some state): 1780%% C * (S - 1) < (S - 1) * A * Cl 1781%% Normal case (introduce new function): 1782%% F + A * Cl + C * S < S * A * Cl 1783score(#part_data{n_actions = NActions, eq_state = [], states = S}) -> 1784 (length(S) * NActions * 28) - (36 + NActions * 28 + length(S) * 8); 1785score(#part_data{n_actions = NActions, states = S}) -> 1786 ((length(S) - 1) * NActions * 28) - (8 * (length(S) - 1)). 1787 1788shift_actions_only(Actions) -> 1789 length([foo || {_Ts,{shift,_,_,_,_}} <- Actions]) =:= length(Actions). 1790 1791collect_some_state_info(StateActions, StateReprs) -> 1792 RF = fun({_LA, A}) -> is_record(A, reduce) end, 1793 L = [{State, 1794 begin 1795 RO = lists:all(RF, LaActions), 1796 %% C is currently always ""; identical states are all shift. 1797 C = [io_lib:fwrite(<<" %% ~w\n">>, [State]) || 1798 true <- [RO], Repr =/= State], 1799 #state_info{reduce_only = RO, state_repr = Repr, comment = C} 1800 end} || 1801 {{State, LaActions}, {State, Repr}} <- 1802 lists:zip(StateActions, StateReprs)], 1803 list_to_tuple(L). 1804 1805conflict_error(Conflict, St0) -> 1806 St1 = add_conflict(Conflict, St0), 1807 add_error({conflict, Conflict}, St1). 1808 1809report_conflict(Conflict, St, ActionName, How) -> 1810 if 1811 St#yecc.verbose -> 1812 io:fwrite(<<"~s\n">>, [format_conflict(Conflict)]), 1813 Formated = format_symbol(ActionName), 1814 case How of 1815 prec -> 1816 io:fwrite(<<"Resolved in favor of ~ts.\n\n">>, [Formated]); 1817 default -> 1818 io:fwrite(<<"Conflict resolved in favor of ~ts.\n\n">>, 1819 [Formated]) 1820 end; 1821 true -> 1822 ok 1823 end. 1824 1825add_conflict(Conflict, St) -> 1826 case Conflict of 1827 {Symbol, StateN, _, {reduce, _, _, _}} -> 1828 St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]}; 1829 {Symbol, StateN, _, {accept, _}} -> 1830 St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]}; 1831 {Symbol, StateN, _, {shift, _, _}} -> 1832 St#yecc{shift_reduce = [{StateN,Symbol} | St#yecc.shift_reduce]}; 1833 {_Symbol, _StateN, {one_level_up, _, _}, _Confl} -> 1834 St 1835 end. 1836 1837conflict(#shift{prec = Prec1, rule_nmbr = RuleNmbr1}, 1838 #shift{prec = Prec2, rule_nmbr = RuleNmbr2}, Cxt) -> 1839 %% Conflict due to precedences "one level up". Kludge. 1840 #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt, 1841 {_, L1, RuleN1} = rule(RuleNmbr1, St), 1842 {_, L2, RuleN2} = rule(RuleNmbr2, St), 1843 Confl = {one_level_up, {L1, RuleN1, Prec1}, {L2, RuleN2, Prec2}}, 1844 {Symbol, N, Confl, Confl}; 1845conflict(#reduce{rule_nmbr = RuleNmbr1}, NewAction, Cxt) -> 1846 #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt, 1847 {R1, RuleLine1, RuleN1} = rule(RuleNmbr1, St), 1848 Confl = case NewAction of 1849 accept -> 1850 {accept, St#yecc.rootsymbol}; 1851 #reduce{rule_nmbr = RuleNmbr2} -> 1852 {R2, RuleLine2, RuleN2} = rule(RuleNmbr2, St), 1853 {reduce, R2, RuleN2, RuleLine2}; 1854 #shift{state = NewState} -> 1855 {shift, NewState, last(R1)} 1856 end, 1857 {Symbol, N, {R1, RuleN1, RuleLine1}, Confl}. 1858 1859format_conflict({Symbol, N, _, {one_level_up, 1860 {L1, RuleN1, {P1, Ass1}}, 1861 {L2, RuleN2, {P2, Ass2}}}}) -> 1862 S1 = io_lib:fwrite(<<"Conflicting precedences of symbols when " 1863 "scanning ~ts in state ~w:\n">>, 1864 [format_symbol(Symbol), N]), 1865 S2 = io_lib:fwrite(<<" ~s ~w (rule ~w at line ~w)\n" 1866 " vs.\n">>, 1867 [format_assoc(Ass1), P1, RuleN1, L1]), 1868 S3 = io_lib:fwrite(<<" ~s ~w (rule ~w at line ~w)\n">>, 1869 [format_assoc(Ass2), P2, RuleN2, L2]), 1870 [S1, S2, S3]; 1871format_conflict({Symbol, N, Reduce, Confl}) -> 1872 S1 = io_lib:fwrite(<<"Parse action conflict scanning symbol " 1873 "~ts in state ~w:\n">>, [format_symbol(Symbol), N]), 1874 S2 = case Reduce of 1875 {[HR | TR], RuleNmbr, RuleLine} -> 1876 io_lib:fwrite(<<" Reduce to ~ts from ~ts (rule ~w at " 1877 "line ~w)\n vs.\n">>, 1878 [format_symbol(HR), format_symbols(TR), 1879 RuleNmbr, RuleLine]) 1880 end, 1881 S3 = case Confl of 1882 {reduce, [HR2|TR2], RuleNmbr2, RuleLine2} -> 1883 io_lib:fwrite(<<" reduce to ~ts from ~ts " 1884 "(rule ~w at line ~w).">>, 1885 [format_symbol(HR2), format_symbols(TR2), 1886 RuleNmbr2, RuleLine2]); 1887 {shift, NewState, Sym} -> 1888 io_lib:fwrite(<<" shift to state ~w, adding right " 1889 "sisters to ~ts.">>, 1890 [NewState, format_symbol(Sym)]); 1891 {accept, Rootsymbol} -> 1892 io_lib:fwrite(<<" reduce to rootsymbol ~ts.">>, 1893 [format_symbol(Rootsymbol)]) 1894 end, 1895 [S1, S2, S3]. 1896 1897%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 1898% Code generation: 1899 1900%% The version up to and including parsetools-1.3 is called "1.0". 1901%% 1902%% "1.1", parsetools-1.4: 1903%% - the prologue file has been updated; 1904%% - nonassoc is new; 1905%% - different order of clauses; 1906%% - never more than one clause matching a given symbol in a given state; 1907%% - file attributes relate messages to .yrl file; 1908%% - actions put in inlined functions; 1909%% - a few other minor fixes. 1910%% 1911%% "1.2", parsetools-1.4.2: 1912%% - the generated code has been changed as follows: 1913%% - yeccpars2() calls the functions yeccpars2_State(); 1914%% - several states can share yeccpars2_State(), which reduces code size; 1915%% - yeccgoto() has been split on one function per nonterminal; 1916%% - several minor changes have made the loaded code smaller. 1917%% - the include file yeccpre.hrl has been changed incompatibly. 1918%% 1919%% "1.3", parsetools-1.4.4: 1920%% - the generated code has been changed as follows: 1921%% - yeccgoto_T() no longer returns the next state, but calls yeccpars_S(); 1922%% - yeccpars2() is not called when it is known which yeccpars2_S() to call; 1923%% - "__Stack" has been substituted for "Stack"; 1924%% - several states can share yeccpars2_S_cont(), which reduces code size; 1925%% - instead if calling lists:nthtail() matching code is emitted. 1926%% 1927%% "1.4", parsetools-2.0.4: 1928%% - yeccerror() is called when a syntax error is found (as in version 1.1). 1929%% - the include file yeccpre.hrl has been changed. 1930 1931-define(CODE_VERSION, "1.4"). 1932-define(YECC_BUG(M, A), 1933 unicode:characters_to_binary( 1934 [" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",", 1935 io_lib:fwrite(M, A), "}).\n\n"])). 1936 1937%% Returns number of newlines in included files. 1938output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] -> 1939 St5 = output_header(St0), 1940 #yecc{infile = Infile, module = Module} = St5, 1941 St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]), 1942 St20 = 1943 fwrite(St10, 1944 <<"-export([parse/1, parse_and_scan/1, format_error/1]).\n">>, 1945 []), 1946 {St25, N_lines_1, LastErlangCodeLine} = 1947 case St20#yecc.erlang_code of 1948 none -> 1949 {St20, 0, no_erlang_code}; 1950 Next_line -> 1951 St_10 = output_file_directive(St20, Infile, Next_line-1), 1952 Last_line = include1([], Inport, Outport, Infile, 1953 Next_line, St_10), 1954 Nmbr_of_lines = Last_line - Next_line, 1955 {St_10, Nmbr_of_lines, {last_erlang_code_line, Last_line}} 1956 end, 1957 St30 = nl(St25), 1958 IncludeFile = 1959 filename:join([code:lib_dir(parsetools), "include","yeccpre.hrl"]), 1960 %% Maybe one could assume there are no warnings in this file. 1961 St = output_file_directive(St30, IncludeFile, 0), 1962 N_lines_2 = include(St, IncludeFile, Outport), 1963 {St, N_lines_1 + N_lines_2, LastErlangCodeLine}; 1964output_prelude(Outport, Inport, St0) -> 1965 St5 = output_header(St0), 1966 #yecc{infile = Infile, module = Module, includefile = Includefile} = St5, 1967 St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]), 1968 St20 = output_file_directive(St10, Includefile, 0), 1969 N_lines_1 = include(St20, Includefile, Outport), 1970 St30 = nl(St20), 1971 case St30#yecc.erlang_code of 1972 none -> 1973 {St30, N_lines_1, no_erlang_code}; 1974 Next_line -> 1975 St = output_file_directive(St30, Infile, Next_line-1), 1976 Last_line = include1([], Inport, Outport, Infile, Next_line, St), 1977 Nmbr_of_lines = Last_line - Next_line, 1978 {St, Nmbr_of_lines + N_lines_1, {last_erlang_code_line, Last_line}} 1979 end. 1980 1981output_header(St0) -> 1982 lists:foldl(fun(Str, St) -> fwrite(St, <<"~ts\n">>, [Str]) 1983 end, St0, St0#yecc.header). 1984 1985output_goto(St, [{_Nonterminal, []} | Go], StateInfo) -> 1986 output_goto(St, Go, StateInfo); 1987output_goto(St0, [{Nonterminal, List} | Go], StateInfo) -> 1988 F = function_name(St0, yeccgoto, Nonterminal), 1989 St05 = fwrite(St0, <<"-dialyzer({nowarn_function, ~w/7}).\n">>, [F]), 1990 St10 = output_goto1(St05, List, F, StateInfo, true), 1991 St = output_goto_fini(F, Nonterminal, St10), 1992 output_goto(St, Go, StateInfo); 1993output_goto(St, [], _StateInfo) -> 1994 St. 1995 1996output_goto1(St0, [{From, To} | Tail], F, StateInfo, IsFirst) -> 1997 St10 = delim(St0, IsFirst), 1998 {To, ToInfo} = lookup_state(StateInfo, To), 1999 #state_info{reduce_only = RO, state_repr = Repr, comment = C} = ToInfo, 2000 if 2001 RO -> 2002 %% Reduce actions do not use the state, so we just pass 2003 %% the old (now bogus) on: 2004 FromS = io_lib:fwrite("~w=_S", [From]), 2005 ToS = "_S"; 2006 true -> 2007 FromS = io_lib:fwrite("~w", [From]), 2008 ToS = io_lib:fwrite("~w", [To]) 2009 end, 2010 St20 = fwrite(St10, <<"~w(~s, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>, 2011 [F,FromS]), 2012 St30 = fwrite(St20, <<"~s">>, [C]), 2013 %% Short-circuit call to yeccpars2: 2014 St = fwrite(St30, <<" yeccpars2_~w(~s, Cat, Ss, Stack, T, Ts, Tzr)">>, 2015 [Repr, ToS]), 2016 output_goto1(St, Tail, F, StateInfo, false); 2017output_goto1(St, [], _F, _StateInfo, _IsFirst) -> 2018 St. 2019 2020output_goto_fini(F, NT, #yecc{includefile_version = {1,1}}=St0) -> 2021 %% Backward compatibility. 2022 St10 = delim(St0, false), 2023 St = fwrite(St10, <<"~w(State, _Cat, _Ss, _Stack, _T, _Ts, _Tzr) ->\n">>, 2024 [F]), 2025 fwrite(St, 2026 ?YECC_BUG(<<"{~ts, State, missing_in_goto_table}">>, 2027 [quoted_atom(St0, NT)]), 2028 []); 2029output_goto_fini(_F, _NT, St) -> 2030 fwrite(St, <<".\n\n">>, []). 2031 2032%% Find actions having user code. 2033find_user_code(ParseActions, St) -> 2034 [#user_code{state = State, 2035 terminal = Terminal, 2036 funname = inlined_function_name(St, State, Terminal), 2037 action = Action} || 2038 {State, La_actions} <- ParseActions, 2039 {Action, Terminals, RuleNmbr, NmbrOfDaughters} 2040 <- find_user_code2(La_actions), 2041 case tokens(RuleNmbr, St) of 2042 [{var, _, '__1'}] -> NmbrOfDaughters =/= 1; 2043 _ -> true 2044 end, 2045 Terminal <- Terminals]. 2046 2047find_user_code2([]) -> 2048 []; 2049find_user_code2([{_, #reduce{rule_nmbr = RuleNmbr, 2050 nmbr_of_daughters = NmbrOfDaughters} 2051 =Action}]) -> 2052 %% Same optimization as in output_state_actions. 2053 [{Action, ["Cat"], RuleNmbr, NmbrOfDaughters}]; 2054find_user_code2([{La, #reduce{rule_nmbr = RuleNmbr, 2055 nmbr_of_daughters = NmbrOfDaughters} 2056 =Action} | T]) -> 2057 [{Action,La, RuleNmbr, NmbrOfDaughters} | find_user_code2(T)]; 2058find_user_code2([_ | T]) -> 2059 find_user_code2(T). 2060 2061output_actions(St0, StateJumps, StateInfo) -> 2062 %% Not all the clauses of the dispatcher function yeccpars2() can 2063 %% be reached. Only when shifting, that is, calling yeccpars1(), 2064 %% will yeccpars2() be called. 2065 Y2CL = [NewState || {_State,{Actions,J}} <- StateJumps, 2066 {_LA, #shift{state = NewState}} <- 2067 (Actions 2068 ++ [A || {_Tag,_To,Part} <- [J], A <- Part])], 2069 Y2CS = ordsets:from_list([0 | Y2CL]), 2070 Y2S = ordsets:from_list([S || {S,_} <- StateJumps]), 2071 NY2CS = ordsets:subtract(Y2S, Y2CS), 2072 Sel = [{S,true} || S <- ordsets:to_list(Y2CS)] ++ 2073 [{S,false} || S <- ordsets:to_list(NY2CS)], 2074 2075 SelS = [{State,Called} || 2076 {{State,_JActions}, {State,Called}} <- 2077 lists:zip(StateJumps, lists:keysort(1, Sel))], 2078 St05 = 2079 fwrite(St0, <<"-dialyzer({nowarn_function, yeccpars2/7}).\n">>, []), 2080 St10 = foldl(fun({State, Called}, St_0) -> 2081 {State, #state_info{state_repr = IState}} = 2082 lookup_state(StateInfo, State), 2083 output_state_selection(St_0, State, IState, Called) 2084 end, St05, SelS), 2085 St20 = fwrite(St10, <<"yeccpars2(Other, _, _, _, _, _, _) ->\n">>, []), 2086 St = fwrite(St20, 2087 ?YECC_BUG(<<"{missing_state_in_action_table, Other}">>, []), 2088 []), 2089 foldl(fun({State, JActions}, St_0) -> 2090 {State, #state_info{state_repr = IState}} = 2091 lookup_state(StateInfo, State), 2092 output_state_actions(St_0, State, IState, 2093 JActions, StateInfo) 2094 end, St, StateJumps). 2095 2096output_state_selection(St0, State, IState, Called) -> 2097 Comment = [<<"%% ">> || false <- [Called]], 2098 St = fwrite(St0, <<"~syeccpars2(~w=S, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>, 2099 [Comment, State]), 2100 fwrite(St, 2101 <<"~s yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr);\n">>, 2102 [Comment, IState]). 2103 2104output_state_actions(St, State, State, {Actions,jump_none}, SI) -> 2105 St1 = output_state_actions_begin(St, State, Actions), 2106 output_state_actions1(St1, State, Actions, true, normal, SI); 2107output_state_actions(St0, State, State, {Actions, Jump}, SI) -> 2108 {Tag, To, Common} = Jump, 2109 CS = case Tag of 2110 jump_some -> list_to_atom(lists:concat([cont_, To])); 2111 jump_all -> To 2112 end, 2113 St = output_state_actions1(St0, State, Actions, true, {to, CS}, SI), 2114 if 2115 To =:= State -> 2116 St1 = output_state_actions_begin(St, State, Actions), 2117 output_state_actions1(St1, CS, Common, true, normal, SI); 2118 true -> 2119 St 2120 end; 2121output_state_actions(St, State, JState, _XActions, _SI) -> 2122 fwrite(St, <<"%% yeccpars2_~w: see yeccpars2_~w\n\n">>, [State, JState]). 2123 2124output_state_actions_begin(St, State, Actions) -> 2125 case [yes || {_, #reduce{}} <- Actions] of 2126 [] -> 2127 fwrite(St, <<"-dialyzer({nowarn_function, yeccpars2_~w/7}).\n">>, 2128 [State]); % Only when yeccerror(T) is output. 2129 _ -> St 2130 end. 2131 2132output_state_actions1(St, State, [], IsFirst, normal, _SI) -> 2133 output_state_actions_fini(State, IsFirst, St); 2134output_state_actions1(St0, State, [], IsFirst, {to, ToS}, _SI) -> 2135 St = delim(St0, IsFirst), 2136 fwrite(St, 2137 <<"yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr) ->\n" 2138 " yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr).\n\n">>, 2139 [State, ToS]); 2140output_state_actions1(St0, State, [{_, #reduce{}=Action}], 2141 IsFirst, _End, SI) -> 2142 St = output_reduce(St0, State, "Cat", Action, IsFirst, SI), 2143 fwrite(St, <<".\n\n">>, []); 2144output_state_actions1(St0, State, [{Lookahead,Action} | Tail], 2145 IsFirst, End, SI) -> 2146 {_, St} = 2147 foldl(fun(Terminal, {IsFst,St_0}) -> 2148 {false, 2149 output_action(St_0, State, Terminal, Action, IsFst,SI)} 2150 end, {IsFirst,St0}, Lookahead), 2151 output_state_actions1(St, State, Tail, false, End, SI). 2152 2153output_action(St, State, Terminal, #reduce{}=Action, IsFirst, SI) -> 2154 output_reduce(St, State, Terminal, Action, IsFirst, SI); 2155output_action(St0, State, Terminal, #shift{state = NewState}, IsFirst, _SI) -> 2156 St10 = delim(St0, IsFirst), 2157 St = fwrite(St10, <<"yeccpars2_~w(S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>, 2158 [State, quoted_atom(St10, Terminal)]), 2159 output_call_to_includefile(NewState, St); 2160output_action(St0, State, Terminal, accept, IsFirst, _SI) -> 2161 St10 = delim(St0, IsFirst), 2162 St = fwrite(St10, 2163 <<"yeccpars2_~w(_S, ~ts, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>, 2164 [State, quoted_atom(St10, Terminal)]), 2165 fwrite(St, <<" {ok, hd(Stack)}">>, []); 2166output_action(St, _State, _Terminal, nonassoc, _IsFirst, _SI) -> 2167 St. 2168 2169output_call_to_includefile(NewState, #yecc{includefile_version = {1,1}}=St) -> 2170 %% Backward compatibility. 2171 fwrite(St, <<" yeccpars1(Ts, Tzr, ~w, [S | Ss], [T | Stack])">>, 2172 [NewState]); 2173output_call_to_includefile(NewState, St) -> 2174 fwrite(St, <<" yeccpars1(S, ~w, Ss, Stack, T, Ts, Tzr)">>, 2175 [NewState]). 2176 2177output_state_actions_fini(State, IsFirst, St0) -> 2178 %% Backward compatible. 2179 St10 = delim(St0, IsFirst), 2180 St = fwrite(St10, <<"yeccpars2_~w(_, _, _, _, T, _, _) ->\n">>, [State]), 2181 fwrite(St, <<" yeccerror(T).\n\n">>, []). 2182 2183output_reduce(St0, State, Terminal, 2184 #reduce{rule_nmbr = RuleNmbr, 2185 head = Head, 2186 nmbr_of_daughters = NmbrOfDaughters}, 2187 IsFirst, StateInfo) -> 2188 St10 = delim(St0, IsFirst), 2189 QuotedTerminal = if 2190 is_atom(Terminal) -> quoted_atom(St10, Terminal); 2191 true -> Terminal 2192 end, 2193 St20 = fwrite(St10, 2194 <<"yeccpars2_~w(_S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>, 2195 [State, QuotedTerminal]), 2196 St30 = 2197 if 2198 NmbrOfDaughters < 2 -> 2199 Ns = "Ss", 2200 St20; 2201 true -> 2202 Ns = "Nss", 2203 Tmp = lists:join(",", 2204 lists:duplicate(NmbrOfDaughters - 1, "_")), 2205 fwrite(St20, <<" [~s|Nss] = Ss,\n">>, [Tmp]) 2206 end, 2207 St40 = case tokens(RuleNmbr, St30) of 2208 [{var, _, '__1'}] when NmbrOfDaughters =:= 1 -> 2209 NewStack = "Stack", 2210 St30; 2211 _ -> 2212 NewStack = "NewStack", 2213 fwrite(St30, <<" NewStack = ~w(Stack),\n">>, 2214 [inlined_function_name(St30, State, Terminal)]) 2215 end, 2216 if 2217 NmbrOfDaughters =:= 0 -> 2218 NextState = goto(State, Head, St40), 2219 {NextState, I} = lookup_state(StateInfo, NextState), 2220 #state_info{reduce_only = RO, state_repr = Repr, comment = C} = I, 2221 %% Reduce actions do not use the state, so we just pass 2222 %% the old (now bogus) on: 2223 if 2224 RO -> NextS = "_S"; 2225 true -> NextS = io_lib:fwrite("~w", [NextState]) 2226 end, 2227 St = fwrite(St40, <<"~s">>, [C]), 2228 %% Short-circuit call to yeccpars2: 2229 fwrite(St, 2230 <<" yeccpars2_~w(~s, ~ts, [~w | Ss], ~s, T, Ts, Tzr)">>, 2231 [Repr, NextS, QuotedTerminal, State, NewStack]); 2232 true -> 2233 fwrite(St40, 2234 <<" ~w(hd(~s), ~ts, ~s, ~s, T, Ts, Tzr)">>, 2235 [function_name(St40, yeccgoto, Head), Ns, 2236 QuotedTerminal, Ns, NewStack]) 2237 end. 2238 2239delim(St, true) -> 2240 St; 2241delim(St, false) -> 2242 fwrite(St, <<";\n">>, []). 2243 2244quoted_atom(#yecc{encoding = latin1}, Atom) when is_atom(Atom) -> 2245 io_lib:write_atom_as_latin1(Atom); 2246quoted_atom(_St, Atomic) -> 2247 io_lib:write(Atomic). 2248 2249output_inlined(St, UserCodeActions, Infile) -> 2250 foldl(fun(#user_code{funname = InlinedFunctionName, 2251 action = Action}, St_0) -> 2252 output_inlined(St_0, InlinedFunctionName, 2253 Action, Infile) 2254 end, St, UserCodeActions). 2255 2256%% Each action with user code is placed in a separate inlined function. 2257%% The purpose is to be able to pinpoint errors and warnings correctly. 2258output_inlined(St0, FunctionName, Reduce, Infile) -> 2259 #reduce{rule_nmbr = RuleNmbr, nmbr_of_daughters = N_daughters} = Reduce, 2260 #rule{tokens = Tokens, is_well_formed = WF} = get_rule(RuleNmbr, St0), 2261 Line0 = first_line(Tokens), 2262 NLines = last_line(Tokens) - Line0, 2263 2264 St5 = if 2265 WF -> 2266 St0; 2267 not WF -> 2268 %% The compiler will generate an error message for 2269 %% the inlined function (unless the reason that yecc 2270 %% failed to parse the action was some macro). The 2271 %% line number of the message will be correct since 2272 %% we are keeping track of the current line of the 2273 %% output file... 2274 #yecc{outfile = Outfile, line = CurLine} = St0, 2275 output_file_directive(St0, Outfile, CurLine) 2276 end, 2277 2278 CodeStartLine = lists:max([0, Line0 - 4]), 2279 St10 = fwrite(St5, <<"-compile({inline,~w/1}).\n">>, [FunctionName]), 2280 St20 = output_file_directive(St10, Infile, CodeStartLine), 2281 St30 = fwrite(St20, <<"~w(__Stack0) ->\n">>, [FunctionName]), 2282 %% Currently the (old) inliner emits less code if matching the 2283 %% stack inside the body rather than in the head... 2284 St40 = case N_daughters of 2285 0 -> 2286 Stack = "__Stack0", 2287 St30; 2288 _ -> 2289 Stack = "__Stack", 2290 A = concat(flatmap(fun(I) -> [",__",I] end, 2291 lists:seq(N_daughters, 1, -1))), 2292 fwrite(St30, <<" ~s = __Stack0,\n">>, 2293 [append(["[", tl(A), " | __Stack]"])]) 2294 end, 2295 St = St40#yecc{line = St40#yecc.line + NLines}, 2296 fwrite(St, <<" [begin\n ~ts\n end | ~s].\n\n">>, 2297 [pp_tokens(Tokens, Line0, St#yecc.encoding), Stack]). 2298 2299inlined_function_name(St, State, Terminal) -> 2300 End = case Terminal of 2301 "Cat" -> []; 2302 _ -> [quoted_atom(St, Terminal)] 2303 end, 2304 list_to_atom(concat([yeccpars2_, State, '_'] ++ End)). 2305 2306-compile({nowarn_unused_function,function_name/3}). 2307function_name(St, Name, Suf) -> 2308 list_to_atom(concat([Name, '_'] ++ [quoted_atom(St, Suf)])). 2309 2310rule(RulePointer, St) -> 2311 #rule{n = N, anno = Anno, symbols = Symbols} = 2312 maps:get(RulePointer, St#yecc.rule_pointer2rule), 2313 {Symbols, Anno, N}. 2314 2315get_rule(RuleNmbr, St) -> 2316 maps:get(RuleNmbr, St#yecc.rule_pointer2rule). 2317 2318tokens(RuleNmbr, St) -> 2319 Rule = maps:get(RuleNmbr, St#yecc.rule_pointer2rule), 2320 Rule#rule.tokens. 2321 2322goto(From, Symbol, St) -> 2323 case ets:lookup(St#yecc.goto_tab, {From, Symbol}) of 2324 [{_, To}] -> 2325 To; 2326 [] -> 2327 erlang:error({error_in_goto_table, From, Symbol}) 2328 end. 2329 2330%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2331% Auxiliaries: 2332 2333-ifdef(SYMBOLS_AS_CODES). 2334 2335%%% Bit mask operations. 2336 2337-compile({inline,[set_empty/0]}). 2338set_empty() -> 2339 0. 2340 2341set_add(I, BM) -> 2342 (1 bsl I) bor BM. 2343 2344-compile({inline,[set_member/2]}). 2345set_member(I, BM) -> 2346 ((1 bsl I) band BM) =/= 0. 2347 2348%% Assumes I is a member... 2349-compile({inline,[set_delete/2]}). 2350set_delete(I, BM) -> 2351 (1 bsl I) bxor BM. 2352 2353-compile({inline,[set_union/2]}). 2354set_union(BM1, BM2) -> 2355 BM1 bor BM2. 2356 2357-compile({inline,[set_is_subset/2]}). 2358set_is_subset(BM1, BM2) -> 2359 (BM1 band BM2) =:= BM1. 2360 2361empty_member(BM) -> 2362 set_member(0, BM). 2363 2364empty_delete(BM) -> 2365 set_delete(0, BM). 2366 2367code_symbols(Ss, SymbolTable) -> 2368 map(fun(S) -> ets:lookup_element(SymbolTable, S, 2) end, Ss). 2369 2370decode_symbol(C, InvSymbolTable) -> 2371 ets:lookup_element(InvSymbolTable, C, 1). 2372 2373code_terminal(T, SymbolTab) -> 2374 set_add(ets:lookup_element(SymbolTab, T, 2), 0). 2375 2376decode_terminals(BM, InvSymbolTab) -> 2377 case get(BM) of 2378 undefined -> 2379 Symbols = decode_terminals(BM, 0, InvSymbolTab), 2380 put(BM, Symbols), 2381 Symbols; 2382 Symbols -> 2383 Symbols 2384 end. 2385 2386decode_terminals(0, _I, _InvSymbolTab) -> 2387 []; 2388decode_terminals(BM, I, InvSymbolTab) -> 2389 case set_member(I, BM) of 2390 true -> 2391 [ets:lookup_element(InvSymbolTab, I, 1) 2392 | decode_terminals(set_delete(I, BM), I+1, InvSymbolTab)]; 2393 false -> 2394 decode_terminals(BM, I+1, InvSymbolTab) 2395 end. 2396 2397set_add_terminal({_Symbol, TerminalNum}, BM) -> 2398 set_add(TerminalNum, BM). 2399 2400-compile({inline,[is_terminal/2]}). 2401is_terminal(_Tab, SymbolCode) -> 2402 SymbolCode >= 0. 2403 2404left_corner_symbol_table(St) -> 2405 St#yecc.inv_symbol_tab. 2406 2407-else. 2408 2409set_empty() -> 2410 []. 2411 2412set_add(Symbol, L) -> 2413 ordsets:union([Symbol], L). 2414 2415set_union(Es1, Es2) -> 2416 ordsets:union(Es1, Es2). 2417 2418set_is_subset(Es1, Es2) -> 2419 ordsets:is_subset(Es1, Es2). 2420 2421code_symbols(Ss, _SymbolTab) -> 2422 Ss. 2423 2424decode_symbol(S, _InvSymbolTab) -> 2425 S. 2426 2427code_terminal(T, _SymbolTab) -> 2428 [T]. 2429 2430decode_terminals(Ts, _InvSymbolTab) -> 2431 Ts. 2432 2433empty_member(['$empty' | _]) -> 2434 true; 2435empty_member(_) -> 2436 false. 2437 2438empty_delete(['$empty' | Terminals]) -> 2439 Terminals. 2440 2441set_add_terminal({Symbol, _TerminalNum}, L) -> 2442 set_add(Symbol, L). 2443 2444is_terminal(Tab, SymbolName) -> 2445 ets:lookup_element(Tab, SymbolName, 2) >= 0. 2446 2447left_corner_symbol_table(St) -> 2448 St#yecc.symbol_tab. 2449 2450-endif. % SYMBOLS_AS_CODES 2451 2452intersect(L1, L2) -> 2453 ordsets:to_list(ordsets:intersection(ordsets:from_list(L1), 2454 ordsets:from_list(L2))). 2455 2456format_symbols([Sym | Syms]) -> 2457 concat([format_symbol(Sym) | format_symbols1(Syms)]). 2458 2459format_symbols1([]) -> 2460 []; 2461format_symbols1([H | T]) -> 2462 [" ", format_symbol(H) | format_symbols1(T)]. 2463 2464include(St, File, Outport) -> 2465 case file:open(File, [read]) of 2466 {error, Reason} -> 2467 throw(add_error(File, none, {file_error, Reason}, St)); 2468 {ok, Inport} -> 2469 _ = epp:set_encoding(Inport), 2470 Line = io:get_line(Inport, ''), 2471 try include1(Line, Inport, Outport, File, 1, St) - 1 2472 after ok = file:close(Inport) 2473 end 2474 end. 2475 2476include1(eof, _, _, _File, L, _St) -> 2477 L; 2478include1({error, _}=_Error, _Inport, _Outport, File, L, St) -> 2479 throw(add_error(File, erl_anno:new(L), cannot_parse, St)); 2480include1(Line, Inport, Outport, File, L, St) -> 2481 Incr = case member($\n, Line) of 2482 true -> 1; 2483 false -> 0 2484 end, 2485 io:put_chars(Outport, Line), 2486 include1(io:get_line(Inport, ''), Inport, Outport, File, L + Incr, St). 2487 2488includefile_version([]) -> 2489 {1,4}; 2490includefile_version(Includefile) -> 2491 case epp:open(Includefile, []) of 2492 {ok, Epp} -> 2493 try 2494 parse_file(Epp) 2495 after 2496 epp:close(Epp) 2497 end; 2498 {error, _Error} -> 2499 {1,1} 2500 end. 2501 2502parse_file(Epp) -> 2503 case epp:parse_erl_form(Epp) of 2504 {ok, {function,_Anno,yeccpars1,7,_Clauses}} -> 2505 {1,4}; 2506 {eof,_Line} -> 2507 {1,1}; 2508 _Form -> 2509 parse_file(Epp) 2510 end. 2511 2512%% Keeps the line breaks of the original code. 2513pp_tokens(Tokens, Line0, Enc) -> 2514 concat(pp_tokens1(Tokens, Line0, Enc, [])). 2515 2516pp_tokens1([], _Line0, _Enc, _T0) -> 2517 []; 2518pp_tokens1([T | Ts], Line0, Enc, T0) -> 2519 Line = location(anno(T)), 2520 [pp_sep(Line, Line0, T0), pp_symbol(T, Enc)|pp_tokens1(Ts, Line, Enc, T)]. 2521 2522pp_symbol({var,_,Var}, _Enc) -> Var; 2523pp_symbol({string,_,String}, latin1) -> 2524 io_lib:write_string_as_latin1(String); 2525pp_symbol({string,_,String}, _Enc) -> io_lib:write_string(String); 2526pp_symbol({_,_,Symbol}, latin1) -> io_lib:fwrite(<<"~p">>, [Symbol]); 2527pp_symbol({_,_,Symbol}, _Enc) -> io_lib:fwrite(<<"~tp">>, [Symbol]); 2528pp_symbol({Symbol, _}, _Enc) -> Symbol. 2529 2530pp_sep(Line, Line0, T0) when Line > Line0 -> 2531 ["\n " | pp_sep(Line - 1, Line0, T0)]; 2532pp_sep(_Line, _Line0, {'.',_}) -> 2533 ""; 2534pp_sep(_Line, _Line0, _T0) -> 2535 " ". 2536 2537set_encoding(#yecc{encoding = none}, Port) -> 2538 ok = io:setopts(Port, [{encoding, epp:default_encoding()}]); 2539set_encoding(#yecc{encoding = E}, Port) -> 2540 ok = io:setopts(Port, [{encoding, E}]). 2541 2542output_encoding_comment(#yecc{encoding = none}=St) -> 2543 St; 2544output_encoding_comment(#yecc{encoding = Encoding}=St) -> 2545 fwrite(St, <<"%% ~s\n">>, [epp:encoding_to_string(Encoding)]). 2546 2547output_file_directive(St, Filename, Line) when St#yecc.file_attrs -> 2548 fwrite(St, <<"-file(~ts, ~w).\n">>, 2549 [format_filename(Filename, St), Line]); 2550output_file_directive(St, _Filename, _Line) -> 2551 St. 2552 2553first_line(Tokens) -> 2554 location(anno(hd(Tokens))). 2555 2556last_line(Tokens) -> 2557 location(anno(lists:last(Tokens))). 2558 2559location(none) -> none; 2560location(Anno) -> 2561 erl_anno:line(Anno). 2562 2563anno(Token) -> 2564 element(2, Token). 2565 2566%% Keep track of the current line in the generated file. 2567fwrite(#yecc{outport = Outport, line = Line}=St, Format, Args) -> 2568 NLines = count_nl(Format), 2569 io:fwrite(Outport, Format, Args), 2570 St#yecc{line = Line + NLines}. 2571 2572%% Assumes \n is used, and never ~n. 2573count_nl(<<$\n,Rest/binary>>) -> 2574 1 + count_nl(Rest); 2575count_nl(<<_,Rest/binary>>) -> 2576 count_nl(Rest); 2577count_nl(<<>>) -> 2578 0. 2579 2580nl(#yecc{outport = Outport, line = Line}=St) -> 2581 io:nl(Outport), 2582 St#yecc{line = Line + 1}. 2583 2584format_filename(Filename0, St) -> 2585 Filename = filename:flatten(Filename0), 2586 case lists:keyfind(encoding, 1, io:getopts(St#yecc.outport)) of 2587 {encoding, unicode} -> io_lib:write_string(Filename); 2588 _ -> io_lib:write_string_as_latin1(Filename) 2589 end. 2590 2591format_assoc(left) -> 2592 "Left"; 2593format_assoc(right) -> 2594 "Right"; 2595format_assoc(unary) -> 2596 "Unary"; 2597format_assoc(nonassoc) -> 2598 "Nonassoc". 2599 2600format_symbol(Symbol) -> 2601 String = concat([Symbol]), 2602 case erl_scan:string(String) of 2603 {ok, [{atom, _, _}], _} -> 2604 io_lib:fwrite(<<"~tw">>, [Symbol]); 2605 {ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' -> 2606 case erl_scan:reserved_word(Word) of 2607 true -> 2608 String; 2609 false -> 2610 io_lib:fwrite(<<"~tw">>, [Symbol]) 2611 end; 2612 {ok, [{var, _, _}], _} -> 2613 String; 2614 _ -> 2615 io_lib:fwrite(<<"~tw">>, [Symbol]) 2616 end. 2617 2618inverse(L) -> 2619 sort([{A,B} || {B,A} <- L]). 2620 2621family(L) -> 2622 sofs:to_external(sofs:relation_to_family(sofs:relation(L))). 2623 2624seq1(To) when To < 1 -> 2625 []; 2626seq1(To) -> 2627 lists:seq(1, To). 2628 2629count(From, L) -> 2630 lists:zip(L, lists:seq(From, length(L)-1+From)). 2631 2632family_with_domain(L, DL) -> 2633 sofs:to_external(sofs_family_with_domain(sofs:relation(L), sofs:set(DL))). 2634 2635sofs_family_with_domain(R0, D) -> 2636 R = sofs:restriction(R0, D), 2637 F = sofs:relation_to_family(R), 2638 FD = sofs:constant_function(D, sofs:from_term([])), 2639 sofs:family_union(F, FD). 2640