1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 1996-2020. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20%% Yacc like LALR-1 parser generator for Erlang. 21%% Ref: Aho & Johnson: "LR Parsing", ACM Computing Surveys, vol. 6:2, 1974. 22%% Auxiliary files: yeccgramm.yrl, yeccparser.erl, yeccpre.hrl, yeccscan.erl. 23%% 24 25-module(yecc). 26 27-export([compile/3, file/1, file/2, format_error/1]). 28 29%% Kept for compatibility with R10B. 30-export([yecc/2, yecc/3, yecc/4]). 31 32-import(lists, [append/1, append/2, concat/1, delete/2, filter/2, 33 flatmap/2, foldl/3, foldr/3, foreach/2, keydelete/3, 34 keysort/2, last/1, map/2, member/2, reverse/1, 35 sort/1, usort/1]). 36 37-include("erl_compile.hrl"). 38-include("ms_transform.hrl"). 39 40-record(yecc, { 41 infile, 42 outfile, 43 includefile, 44 includefile_version, 45 module, 46 encoding = none, 47 options = [], 48 verbose = false, 49 file_attrs = true, 50 errors = [], 51 warnings = [], 52 conflicts_done = false, 53 shift_reduce = [], 54 reduce_reduce = [], 55 n_states = 0, 56 inport, 57 outport, 58 line, 59 60 parse_actions, 61 symbol_tab, 62 inv_symbol_tab, 63 state_tab, 64 prec_tab, 65 goto_tab, 66 67 terminals = [], 68 nonterminals = [], 69 all_symbols = [], 70 prec = [], 71 rules_list = [], 72 rules, % a tuple of rules_list 73 rule_pointer2rule, 74 rootsymbol = [], 75 endsymbol = [], 76 expect_shift_reduce = [], 77 expect_n_states = [], 78 header = [], 79 erlang_code = none 80 }). 81 82-record(rule, { 83 n, % rule n in the grammar file 84 location, 85 symbols, % the names of symbols 86 tokens, 87 is_guard, % the action is a guard (not used) 88 is_well_formed % can be parsed (without macro expansion) 89 }). 90 91-record(reduce, { 92 rule_nmbr, 93 head, 94 nmbr_of_daughters, 95 prec, 96 unused % assure that #reduce{} comes before #shift{} when sorting 97 }). 98 99-record(shift, { 100 state, 101 pos, 102 prec, 103 rule_nmbr 104 }). 105 106-record(user_code, {state, terminal, funname, action}). 107 108-record(symbol, {anno = none, name}). 109 110%% ACCEPT is neither an atom nor a non-terminal. 111-define(ACCEPT, {}). 112 113%% During the phase 'compute_states' terminals in lookahead sets are 114%% coded as integers; sets of terminals are integer bit masks. This is 115%% for efficiency only. '$empty' is always given the mask 1. The 116%% behaviour can be turned off by un-defining SYMBOLS_AS_CODES (useful 117%% when debugging). 118 119%% Non-terminals are also given integer codes, starting with -1. The 120%% absolut value of the code is used for indexing a tuple of lists of 121%% rules. 122 123-define(SYMBOLS_AS_CODES, true). 124 125-ifdef(SYMBOLS_AS_CODES). 126-define(EMPTY, 0). 127-else. 128-define(EMPTY, '$empty'). 129-endif. 130 131%%% 132%%% Exported functions 133%%% 134 135%%% Interface to erl_compile. 136 137compile(Input0, Output0, 138 #options{warning = WarnLevel, verbose=Verbose, includes=Includes, 139 specific=Specific}) -> 140 Input = shorten_filename(Input0), 141 Output = shorten_filename(Output0), 142 Includefile = lists:sublist(Includes, 1), 143 Werror = proplists:get_bool(warnings_as_errors, Specific), 144 Opts = [{parserfile,Output}, {includefile,Includefile}, {verbose,Verbose}, 145 {report_errors, true}, {report_warnings, WarnLevel > 0}, 146 {warnings_as_errors, Werror}], 147 case file(Input, Opts) of 148 {ok, _OutFile} -> 149 ok; 150 error -> 151 error 152 end. 153 154-spec format_error(ErrorDescriptor) -> io_lib:chars() when 155 ErrorDescriptor :: term(). 156 157format_error(bad_declaration) -> 158 io_lib:fwrite("unknown or bad declaration, ignored", []); 159format_error({bad_expect, SymName}) -> 160 io_lib:fwrite("argument ~ts of Expect is not an integer", 161 [format_symbol(SymName)]); 162format_error({bad_rootsymbol, SymName}) -> 163 io_lib:fwrite("rootsymbol ~ts is not a nonterminal", 164 [format_symbol(SymName)]); 165format_error({bad_states, SymName}) -> 166 io_lib:fwrite("argument ~ts of States is not an integer", 167 [format_symbol(SymName)]); 168format_error({conflict, Conflict}) -> 169 format_conflict(Conflict); 170format_error({conflicts, SR, RR}) -> 171 io_lib:fwrite("conflicts: ~w shift/reduce, ~w reduce/reduce", [SR, RR]); 172format_error({duplicate_declaration, Tag}) -> 173 io_lib:fwrite("duplicate declaration of ~s", [atom_to_list(Tag)]); 174format_error({duplicate_nonterminal, Nonterminal}) -> 175 io_lib:fwrite("duplicate non-terminals ~ts", 176 [format_symbol(Nonterminal)]); 177format_error({duplicate_precedence, Op}) -> 178 io_lib:fwrite("duplicate precedence operator ~ts", 179 [format_symbol(Op)]); 180format_error({duplicate_terminal, Terminal}) -> 181 io_lib:fwrite("duplicate terminal ~ts", 182 [format_symbol(Terminal)]); 183format_error({endsymbol_is_nonterminal, Symbol}) -> 184 io_lib:fwrite("endsymbol ~ts is a nonterminal", 185 [format_symbol(Symbol)]); 186format_error({endsymbol_is_terminal, Symbol}) -> 187 io_lib:fwrite("endsymbol ~ts is a terminal", 188 [format_symbol(Symbol)]); 189format_error({error, Module, Error}) -> 190 Module:format_error(Error); 191format_error({file_error, Reason}) -> 192 io_lib:fwrite("~ts",[file:format_error(Reason)]); 193format_error(illegal_empty) -> 194 io_lib:fwrite("illegal use of empty symbol", []); 195format_error({internal_error, Error}) -> 196 io_lib:fwrite("internal yecc error: ~w", [Error]); 197format_error({missing_syntax_rule, Nonterminal}) -> 198 io_lib:fwrite("no syntax rule for non-terminal symbol ~ts", 199 [format_symbol(Nonterminal)]); 200format_error({n_states, Exp, N}) -> 201 io_lib:fwrite("expected ~w states, but got ~p states", [Exp, N]); 202format_error(no_grammar_rules) -> 203 io_lib:fwrite("grammar rules are missing", []); 204format_error(nonterminals_missing) -> 205 io_lib:fwrite("Nonterminals is missing", []); 206format_error({precedence_op_is_endsymbol, SymName}) -> 207 io_lib:fwrite("precedence operator ~ts is endsymbol", 208 [format_symbol(SymName)]); 209format_error({precedence_op_is_unknown, SymName}) -> 210 io_lib:fwrite("unknown precedence operator ~ts", 211 [format_symbol(SymName)]); 212format_error({reserved, N}) -> 213 io_lib:fwrite("the use of ~w should be avoided", [N]); 214format_error({symbol_terminal_and_nonterminal, SymName}) -> 215 io_lib:fwrite("symbol ~ts is both a terminal and nonterminal", 216 [format_symbol(SymName)]); 217format_error(rootsymbol_missing) -> 218 io_lib:fwrite("Rootsymbol is missing", []); 219format_error(terminals_missing) -> 220 io_lib:fwrite("Terminals is missing", []); 221format_error({undefined_nonterminal, Symbol}) -> 222 io_lib:fwrite("undefined nonterminal: ~ts", [format_symbol(Symbol)]); 223format_error({undefined_pseudo_variable, Atom}) -> 224 io_lib:fwrite("undefined pseudo variable ~w", [Atom]); 225format_error({undefined_symbol, SymName}) -> 226 io_lib:fwrite("undefined rhs symbol ~ts", [format_symbol(SymName)]); 227format_error({unused_nonterminal, Nonterminal}) -> 228 io_lib:fwrite("non-terminal symbol ~ts not used", 229 [format_symbol(Nonterminal)]); 230format_error({unused_terminal, Terminal}) -> 231 io_lib:fwrite("terminal symbol ~ts not used", 232 [format_symbol(Terminal)]); 233format_error({bad_symbol, String}) -> 234 io_lib:fwrite("bad symbol ~ts", [String]); 235format_error(cannot_parse) -> 236 io_lib:fwrite("cannot parse; possibly encoding mismatch", []). 237 238-type error_info() :: {erl_anno:location() | 'none', 239 module(), ErrorDescriptor :: term()}. 240-type errors() :: [{file:filename(), [error_info()]}]. 241-type warnings() :: [{file:filename(), [error_info()]}]. 242-type ok_ret() :: {'ok', Parserfile :: file:filename()} 243 | {'ok', Parserfile :: file:filename(), warnings()}. 244-type error_ret() :: 'error' 245 | {'error', Errors :: errors(), Warnings :: warnings()}. 246-type yecc_ret() :: ok_ret() | error_ret(). 247 248-spec file(FileName) -> yecc_ret() when 249 FileName :: file:filename(). 250 251file(GrammarFile) -> 252 file(GrammarFile, [report_errors, report_warnings]). 253 254-spec file(Grammarfile, Options) -> yecc_ret() when 255 Grammarfile :: file:filename(), 256 Options :: Option | [Option], 257 Option :: {'error_location', 'column' | 'line'} 258 | {'includefile', Includefile :: file:filename()} 259 | {'report_errors', boolean()} 260 | {'report_warnings', boolean()} 261 | {'report', boolean()} 262 | {'return_errors', boolean()} 263 | {'return_warnings', boolean()} 264 | {'return', boolean()} 265 | {'parserfile', Parserfile :: file:filename()} 266 | {'verbose', boolean()} 267 | {'warnings_as_errors', boolean()} 268 | 'report_errors' | 'report_warnings' | 'report' 269 | 'return_errors' | 'return_warnings' | 'return' 270 | 'verbose' | 'warnings_as_errors'. 271 272file(File, Options0) when is_list(Options0) -> 273 case is_filename(File) of 274 no -> erlang:error(badarg, [File, Options0]); 275 _ -> ok 276 end, 277 EnvOpts0 = env_default_opts(), 278 EnvOpts = select_recognized_opts(EnvOpts0), 279 Options = Options0 ++ EnvOpts, 280 case options(Options) of 281 badarg -> 282 erlang:error(badarg, [File, Options]); 283 OptionValues -> 284 Self = self(), 285 Flag = process_flag(trap_exit, false), 286 Pid = spawn_link(fun() -> infile(Self, File, OptionValues) end), 287 receive 288 {Pid, Rep} -> 289 receive after 1 -> ok end, 290 process_flag(trap_exit, Flag), 291 Rep 292 end 293 end; 294file(File, Option) -> 295 file(File, [Option]). 296 297%% Kept for backward compatibility. 298yecc(Infile, Outfile) -> 299 yecc(Infile, Outfile, false, []). 300 301yecc(Infile, Outfile, Verbose) -> 302 yecc(Infile, Outfile, Verbose, []). 303 304yecc(Infilex, Outfilex, Verbose, Includefilex) -> 305 _ = statistics(runtime), 306 case file(Infilex, [{parserfile, Outfilex}, 307 {verbose, Verbose}, 308 {report, true}, 309 {includefile, Includefilex}]) of 310 {ok, _File} -> 311 statistics(runtime); 312 error -> 313 exit(error) 314 end. 315 316%%% 317%%% Local functions 318%%% 319 320%% Copied from compile.erl. 321env_default_opts() -> 322 Key = "ERL_COMPILER_OPTIONS", 323 case os:getenv(Key) of 324 false -> []; 325 Str when is_list(Str) -> 326 case erl_scan:string(Str) of 327 {ok,Tokens,_} -> 328 Dot = {dot, erl_anno:new(1)}, 329 case erl_parse:parse_term(Tokens ++ [Dot]) of 330 {ok,List} when is_list(List) -> List; 331 {ok,Term} -> [Term]; 332 {error,_Reason} -> 333 io:format("Ignoring bad term in ~s\n", [Key]), 334 [] 335 end; 336 {error, {_,_,_Reason}, _} -> 337 io:format("Ignoring bad term in ~s\n", [Key]), 338 [] 339 end 340 end. 341 342select_recognized_opts(Options0) -> 343 Options = preprocess_options(Options0), 344 AllOptions = all_options(), 345 [Option || 346 {Name, _} = Option <- Options, 347 lists:member(Name, AllOptions)]. 348 349options(Options0) -> 350 Options1 = preprocess_options(Options0), 351 AllOptions = all_options(), 352 case check_options(Options1, AllOptions, []) of 353 badarg -> 354 badarg; 355 OptionValues -> 356 AllOptionValues = 357 [case lists:keyfind(Option, 1, OptionValues) of 358 false -> 359 {Option, default_option(Option)}; 360 OptionValue -> 361 OptionValue 362 end || Option <- AllOptions], 363 foldr(fun({_, false}, L) -> L; 364 ({Option, true}, L) -> [Option | L]; 365 (OptionValue, L) -> [OptionValue | L] 366 end, [], AllOptionValues) 367 end. 368 369preprocess_options(Options) -> 370 foldr(fun preproc_opt/2, [], Options). 371 372preproc_opt(return, Os) -> 373 [{return_errors, true}, {return_warnings, true} | Os]; 374preproc_opt(report, Os) -> 375 [{report_errors, true}, {report_warnings, true} | Os]; 376preproc_opt({return, T}, Os) -> 377 [{return_errors, T}, {return_warnings, T} | Os]; 378preproc_opt({report, T}, Os) -> 379 [{report_errors, T}, {report_warnings, T} | Os]; 380preproc_opt(Option, Os) -> 381 [try atom_option(Option) catch error:_ -> Option end | Os]. 382 383check_options([{Option, FileName0} | Options], AllOptions, L) 384 when Option =:= includefile; Option =:= parserfile -> 385 case is_filename(FileName0) of 386 no -> 387 badarg; 388 Filename -> 389 check_options(Options, AllOptions, [{Option, Filename} | L]) 390 end; 391check_options([{error_location, ELoc}=OptionValue | Options], AllOptions, L) 392 when ELoc =:= column; ELoc =:= line -> 393 check_options(Options, AllOptions, [OptionValue | L]); 394check_options([{Option, Boolean}=OptionValue | Options], AllOptions, L) 395 when is_boolean(Boolean) -> 396 case lists:member(Option, AllOptions) of 397 true -> 398 check_options(Options, AllOptions, [OptionValue | L]); 399 false -> 400 badarg 401 end; 402check_options([], _AllOptions, L) -> 403 L; 404check_options(_Options, _, _L) -> 405 badarg. 406 407all_options() -> 408 [error_location, file_attributes, includefile, parserfile, 409 report_errors, report_warnings, return_errors, return_warnings, 410 time, verbose, warnings_as_errors]. 411 412default_option(error_location) -> column; 413default_option(file_attributes) -> true; 414default_option(includefile) -> []; 415default_option(parserfile) -> []; 416default_option(report_errors) -> true; 417default_option(report_warnings) -> true; 418default_option(return_errors) -> false; 419default_option(return_warnings) -> false; 420default_option(time) -> false; 421default_option(verbose) -> false; 422default_option(warnings_as_errors) -> false. 423 424atom_option(file_attributes) -> {file_attributes, true}; 425atom_option(report_errors) -> {report_errors, true}; 426atom_option(report_warnings) -> {report_warnings, true}; 427atom_option(return_errors) -> {return_errors, true}; 428atom_option(return_warnings) -> {return_warnings, true}; 429atom_option(time) -> {time, true}; 430atom_option(verbose) -> {verbose, true}; 431atom_option(warnings_as_errors) -> {warnings_as_errors, true}; 432atom_option(Key) -> Key. 433 434is_filename(T) -> 435 try filename:flatten(T) 436 catch error: _ -> no 437 end. 438 439shorten_filename(Name0) -> 440 {ok,Cwd} = file:get_cwd(), 441 case string:prefix(Name0, Cwd) of 442 nomatch -> Name0; 443 Rest -> 444 case unicode:characters_to_list(Rest) of 445 "/"++N -> N; 446 N -> N 447 end 448 end. 449 450start(Infilex, Options) -> 451 Infile = assure_extension(Infilex, ".yrl"), 452 {_, Outfilex0} = lists:keyfind(parserfile, 1, Options), 453 {_, Includefilex} = lists:keyfind(includefile, 1, Options), 454 Outfilex = case Outfilex0 of 455 [] -> filename:rootname(Infilex, ".yrl"); 456 _ -> Outfilex0 457 end, 458 Includefile = case Includefilex of 459 [] -> []; 460 _ -> assure_extension(Includefilex,".hrl") 461 end, 462 IncludefileVersion = includefile_version(Includefile), 463 Outfile = assure_extension(Outfilex, ".erl"), 464 Module = list_to_atom(filename:basename(Outfile, ".erl")), 465 #yecc{infile = Infile, 466 outfile = Outfile, 467 includefile = Includefile, 468 includefile_version = IncludefileVersion, 469 module = Module, 470 options = Options, 471 verbose = member(verbose, Options), 472 file_attrs = member(file_attributes, Options)}. 473 474assure_extension(File, Ext) -> 475 concat([strip_extension(File, Ext), Ext]). 476 477%% Assumes File is a filename. 478strip_extension(File, Ext) -> 479 case filename:extension(File) of 480 Ext -> filename:rootname(File); 481 _Other -> File 482 end. 483 484infile(Parent, Infilex, Options) -> 485 St0 = start(Infilex, Options), 486 St = case file:open(St0#yecc.infile, [read, read_ahead]) of 487 {ok, Inport} -> 488 try 489 Encoding = epp:set_encoding(Inport), 490 St1 = St0#yecc{inport = Inport, encoding = Encoding}, 491 outfile(St1) 492 after 493 ok = file:close(Inport) 494 end; 495 {error, Reason} -> 496 add_error(St0#yecc.infile, none, {file_error, Reason}, St0) 497 end, 498 case {St#yecc.errors, werror(St)} of 499 {[], false} -> ok; 500 _ -> _ = file:delete(St#yecc.outfile), ok 501 end, 502 Parent ! {self(), yecc_ret(St)}. 503 504werror(St) -> 505 St#yecc.warnings =/= [] 506 andalso member(warnings_as_errors, St#yecc.options). 507 508outfile(St0) -> 509 case file:open(St0#yecc.outfile, [write, delayed_write]) of 510 {ok, Outport} -> 511 try 512 %% Set the same encoding as infile: 513 set_encoding(St0, Outport), 514 generate(St0#yecc{outport = Outport, line = 1}) 515 catch 516 throw: St1 -> 517 St1; 518 exit: Reason -> 519 add_error({internal_error, Reason}, St0) 520 after 521 ok = file:close(Outport) 522 end; 523 {error, Reason} -> 524 add_error(St0#yecc.outfile, none, {file_error, Reason}, St0) 525 end. 526 527os_process_size() -> 528 case os:type() of 529 {unix, sunos} -> 530 Size = os:cmd("ps -o vsz -p " ++ os:getpid() ++ " | tail -1"), 531 list_to_integer(nonl(Size)); 532 _ -> 533 0 534 end. 535 536nonl([$\n]) -> []; 537nonl([]) -> []; 538nonl([H|T]) -> [H|nonl(T)]. 539 540timeit(Name, Fun, St0) -> 541 Time = runtime, 542 %% Time = wall_clock, 543 {Before, _} = statistics(Time), 544 St = Fun(St0), 545 {After, _} = statistics(Time), 546 Mem0 = erts_debug:flat_size(St)*erlang:system_info(wordsize), 547 Mem = lists:flatten(io_lib:format("~.1f kB", [Mem0/1024])), 548 Sz = lists:flatten(io_lib:format("~.1f MB", [os_process_size()/1024])), 549 io:fwrite(" ~-30w: ~10.2f s ~12s ~10s\n", 550 [Name, (After-Before)/1000, Mem, Sz]), 551 St. 552 553-define(PASS(P), {P, fun P/1}). 554 555generate(St0) -> 556 St1 = output_encoding_comment(St0), 557 Passes = [?PASS(parse_grammar), ?PASS(check_grammar), 558 ?PASS(states_and_goto_table), ?PASS(parse_actions), 559 ?PASS(action_conflicts), ?PASS(write_file)], 560 F = case member(time, St1#yecc.options) of 561 true -> 562 io:fwrite(<<"Generating parser from grammar in ~ts\n">>, 563 [format_filename(St1#yecc.infile, St1)]), 564 fun timeit/3; 565 false -> 566 fun(_Name, Fn, St) -> Fn(St) end 567 end, 568 Fun = fun({Name, Fun}, St) -> 569 St2 = F(Name, Fun, St), 570 if 571 St2#yecc.errors =:= [] -> St2; 572 true -> throw(St2) 573 end 574 end, 575 foldl(Fun, St1, Passes). 576 577parse_grammar(St) -> 578 StartLocation = 579 case lists:keyfind(error_location, 1, St#yecc.options) of 580 {error_location, column} -> 581 {1, 1}; 582 _ -> 583 1 584 end, 585 parse_grammar(St#yecc.inport, StartLocation, St). 586 587parse_grammar(Inport, Location, St) -> 588 {NextLocation, Grammar} = read_grammar(Inport, St, Location), 589 parse_grammar(Grammar, Inport, NextLocation, St). 590 591parse_grammar(eof, _Inport, _NextLocation, St) -> 592 St; 593parse_grammar({#symbol{name = 'Header'}, Ss}, Inport, NextLocation, St0) -> 594 St1 = St0#yecc{header = [S || {string,_,S} <- Ss]}, 595 parse_grammar(Inport, NextLocation, St1); 596parse_grammar({#symbol{name = 'Erlang'}, [#symbol{name = code}]}, _Inport, 597 NextLocation, St) -> 598 St#yecc{erlang_code = line_of_location(NextLocation)}; 599parse_grammar(Grammar, Inport, NextLocation, St0) -> 600 St = parse_grammar(Grammar, St0), 601 parse_grammar(Inport, NextLocation, St). 602 603parse_grammar({error,ErrorLocation,Error}, St) -> 604 add_error(erl_anno:new(ErrorLocation), Error, St); 605parse_grammar({rule, Rule, Tokens, AllTokens}, St0) -> 606 NmbrOfDaughters = case Rule of 607 [_, #symbol{name = '$empty'}] -> 0; 608 _ -> length(Rule) - 1 609 end, 610 {IsGuard, IsWellFormed} = check_action(Tokens), 611 {Tokens1, St} = subst_pseudo_vars(AllTokens, 612 NmbrOfDaughters, 613 St0), 614 RuleDef = #rule{symbols = Rule, 615 tokens = Tokens1, 616 is_guard = IsGuard, 617 is_well_formed = IsWellFormed}, 618 St#yecc{rules_list = [RuleDef | St#yecc.rules_list]}; 619parse_grammar({prec, Prec}, St) -> 620 St#yecc{prec = Prec ++ St#yecc.prec}; 621parse_grammar({#symbol{}, [{string,Anno,String}]}, St) -> 622 add_error(Anno, {bad_symbol, String}, St); 623parse_grammar({#symbol{anno = Anno, name = Name}, Symbols}, St) -> 624 CF = fun(I) -> 625 case element(I, St) of 626 [] -> 627 setelement(I, St, Symbols); 628 _ -> 629 add_error(Anno, {duplicate_declaration, Name}, St) 630 end 631 end, 632 OneSymbol = length(Symbols) =:= 1, 633 case Name of 634 'Nonterminals' -> CF(#yecc.nonterminals); 635 'Terminals' -> CF(#yecc.terminals); 636 'Rootsymbol' when OneSymbol -> CF(#yecc.rootsymbol); 637 'Endsymbol' when OneSymbol -> CF(#yecc.endsymbol); 638 'Expect' when OneSymbol -> CF(#yecc.expect_shift_reduce); 639 'States' when OneSymbol -> CF(#yecc.expect_n_states); % undocumented 640 _ -> add_warning(Anno, bad_declaration, St) 641 end. 642 643read_grammar(Inport, St, Location) -> 644 case yeccscan:scan(Inport, '', Location) of 645 {eof, NextLocation} -> 646 {NextLocation, eof}; 647 {error, {ErrorLocation, Mod, What}, NextLocation} -> 648 {NextLocation, {error, ErrorLocation, {error, Mod, What}}}; 649 {error, terminated} -> 650 throw(St); 651 {error, _} -> 652 File = St#yecc.infile, 653 throw(add_error(File, none, cannot_parse, St)); 654 {ok, AllInput, Input, NextLocation} -> 655 {NextLocation, 656 case yeccparser:parse(Input) of 657 {error, {ErrorLocation, Mod, Message}} -> 658 {error, ErrorLocation, {error, Mod, Message}}; 659 {ok, {rule, Rule, {erlang_code, Tokens}}} -> 660 AllTokens0 = 661 lists:dropwhile(fun(T) -> element(1, T) =/= ':' end, 662 AllInput), 663 AllTokens = case AllTokens0 of 664 [] -> 665 Tokens; 666 _ -> % Remove ':' and dot. 667 tl(AllTokens0) -- [last(AllTokens0)] 668 end, 669 {rule, Rule, Tokens, AllTokens}; 670 {ok, {#symbol{name=P}, 671 [#symbol{name=I} | OpL]}=Ss} -> 672 A = precedence(P), 673 if 674 A =/= unknown, 675 is_integer(I), 676 OpL =/= [] -> 677 Ps = [{Op, I , A} || Op <- OpL], 678 {prec, Ps}; 679 true -> 680 Ss 681 end; 682 {ok, Ss} -> 683 Ss 684 end} 685 end. 686 687precedence('Left') -> left; 688precedence('Right') -> right; 689precedence('Unary') -> unary; 690precedence('Nonassoc') -> nonassoc; 691precedence(_) -> unknown. 692 693%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 694 695check_grammar(St0) -> 696 Empty = #symbol{anno = none, name = '$empty'}, 697 AllSymbols = St0#yecc.nonterminals ++ St0#yecc.terminals ++ [Empty], 698 St1 = St0#yecc{all_symbols = AllSymbols}, 699 Cs = [fun check_nonterminals/1, fun check_terminals/1, 700 fun check_rootsymbol/1, fun check_endsymbol/1, 701 fun check_expect/1, fun check_states/1, 702 fun check_precedences/1, fun check_rules/1], 703 foldl(fun(F, St) -> F(St) end, St1, Cs). 704 705check_nonterminals(St) -> 706 case St#yecc.nonterminals of 707 [] -> 708 add_error(nonterminals_missing, St); 709 Nonterminals -> 710 {Unique, Dups} = duplicates(names(Nonterminals)), 711 St1 = add_warnings(Dups, duplicate_nonterminal, St), 712 St2 = check_reserved(Unique, St1), 713 St2#yecc{nonterminals = [?ACCEPT | Unique]} 714 end. 715 716check_terminals(St0) -> 717 case St0#yecc.terminals of 718 [] -> 719 add_error(terminals_missing, St0); 720 Terminals -> 721 {Unique, Dups} = duplicates(names(Terminals)), 722 St1 = add_warnings(Dups, duplicate_terminal, St0), 723 Common = intersect(St1#yecc.nonterminals, Unique), 724 St2 = add_errors(Common, symbol_terminal_and_nonterminal, St1), 725 St3 = check_reserved(Unique, St2), 726 St3#yecc{terminals = ['$empty' | Unique]} 727 end. 728 729check_reserved(Names, St) -> 730 add_errors(intersect(Names, ['$empty', '$end', '$undefined']), 731 reserved, St). 732 733check_rootsymbol(St) -> 734 case St#yecc.rootsymbol of 735 [] -> 736 add_error(rootsymbol_missing, St); 737 [#symbol{anno = Anno, name = SymName}] -> 738 case kind_of_symbol(St, SymName) of 739 nonterminal -> 740 St#yecc{rootsymbol = SymName}; 741 _ -> 742 add_error(Anno, {bad_rootsymbol, SymName}, St) 743 end 744 end. 745 746check_endsymbol(St) -> 747 case St#yecc.endsymbol of 748 [] -> 749 St#yecc{endsymbol = '$end'}; 750 [#symbol{anno = Anno, name = SymName}] -> 751 case kind_of_symbol(St, SymName) of 752 nonterminal -> 753 add_error(Anno, {endsymbol_is_nonterminal, SymName}, St); 754 terminal -> 755 add_error(Anno, {endsymbol_is_terminal, SymName}, St); 756 _ -> 757 St#yecc{endsymbol = SymName} 758 end 759 end. 760 761check_expect(St0) -> 762 case St0#yecc.expect_shift_reduce of 763 [] -> 764 St0#yecc{expect_shift_reduce = 0}; 765 [#symbol{name = Expect}] when is_integer(Expect) -> 766 St0#yecc{expect_shift_reduce = Expect}; 767 [#symbol{anno = Anno, name = Name}] -> 768 St1 = add_error(Anno, {bad_expect, Name}, St0), 769 St1#yecc{expect_shift_reduce = 0} 770 end. 771 772check_states(St) -> 773 case St#yecc.expect_n_states of 774 [] -> 775 St; 776 [#symbol{name = NStates}] when is_integer(NStates) -> 777 St#yecc{expect_n_states = NStates}; 778 [#symbol{anno = Anno, name = Name}] -> 779 add_error(Anno, {bad_states, Name}, St) 780 end. 781 782check_precedences(St0) -> 783 {St1, _} = 784 foldr(fun({#symbol{anno = Anno, name = Op},_I,_A}, {St,Ps}) -> 785 case member(Op, Ps) of 786 true -> 787 {add_error(Anno, {duplicate_precedence,Op}, St), 788 Ps}; 789 false -> 790 {St, [Op | Ps]} 791 end 792 end, {St0,[]}, St0#yecc.prec), 793 foldl(fun({#symbol{anno = Anno, name = Op},I,A}, St) -> 794 case kind_of_symbol(St, Op) of 795 endsymbol -> 796 add_error(Anno,{precedence_op_is_endsymbol,Op}, St); 797 unknown -> 798 add_error(Anno, {precedence_op_is_unknown, Op}, St); 799 _ -> 800 St#yecc{prec = [{Op,I,A} | St#yecc.prec]} 801 end 802 end, St1#yecc{prec = []}, St1#yecc.prec). 803 804check_rule(Rule0, {St0,Rules}) -> 805 Symbols = Rule0#rule.symbols, 806 #symbol{anno = HeadAnno, name = Head} = hd(Symbols), 807 case member(Head, St0#yecc.nonterminals) of 808 false -> 809 {add_error(HeadAnno, {undefined_nonterminal, Head}, St0), Rules}; 810 true -> 811 St = check_rhs(tl(Symbols), St0), 812 Rule = Rule0#rule{location = location(HeadAnno), 813 symbols = names(Symbols)}, 814 {St, [Rule | Rules]} 815 end. 816 817check_rules(St0) -> 818 {St,Rules0} = foldl(fun check_rule/2, {St0,[]}, St0#yecc.rules_list), 819 case St#yecc.rules_list of 820 [] -> 821 add_error(no_grammar_rules, St); 822 _ -> 823 Rule = #rule{location = none, 824 symbols = [?ACCEPT, St#yecc.rootsymbol], 825 tokens = []}, 826 Rules1 = [Rule | Rules0], 827 Rules = map(fun({R,I}) -> R#rule{n = I} end, count(0, Rules1)), 828 St#yecc{rules_list = Rules, rules = list_to_tuple(Rules)} 829 end. 830 831duplicates(List) -> 832 Unique = usort(List), 833 {Unique, List -- Unique}. 834 835names(Symbols) -> 836 map(fun(Symbol) -> Symbol#symbol.name end, Symbols). 837 838symbol_anno(Name, St) -> 839 #symbol{anno = Anno} = symbol_find(Name, St#yecc.all_symbols), 840 Anno. 841 842symbol_member(Symbol, Symbols) -> 843 symbol_find(Symbol#symbol.name, Symbols) =/= false. 844 845symbol_find(Name, Symbols) -> 846 lists:keyfind(Name, #symbol.name, Symbols). 847 848states_and_goto_table(St0) -> 849 St1 = create_symbol_table(St0), 850 St = compute_states(St1), 851 create_precedence_table(St). 852 853parse_actions(St) -> 854 _ = erase(), % the pd is used when decoding lookahead sets 855 ParseActions = compute_parse_actions(St#yecc.n_states, St, []), 856 _ = erase(), 857 St#yecc{parse_actions = ParseActions, state_tab = []}. 858 859action_conflicts(St0) -> 860 St = find_action_conflicts(St0), 861 St#yecc{conflicts_done = true}. 862 863-record(state_info, {reduce_only, state_repr, comment}). 864 865write_file(St0) -> 866 #yecc{parse_actions = ParseActions, goto_tab = GotoTab} = St0, 867 Sorted = sort_parse_actions(ParseActions), 868 StateReprs = find_identical_shift_states(Sorted), 869 StateInfo = collect_some_state_info(Sorted, StateReprs), 870 StateJumps = find_partial_shift_states(Sorted, StateReprs), 871 UserCodeActions = find_user_code(Sorted, St0), 872 #yecc{infile = Infile, outfile = Outfile, 873 inport = Inport, outport = Outport, 874 nonterminals = Nonterminals} = St0, 875 {St10, N_lines, LastErlangCodeLine} = 876 output_prelude(Outport, Inport, St0), 877 St20 = St10#yecc{line = St10#yecc.line + N_lines}, 878 St25 = nl(St20), 879 St30 = output_file_directive(St25, Outfile, St25#yecc.line), 880 St40 = nl(St30), 881 St50 = output_actions(St40, StateJumps, StateInfo), 882 Go0 = [{Symbol,{From,To}} || {{From,Symbol},To} <- ets:tab2list(GotoTab)], 883 Go = family_with_domain(Go0, Nonterminals), 884 St60 = output_goto(St50, Go, StateInfo), 885 St70 = output_inlined(St60, UserCodeActions, Infile), 886 St = nl(St70), 887 case LastErlangCodeLine of 888 %% Just in case warnings or errors are emitted after the last 889 %% line of the file. 890 {last_erlang_code_line, Last_line} -> 891 output_file_directive(St, Infile, Last_line); 892 no_erlang_code -> 893 St 894 end. 895 896yecc_ret(St0) -> 897 St = check_expected(St0), 898 report_errors(St), 899 report_warnings(St), 900 Es = pack_errors(St#yecc.errors), 901 Ws = pack_warnings(St#yecc.warnings), 902 Werror = werror(St), 903 if 904 Werror -> 905 do_error_return(St, Es, Ws); 906 Es =:= [] -> 907 case member(return_warnings, St#yecc.options) of 908 true -> {ok, St#yecc.outfile, Ws}; 909 false -> {ok, St#yecc.outfile} 910 end; 911 true -> 912 do_error_return(St, Es, Ws) 913 end. 914 915do_error_return(St, Es, Ws) -> 916 case member(return_errors, St#yecc.options) of 917 true -> {error, Es, Ws}; 918 false -> error 919 end. 920 921check_expected(St0) -> 922 #yecc{shift_reduce = SR, reduce_reduce = RR, expect_shift_reduce = ExpSR, 923 n_states = NStates0, expect_n_states = ExpStates, 924 conflicts_done = Done} = St0, 925 N_RR = length(usort(RR)), 926 N_SR = length(usort(SR)), 927 St1 = if 928 not Done -> 929 St0; 930 N_SR =:= ExpSR, N_RR =:= 0 -> 931 St0; 932 true -> 933 add_warning(none, {conflicts, N_SR, N_RR}, St0) 934 end, 935 NStates = NStates0 + 1, 936 if 937 (not Done) or (ExpStates =:= []) or (NStates =:= ExpStates) -> 938 St1; 939 true -> 940 add_warning(none, {n_states, ExpStates, NStates}, St1) 941 end. 942 943pack_errors([{File,_} | _] = Es) -> 944 [{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}]; 945pack_errors([]) -> 946 []. 947 948pack_warnings([{File,_} | _] = Ws) -> 949 [{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}]; 950pack_warnings([]) -> 951 []. 952 953report_errors(St) -> 954 case member(report_errors, St#yecc.options) of 955 true -> 956 foreach(fun({File,{none,Mod,E}}) -> 957 io:fwrite(<<"~ts: ~ts\n">>, 958 [File,Mod:format_error(E)]); 959 ({File,{Location,Mod,E}}) -> 960 io:fwrite(<<"~ts:~s: ~ts\n">>, 961 [File,pos(Location),Mod:format_error(E)]) 962 end, sort(St#yecc.errors)); 963 false -> 964 ok 965 end. 966 967report_warnings(St) -> 968 Werror = member(warnings_as_errors, St#yecc.options), 969 Prefix = case Werror of 970 true -> ""; 971 false -> "Warning: " 972 end, 973 ReportWerror = Werror andalso member(report_errors, St#yecc.options), 974 case member(report_warnings, St#yecc.options) orelse ReportWerror of 975 true -> 976 foreach(fun({File,{none,Mod,W}}) -> 977 io:fwrite(<<"~ts: ~s~ts\n">>, 978 [File,Prefix, 979 Mod:format_error(W)]); 980 ({File,{Location,Mod,W}}) -> 981 io:fwrite(<<"~ts:~s: ~s~ts\n">>, 982 [File,pos(Location),Prefix, 983 Mod:format_error(W)]) 984 end, sort(St#yecc.warnings)); 985 false -> 986 ok 987 end. 988 989pos(Line) when is_integer(Line) -> 990 io_lib:format("~w", [Line]); 991pos({Line, Column}) when is_integer(Line), is_integer(Column) -> 992 io_lib:format("~w:~w", [Line, Column]). 993 994add_error(E, St) -> 995 add_error(none, E, St). 996 997add_error(Anno, E, St) -> 998 add_error(St#yecc.infile, Anno, E, St). 999 1000add_error(File, Anno, E, St) -> 1001 Loc = location(Anno), 1002 St#yecc{errors = [{File,{Loc,?MODULE,E}}|St#yecc.errors]}. 1003 1004add_errors(SymNames, E0, St0) -> 1005 foldl(fun(SymName, St) -> 1006 add_error(symbol_anno(SymName, St), {E0, SymName}, St) 1007 end, St0, SymNames). 1008 1009add_warning(Anno, W, St) -> 1010 Loc = location(Anno), 1011 St#yecc{warnings = [{St#yecc.infile,{Loc,?MODULE,W}}|St#yecc.warnings]}. 1012 1013add_warnings(SymNames, W0, St0) -> 1014 foldl(fun(SymName, St) -> 1015 add_warning(symbol_anno(SymName, St), {W0, SymName}, St) 1016 end, St0, SymNames). 1017 1018check_rhs([#symbol{name = '$empty'}], St) -> 1019 St; 1020check_rhs(Rhs, St0) -> 1021 case symbol_find('$empty', Rhs) of 1022 #symbol{anno = Anno} -> 1023 add_error(Anno, illegal_empty, St0); 1024 false -> 1025 foldl(fun(Sym, St) -> 1026 case symbol_member(Sym, St#yecc.all_symbols) of 1027 true -> 1028 St; 1029 false -> 1030 E = {undefined_symbol,Sym#symbol.name}, 1031 add_error(Sym#symbol.anno, E, St) 1032 end 1033 end, St0, Rhs) 1034 end. 1035 1036check_action(Tokens) -> 1037 case erl_parse:parse_exprs(add_roberts_dot(Tokens, erl_anno:new(0))) of 1038 {error, _Error} -> 1039 {false, false}; 1040 {ok, [Expr | Exprs]} -> 1041 IsGuard = Exprs =:= [] andalso erl_lint:is_guard_test(Expr), 1042 {IsGuard, true} 1043 end. 1044 1045add_roberts_dot([], Anno) -> 1046 [{'dot', Anno}]; 1047add_roberts_dot([{'dot', Anno} | _], _) -> 1048 [{'dot', Anno}]; 1049add_roberts_dot([Token | Tokens], _) -> 1050 [Token | add_roberts_dot(Tokens, element(2, Token))]. 1051 1052-define(PSEUDO_PREFIX, "___"). 1053-define(PSEUDO_VAR_1, '___1'). 1054 1055subst_pseudo_vars([], _, St) -> 1056 {[], St}; 1057subst_pseudo_vars([H0 | T0], NmbrOfDaughters, St0) -> 1058 {H, St1} = subst_pseudo_vars(H0, NmbrOfDaughters, St0), 1059 {T, St} = subst_pseudo_vars(T0, NmbrOfDaughters, St1), 1060 {[H | T], St}; 1061subst_pseudo_vars({atom, Anno, Atom}, NmbrOfDaughters, St0) -> 1062 case atom_to_list(Atom) of 1063 [$$ | Rest] -> 1064 try list_to_integer(Rest) of 1065 N when N > 0, N =< NmbrOfDaughters -> 1066 PseudoVarName = append(?PSEUDO_PREFIX, Rest), 1067 NewAnno = erl_anno:set_text(PseudoVarName, Anno), 1068 {{var, NewAnno, list_to_atom(PseudoVarName)}, St0}; 1069 _ -> 1070 St = add_error(Anno, 1071 {undefined_pseudo_variable, Atom}, 1072 St0), 1073 {{atom, Anno, '$undefined'}, St} 1074 catch 1075 error: _ -> {{atom, Anno, Atom}, St0} 1076 end; 1077 _ -> 1078 {{atom, Anno, Atom}, St0} 1079 end; 1080subst_pseudo_vars(Tuple, NmbrOfDaughters, St0) when is_tuple(Tuple) -> 1081 {L, St} = subst_pseudo_vars(tuple_to_list(Tuple), NmbrOfDaughters, St0), 1082 {list_to_tuple(L), St}; 1083subst_pseudo_vars(Something_else, _, St) -> 1084 {Something_else, St}. 1085 1086kind_of_symbol(St, SymName) -> 1087 case member(SymName, St#yecc.nonterminals) of 1088 false -> 1089 case member(SymName, St#yecc.terminals) of 1090 false -> 1091 case St#yecc.endsymbol of 1092 SymName -> 1093 endsymbol; 1094 _ -> 1095 unknown 1096 end; 1097 true -> 1098 terminal 1099 end; 1100 true -> 1101 nonterminal 1102 end. 1103 1104%%%%%%%%%%%%%%%%%%%%%%%%%%%% 1105% Computing parse states and goto table from grammar. 1106% Start item: {0, [Endsymbol]} <-> 1107% (['ACCEPT' '.', Rootsymbol], {'$'}) in Aho & Johnson 1108% where '$end' is the default end of input symbol of the 1109% scanner if no 'Endsymbol' has been declared in the syntax file. 1110 1111-record(tabs, { 1112 symbols, % ETS-set, keypos 1: {SymbolName, SymbolCode} 1113 inv_symbols, % ETS-set, keypos 2: {SymbolName, SymbolCode} 1114 state_id, % ETS-bag, keypos 1: {StateId, StateNum} 1115 % StateId is not unique for a state. 1116 rp_rhs, % rule pointer -> the remaining rhs symbols 1117 rp_info, % rule pointer -> expanding rules and lookahead 1118 goto % ETS-bag, keypos 1: first 1119 % {{FromStateNum, Symbol, ToStateNum}}, then 1120 % {{FromStateNum, Symbol}, ToStateNum} 1121 }). 1122 1123-record(item, { % what states are made of 1124 rule_pointer, 1125 look_ahead, 1126 rhs 1127 }). 1128 1129compute_states(St0) -> 1130 SymbolTab = St0#yecc.symbol_tab, 1131 CodedRules = map(fun(#rule{symbols = Syms} = R) -> 1132 R#rule{symbols = code_symbols(Syms, SymbolTab)} 1133 end, St0#yecc.rules_list), 1134 CodedNonterminals = code_symbols(St0#yecc.nonterminals, SymbolTab), 1135 %% Only coded in this phase; StC is thrown away. 1136 StC = St0#yecc{rules_list = CodedRules, 1137 rules = list_to_tuple(CodedRules), 1138 nonterminals = CodedNonterminals}, 1139 {RuleIndex, RulePointer2Rule} = 1140 make_rule_index(StC, St0#yecc.rules_list), 1141 StateTab0 = {}, 1142 StateIdTab = ets:new(yecc_state_id, [set]), 1143 GotoTab = ets:new(yecc_goto, [bag]), 1144 RulePointerRhs = make_rhs_index(StC#yecc.rules_list), 1145 RulePointerInfo = make_rule_pointer_info(StC, RulePointerRhs, RuleIndex), 1146 1147 Tables = #tabs{symbols = SymbolTab, 1148 state_id = StateIdTab, 1149 rp_rhs = RulePointerRhs, 1150 rp_info = RulePointerInfo, 1151 goto = GotoTab}, 1152 1153 _ = erase(), 1154 EndsymCode = code_terminal(StC#yecc.endsymbol, StC#yecc.symbol_tab), 1155 {StateId, State0} = compute_state([{EndsymCode, 1}], Tables), 1156 1157 StateNum0 = first_state(), 1158 FirstState = {StateNum0, State0}, 1159 StateTab1 = insert_state(Tables, StateTab0, FirstState, StateId), 1160 {StateTab, N} = 1161 compute_states1([{StateNum0, get_current_symbols(State0)}], 1162 FirstState, StateTab1, Tables), 1163 true = ets:delete(StateIdTab), 1164 St = St0#yecc{state_tab = StateTab, goto_tab = GotoTab, n_states = N, 1165 rule_pointer2rule = RulePointer2Rule}, 1166 decode_goto(GotoTab, St#yecc.inv_symbol_tab), 1167 check_usage(St). 1168 1169first_state() -> 1170 0. 1171 1172decode_goto(GotoTab, InvSymTab) -> 1173 G = ets:tab2list(GotoTab), 1174 ets:delete_all_objects(GotoTab), 1175 ets:insert(GotoTab, 1176 map(fun({{From, Sym, Next}}) -> 1177 {{From, decode_symbol(Sym, InvSymTab)}, Next} 1178 end, G)). 1179 1180check_usage(St0) -> 1181 SelSyms = ets:fun2ms(fun({{_,Sym},_}) -> Sym end), 1182 UsedSymbols = ets:select(St0#yecc.goto_tab, SelSyms), 1183 Syms = ordsets:from_list([?ACCEPT, '$empty' | UsedSymbols]), 1184 NonTerms = ordsets:from_list(St0#yecc.nonterminals), 1185 UnusedNonTerms = ordsets:to_list(ordsets:subtract(NonTerms, Syms)), 1186 St1 = add_warnings(UnusedNonTerms, unused_nonterminal, St0), 1187 Terms = ordsets:from_list(St0#yecc.terminals), 1188 St2 = add_warnings(ordsets:to_list(ordsets:subtract(Terms, Syms)), 1189 unused_terminal, St1), 1190 DefinedNonTerminals = map(fun(#rule{symbols = [Name | _]}) -> 1191 Name 1192 end, St2#yecc.rules_list), 1193 DefNonTerms = ordsets:from_list(DefinedNonTerminals), 1194 UndefNonTerms = ordsets:subtract(NonTerms, DefNonTerms), 1195 add_errors(ordsets:to_list(ordsets:subtract(UndefNonTerms, 1196 UnusedNonTerms)), 1197 missing_syntax_rule, St2). 1198 1199%% States are sometimes big, should not be copied to ETS tables. 1200%% Here an "extendible" tuple is used. 1201lookup_state(StateTab, N) -> 1202 element(N+1, StateTab). 1203 1204insert_state(#tabs{state_id = StateIdTab}, StateTab0, State, StateId) -> 1205 {N, _Items} = State, 1206 insert_state_id(StateIdTab, N, StateId), 1207 StateTab = if 1208 tuple_size(StateTab0) > N -> 1209 StateTab0; 1210 true -> 1211 list_to_tuple(tuple_to_list(StateTab0) ++ 1212 lists:duplicate(round(1 + N * 1.5), [])) 1213 end, 1214 setelement(N+1, StateTab, State). 1215 1216insert_state_id(StateIdTab, N, StateId) -> 1217 true = ets:insert(StateIdTab, {StateId, N}). 1218 1219compute_states1([], {N, _}=_CurrState, StateTab0, _Tables) -> 1220 {StateTab0, N}; 1221compute_states1([{N, Symbols} | Try], CurrState, StateTab, Tables) -> 1222 {_N, S} = lookup_state(StateTab, N), 1223 Seeds = state_seeds(S, Symbols), 1224 compute_states2(Seeds, N, Try, CurrState, StateTab, Tables). 1225 1226compute_states2([], _N, Try, CurrState, StateTab, Tables) -> 1227 compute_states1(Try, CurrState, StateTab, Tables); 1228compute_states2([{Sym,Seed} | Seeds], N, Try, CurrState, StateTab, Tables) -> 1229 {StateId, NewState} = compute_state(Seed, Tables), 1230 case check_states(NewState, StateId, StateTab, Tables) of 1231 add -> 1232 {M, _} = CurrState, 1233 %% io:fwrite(<<"Adding state ~w\n">>, [M + 1]), 1234 CurrentSymbols = get_current_symbols(NewState), 1235 Next = M + 1, 1236 NextState = {Next, NewState}, 1237 NewStateTab = insert_state(Tables, StateTab, NextState, StateId), 1238 insert_goto(Tables, N, Sym, Next), 1239 compute_states2(Seeds, N, [{Next, CurrentSymbols} | Try], 1240 NextState, NewStateTab, Tables); 1241 {old, M} -> 1242 %% io:fwrite(<<"Identical to old state ~w\n">>, [M]), 1243 insert_goto(Tables, N, Sym, M), 1244 compute_states2(Seeds, N, Try, CurrState, StateTab, Tables); 1245 {merge, M, NewCurrent} -> 1246 %% io:fwrite(<<"Merging with state ~w\n">>, [M]), 1247 Try1 = case lists:keyfind(M, 1, Try) of 1248 false -> 1249 [{M, NewCurrent} | Try]; 1250 {_, OldCurrent} -> 1251 case ordsets:is_subset(NewCurrent, OldCurrent) of 1252 true -> 1253 Try; 1254 false -> 1255 [{M, ordsets:union(NewCurrent, OldCurrent)} 1256 | keydelete(M, 1, Try)] 1257 end 1258 end, 1259 NewStateTab = merge_states(NewState, StateTab, Tables, M,StateId), 1260 insert_goto(Tables, N, Sym, M), 1261 compute_states2(Seeds, N, Try1, CurrState, NewStateTab, Tables) 1262 end. 1263 1264insert_goto(Tables, From, Sym, To) -> 1265 true = ets:insert(Tables#tabs.goto, {{From, Sym, To}}). 1266 1267%% Create an ets table for faster lookups. 1268create_symbol_table(St) -> 1269 #yecc{terminals = Terminals, endsymbol = Endsymbol} = St, 1270 SymbolTab = ets:new(yecc_symbols, [{keypos,1}]), 1271 %% '$empty' is always assigned 0 1272 Ts = ['$empty', Endsymbol | delete('$empty', Terminals)], 1273 TsC = count(0, Ts), 1274 NTsC = map(fun({NT,I}) -> {NT,-I} end, count(1, St#yecc.nonterminals)), 1275 Cs = TsC++NTsC, 1276 true = ets:insert(SymbolTab, Cs), 1277 1278 InvSymTable = ets:new(yecc_inverted_terminals, [{keypos,2}]), 1279 true = ets:insert(InvSymTable, Cs), 1280 1281 St#yecc{symbol_tab = SymbolTab, inv_symbol_tab = InvSymTable}. 1282 1283get_current_symbols(State) -> 1284 usort(get_current_symbols1(State, [])). 1285 1286get_current_symbols1([], Syms) -> 1287 Syms; 1288get_current_symbols1([#item{rhs = Rhs} | Items], Syms) -> 1289 case Rhs of 1290 [] -> 1291 get_current_symbols1(Items, Syms); 1292 [Symbol | _] -> 1293 get_current_symbols1(Items, [Symbol | Syms]) 1294 end. 1295 1296state_seeds(Items, Symbols) -> 1297 L = [{S,{LA,RP + 1}} || #item{rule_pointer = RP, look_ahead = LA, 1298 rhs = [S | _]} <- Items], 1299 state_seeds1(keysort(1, L), Symbols). 1300 1301state_seeds1(_L, []) -> 1302 []; 1303state_seeds1(L, [Symbol | Symbols]) -> 1304 state_seeds(L, Symbol, Symbols, []). 1305 1306state_seeds([{Symbol, Item} | L], Symbol, Symbols, Is) -> 1307 state_seeds(L, Symbol, Symbols, [Item | Is]); 1308state_seeds([{S, _Item} | L], Symbol, Symbols, Is) when S < Symbol -> 1309 state_seeds(L, Symbol, Symbols, Is); 1310state_seeds(L, Symbol, Symbols, Is) -> 1311 [{Symbol, Is} | state_seeds1(L, Symbols)]. 1312 1313compute_state(Seed, Tables) -> 1314 RpInfo = Tables#tabs.rp_info, 1315 foreach(fun({LA, RulePointer}) -> put(RulePointer, LA) end, Seed), 1316 foreach(fun({LA, RP}) -> compute_closure(LA, RP, RpInfo) end, Seed), 1317 Closure = keysort(1, erase()), 1318 state_items(Closure, [], [], Tables#tabs.rp_rhs). 1319 1320%% Collects a uniqe id for the state (all rule pointers). 1321state_items([{RP, LA} | L], Is, Id, RpRhs) -> 1322 I = #item{rule_pointer = RP, look_ahead = LA, rhs = element(RP, RpRhs)}, 1323 state_items(L, [I | Is], [RP | Id], RpRhs); 1324state_items(_, Is, Id, _RpRhs) -> 1325 {Id, Is}. 1326 1327-compile({inline,[compute_closure/3]}). 1328compute_closure(Lookahead, RulePointer, RpInfo) -> 1329 case element(RulePointer, RpInfo) of 1330 []=Void -> % no followers, or terminal 1331 Void; 1332 {no_union, ExpandingRules, NewLookahead} -> 1333 compute_closure1(ExpandingRules, NewLookahead, RpInfo); 1334 {union, ExpandingRules, Lookahead0} -> 1335 NewLookahead = set_union(Lookahead0, Lookahead), 1336 compute_closure1(ExpandingRules, NewLookahead, RpInfo); 1337 ExpandingRules -> 1338 compute_closure1(ExpandingRules, Lookahead, RpInfo) 1339 end. 1340 1341compute_closure1([RulePointer | Tail], NewLookahead, RpInfo) -> 1342 compute_closure1(Tail, NewLookahead, RpInfo), 1343 case get(RulePointer) of 1344 undefined -> % New 1345 put(RulePointer, NewLookahead), 1346 compute_closure(NewLookahead, RulePointer, RpInfo); 1347 Lookahead2 -> 1348 Lookahead = set_union(Lookahead2, NewLookahead), 1349 if 1350 Lookahead =:= Lookahead2 -> % Old 1351 Lookahead2; % void() 1352 true -> % Merge 1353 put(RulePointer, Lookahead), 1354 compute_closure(NewLookahead, RulePointer, RpInfo) 1355 end 1356 end; 1357compute_closure1(Nil, _, _RpInfo) -> 1358 Nil. 1359 1360%% Check if some old state is a superset of our NewState 1361check_states(NewState, StateId, StateTab, #tabs{state_id = StateIdTab}) -> 1362 try ets:lookup_element(StateIdTab, StateId, 2) of 1363 N -> 1364 {_N, OldState} = lookup_state(StateTab, N), 1365 check_state1(NewState, OldState, [], N) 1366 catch error:_ -> add 1367 end. 1368 1369check_state1([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1], 1370 [#item{look_ahead = Lookahead2} | Items2], Symbols, N) -> 1371 case set_is_subset(Lookahead1, Lookahead2) of 1372 true -> 1373 check_state1(Items1, Items2, Symbols, N); 1374 false -> 1375 case Rhs of 1376 [] -> 1377 check_state2(Items1, Items2, Symbols, N); 1378 [Symbol | _] -> 1379 check_state2(Items1, Items2, [Symbol | Symbols], N) 1380 end 1381 end; 1382check_state1([], [], _Symbols, N) -> 1383 {old, N}. 1384 1385check_state2([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1], 1386 [#item{look_ahead = Lookahead2} | Items2], Symbols, N) -> 1387 case set_is_subset(Lookahead1, Lookahead2) of 1388 true -> 1389 check_state2(Items1, Items2, Symbols, N); 1390 false -> 1391 case Rhs of 1392 [] -> 1393 check_state2(Items1, Items2, Symbols, N); 1394 [Symbol | _] -> 1395 check_state2(Items1, Items2, [Symbol | Symbols], N) 1396 end 1397 end; 1398check_state2([], [], Symbols, N) -> 1399 {merge, N, usort(Symbols)}. 1400 1401merge_states(NewState, StateTab, Tables, M, StateId) -> 1402 {_M, Old_state} = lookup_state(StateTab, M), 1403 MergedState = merge_states1(NewState, Old_state), 1404 insert_state(Tables, StateTab, {M, MergedState}, StateId). 1405 1406merge_states1([Item1 | Items1], [Item2 | Items2]) -> 1407 LA1 = Item1#item.look_ahead, 1408 LA2 = Item2#item.look_ahead, 1409 if 1410 LA1 =:= LA2 -> 1411 [Item1 | merge_states1(Items1, Items2)]; 1412 true -> 1413 [Item1#item{look_ahead = set_union(LA1, LA2)} 1414 | merge_states1(Items1, Items2)] 1415 end; 1416merge_states1(_, _) -> 1417 []. 1418 1419%% RulePointer -> Rhs. Every position Rhs in has its unique "rule pointer". 1420make_rhs_index(RulesList) -> 1421 Index = flatmap(fun(#rule{symbols = [_Non | Daughters]}) -> 1422 suffixes0(Daughters) 1423 end, RulesList), 1424 list_to_tuple(Index). 1425 1426suffixes0([?EMPTY]) -> 1427 [[], []]; 1428suffixes0(L) -> 1429 suffixes(L). 1430 1431suffixes([]=L) -> 1432 [L]; 1433suffixes([_ | T]=L) -> 1434 [L | suffixes(T)]. 1435 1436%% Setup info about lookahead and expanding rules for each point 1437%% ("rule pointer") in the grammar. 1438make_rule_pointer_info(StC, RpRhs, RuleIndex) -> 1439 SymbolTab = StC#yecc.symbol_tab, 1440 LcTab = make_left_corner_table(StC), 1441 LA_index = map(fun(Syms) -> 1442 rp_info(Syms, SymbolTab, LcTab, RuleIndex) 1443 end, tuple_to_list(RpRhs)), 1444 list_to_tuple(LA_index). 1445 1446rp_info([], _SymbolTab, _LcTab, _RuleIndex) -> 1447 []; 1448rp_info([Category | Followers], SymbolTab, LcTab, RuleIndex) -> 1449 case maps:find(Category, RuleIndex) of 1450 error -> % terminal 1451 []; 1452 {ok, ExpandingRules} when Followers =:= [] -> 1453 ExpandingRules; 1454 {ok, ExpandingRules} -> 1455 case make_lookahead(Followers, SymbolTab, LcTab, set_empty()) of 1456 {empty, LA} -> 1457 {union, ExpandingRules, LA}; 1458 LA -> 1459 {no_union, ExpandingRules, LA} 1460 end 1461 end. 1462 1463%% Lookahead computation is complicated by the possible existence 1464%% of null string rewriting rules, such as A -> '$empty'. 1465make_lookahead([], _, _, LA) -> 1466 {empty, LA}; 1467make_lookahead([Symbol | Symbols], SymbolTab, LcTab, LA) -> 1468 case maps:find(Symbol, LcTab) of 1469 {ok, LeftCorner} -> % nonterminal 1470 case empty_member(LeftCorner) of 1471 true -> 1472 make_lookahead(Symbols, SymbolTab, LcTab, 1473 set_union(empty_delete(LeftCorner), LA)); 1474 false -> 1475 set_union(LeftCorner, LA) 1476 end; 1477 error -> % terminal 1478 set_add(Symbol, LA) 1479 end. 1480 1481%% -> map-of({Nonterminal, [Terminal]}). 1482%% The algorithm FIRST/1 from the Dragon Book. 1483%% Left corner table, all terminals (including '$empty') that can 1484%% begin strings generated by Nonterminal. 1485make_left_corner_table(#yecc{rules_list = RulesList} = St) -> 1486 SymbolTab = left_corner_symbol_table(St), 1487 Rules = map(fun(#rule{symbols = [Lhs | Rhs]}) -> 1488 {Lhs,{Lhs, Rhs}} 1489 end, RulesList), 1490 LeftHandTab = maps:from_list(family(Rules)), 1491 X0 = [{S,H} || {H,{H,Rhs}} <- Rules, 1492 S <- Rhs, 1493 not is_terminal(SymbolTab, S)], 1494 XL = family_with_domain(X0, St#yecc.nonterminals), 1495 X = maps:from_list(XL), 1496 Xref = fun(NT) -> maps:get(NT, X) end, 1497 E = set_empty(), 1498 LC0 = maps:from_list([{H, E} || {H,_} <- XL]), 1499 %% Handle H -> a S, where a is a terminal ('$empty' inclusive). 1500 {Q, LC1} = 1501 foldl(fun({H,{H,[S | _]}}, {Q0, LC}) -> 1502 case ets:lookup(SymbolTab, S) of 1503 [{_,Num}=SymbolAndNum] when Num >= 0 -> 1504 F = set_add_terminal(SymbolAndNum, E), 1505 {[Xref(H) | Q0], upd_first(H, F, LC)}; 1506 _ -> 1507 {Q0, LC} 1508 end 1509 end, {[], LC0}, Rules), 1510 left_corners(Q, LC1, LeftHandTab, SymbolTab, Xref). 1511 1512left_corners(Q0, LC0, LeftHandTab, SymbolTab, Xref) -> 1513 case usort(append(Q0)) of 1514 [] -> 1515 LC0; 1516 Q1 -> 1517 Rs = flatmap(fun(NT) -> maps:get(NT, LeftHandTab) end, Q1), 1518 {LC, Q} = left_corners2(Rs, LC0, [], SymbolTab, Xref), 1519 left_corners(Q, LC, LeftHandTab, SymbolTab, Xref) 1520 end. 1521 1522left_corners2([], LC, Q, _SymbolTab, _Xref) -> 1523 {LC, Q}; 1524left_corners2([{Head,Rhs} | Rs], LC, Q0, SymbolTab, Xref) -> 1525 Ts = left_corner_rhs(Rhs, Head, LC, set_empty(), SymbolTab), 1526 First0 = maps:get(Head, LC), 1527 case set_is_subset(Ts, First0) of 1528 true -> 1529 left_corners2(Rs, LC, Q0, SymbolTab, Xref); 1530 false -> 1531 LC1 = upd_first(Head, Ts, LC), 1532 left_corners2(Rs, LC1, [Xref(Head) | Q0], SymbolTab, Xref) 1533 end. 1534 1535upd_first(NT, Ts, LC) -> 1536 maps:update_with(NT, fun(First) -> set_union(First, Ts) end, LC). 1537 1538left_corner_rhs([S | Ss], Head, LC, Ts, SymbolTab) -> 1539 case ets:lookup(SymbolTab, S) of 1540 [{_,Num}=SymbolAndNum] when Num >= 0 -> 1541 set_add_terminal(SymbolAndNum, Ts); 1542 [_NonTerminalSymbol] -> 1543 First = maps:get(S, LC), 1544 case empty_member(First) of 1545 true -> 1546 NTs = set_union(empty_delete(First), Ts), 1547 left_corner_rhs(Ss, Head, LC, NTs, SymbolTab); 1548 false -> 1549 set_union(First, Ts) 1550 end 1551 end; 1552left_corner_rhs([], _Head, _LC, Ts, _SymbolTab) -> 1553 set_add(?EMPTY, Ts). 1554 1555%% For every non-terminal return a list of "rule pointers" for rules 1556%% expanding the non-terminal. 1557%% Also assigns a unique number to each point in the grammar, "rule pointer". 1558make_rule_index(#yecc{nonterminals = Nonterminals, 1559 rules_list = RulesList}, RulesListNoCodes) -> 1560 {RulesL, _N} = 1561 lists:mapfoldl(fun(#rule{symbols = [Nonterminal | Daughters]}, I) -> 1562 I1 = I + length(Daughters)+1, 1563 {{Nonterminal, I}, I1} 1564 end, 1, RulesList), 1565 IndexedTab = family_with_domain(RulesL, Nonterminals), 1566 1567 Symbol2Rule = [{Foo,R} || #rule{symbols = Symbols}=R <- RulesListNoCodes, 1568 Foo <- Symbols], 1569 Pointer2Rule = [{I, R} || {{_Foo,R},I} <- count(1, Symbol2Rule)], 1570 {maps:from_list(IndexedTab), maps:from_list(Pointer2Rule)}. 1571 1572%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 1573% Computing parse action table from list of states and goto table: 1574 1575compute_parse_actions(N, St, StateActions) -> 1576 case N < first_state() of 1577 true -> 1578 StateActions; 1579 false -> 1580 {N, StateN} = lookup_state(St#yecc.state_tab, N), 1581 %% There can be duplicates in Actions. 1582 Actions = compute_parse_actions1(StateN, N, St), 1583 compute_parse_actions(N - 1, St, [{N, Actions} | StateActions]) 1584 end. 1585 1586compute_parse_actions1([], _, _) -> 1587 []; 1588compute_parse_actions1([#item{rule_pointer = RulePointer, 1589 look_ahead = Lookahead0, 1590 rhs = Rhs} | Items], N, St) -> 1591 case Rhs of 1592 [] -> 1593 Lookahead = decode_terminals(Lookahead0, St#yecc.inv_symbol_tab), 1594 case rule(RulePointer, St) of 1595 {[?ACCEPT | _], _RuleLocation, _} -> 1596 [{Lookahead, accept} 1597 | compute_parse_actions1(Items, N, St)]; 1598 %% Head is placed after the daughters when finding the 1599 %% precedence. This is how giving precedence to 1600 %% non-terminals takes effect. 1601 {[Head | Daughters0], _RuleLocation, _} -> 1602 Daughters = delete('$empty', Daughters0), 1603 [{Lookahead, 1604 #reduce{rule_nmbr = RulePointer, head = Head, 1605 nmbr_of_daughters = length(Daughters), 1606 prec = get_prec(Daughters ++ [Head], St)}} 1607 | compute_parse_actions1(Items, N, St)] 1608 end; 1609 [Symbol | Daughters] -> 1610 case is_terminal(St#yecc.symbol_tab, Symbol) of 1611 true -> 1612 DecSymbol = decode_symbol(Symbol, St#yecc.inv_symbol_tab), 1613 {[Head | _], _RuleLocation, _} = rule(RulePointer, St), 1614 %% A bogus shift-shift conflict can be introduced 1615 %% here if some terminal occurs in different rules 1616 %% which have been given precedence "one level up". 1617 Prec1 = case Daughters of 1618 [] -> get_prec([DecSymbol, Head], St); 1619 _ -> get_prec([DecSymbol], St) 1620 end, 1621 Pos = case Daughters of 1622 [] -> z; 1623 _ -> a 1624 end, 1625 [{[DecSymbol], 1626 #shift{state = goto(N, DecSymbol, St), 1627 pos = Pos, 1628 prec = Prec1, 1629 rule_nmbr = RulePointer}} 1630 | compute_parse_actions1(Items, N, St)]; 1631 false -> 1632 compute_parse_actions1(Items, N, St) 1633 end 1634 end. 1635 1636get_prec(Symbols, St) -> 1637 get_prec1(Symbols, St#yecc.prec_tab, {0, none}). 1638 1639get_prec1([], _, P) -> 1640 P; 1641get_prec1([Symbol | T], PrecTab, P) -> 1642 case ets:lookup(PrecTab, Symbol) of 1643 [] -> 1644 get_prec1(T, PrecTab, P); 1645 [{_, N, Ass}] -> 1646 get_prec1(T, PrecTab, {N, Ass}) 1647 end. 1648 1649create_precedence_table(St) -> 1650 PrecTab = ets:new(yecc_precedences, []), 1651 true = ets:insert(PrecTab, St#yecc.prec), 1652 St#yecc{prec_tab = PrecTab}. 1653 1654-record(cxt, {terminal, state_n, yecc, res}). 1655 1656%% Detects shift-reduce and reduce-reduce conflicts. 1657%% Also removes all but one conflicting action. As a consequence the 1658%% lookahead sets for a state are always disjoint. 1659%% Reduce/reduce conflicts are considered errors. 1660find_action_conflicts(St0) -> 1661 Cxt0 = #cxt{yecc = St0, res = []}, 1662 {#cxt{yecc = St, res = Res}, NewParseActions0} = 1663 foldl(fun({N, Actions0}, {Cxt1, StateActions}) -> 1664 L = [{Terminal, Act} || {Lookahead, Act} <- Actions0, 1665 Terminal <- Lookahead], 1666 {Cxt, Actions} = 1667 foldl(fun({Terminal, As}, {Cxt2,Acts0}) -> 1668 Cxt3 = Cxt2#cxt{terminal = Terminal, 1669 state_n = N}, 1670 {Action, Cxt} = 1671 find_action_conflicts2(As, Cxt3), 1672 {Cxt,[{Action,Terminal} | Acts0]} 1673 end, {Cxt1,[]}, family(L)), 1674 {Cxt,[{N,inverse(family(Actions))} | StateActions]} 1675 end, {Cxt0, []}, St0#yecc.parse_actions), 1676 if 1677 length(Res) > 0, St#yecc.verbose -> 1678 io:fwrite(<<"\n*** Conflicts resolved by operator " 1679 "precedences:\n\n">>), 1680 foreach(fun({Confl, Name}) -> 1681 report_conflict(Confl, St, Name, prec) 1682 end, reverse(Res)), 1683 io:fwrite(<<"*** End of resolved conflicts\n\n">>); 1684 true -> 1685 ok 1686 end, 1687 NewParseActions = reverse(NewParseActions0), 1688 St#yecc{parse_actions = NewParseActions}. 1689 1690find_action_conflicts2([Action], Cxt) -> 1691 {Action, Cxt}; 1692find_action_conflicts2([#shift{state = St, pos = Pos, prec = Prec}, 1693 #shift{state = St}=S | As], 1694 Cxt) when Pos =:= a; Prec =:= {0,none} -> 1695 %% This is a kludge to remove the bogus shift-shift conflict 1696 %% introduced in compute_parse_actions1(). 1697 find_action_conflicts2([S | As], Cxt); 1698find_action_conflicts2([#shift{state = NewState, pos = z}=S1, 1699 #shift{state = NewState}=S2 | _], Cxt) -> 1700 %% This is even worse than last clause. Give up. 1701 Confl = conflict(S1, S2, Cxt), 1702 #cxt{yecc = St0} = Cxt, 1703 St = conflict_error(Confl, St0), 1704 {S1, Cxt#cxt{yecc = St}}; % return any action 1705find_action_conflicts2([#shift{prec = {P1, Ass1}}=S | Rs], Cxt0) -> 1706 {R, Cxt1} = find_reduce_reduce(Rs, Cxt0), 1707 #cxt{res = Res0, yecc = St0} = Cxt1, 1708 #reduce{prec = {P2, Ass2}} = R, 1709 Confl = conflict(R, S, Cxt1), 1710 if 1711 P1 > P2 -> 1712 {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}}; 1713 P2 > P1 -> 1714 {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}}; 1715 Ass1 =:= left, Ass2 =:= left -> 1716 {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}}; 1717 Ass1 =:= right, Ass2 =:= right -> 1718 {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}}; 1719 Ass1 =:= nonassoc, Ass2 =:= nonassoc -> 1720 {nonassoc, Cxt1}; 1721 P1 =:= 0, P2 =:= 0 -> 1722 report_conflict(Confl, St0, shift, default), 1723 St = add_conflict(Confl, St0), 1724 {S, Cxt1#cxt{yecc = St}}; 1725 true -> 1726 St = conflict_error(Confl, St0), 1727 {S, Cxt1#cxt{yecc = St}} % return any action 1728 end; 1729find_action_conflicts2(Rs, Cxt0) -> 1730 find_reduce_reduce(Rs, Cxt0). 1731 1732find_reduce_reduce([R], Cxt) -> 1733 {R, Cxt}; 1734find_reduce_reduce([accept=A, #reduce{}=R | Rs], Cxt0) -> 1735 Confl = conflict(R, A, Cxt0), 1736 St = conflict_error(Confl, Cxt0#cxt.yecc), 1737 Cxt = Cxt0#cxt{yecc = St}, 1738 find_reduce_reduce([R | Rs], Cxt); 1739find_reduce_reduce([#reduce{head = Categ1, prec = {P1, _}}=R1, 1740 #reduce{head = Categ2, prec = {P2, _}}=R2 | Rs], Cxt0) -> 1741 #cxt{res = Res0, yecc = St0} = Cxt0, 1742 Confl = conflict(R1, R2, Cxt0), 1743 {R, Res, St} = 1744 if 1745 P1 > P2 -> 1746 {R1, [{Confl, Categ1} | Res0], St0}; 1747 P2 > P1 -> 1748 {R2, [{Confl, Categ2} | Res0], St0}; 1749 true -> 1750 St1 = conflict_error(Confl, St0), 1751 {R1, Res0, St1} 1752 end, 1753 Cxt = Cxt0#cxt{res = Res, yecc = St}, 1754 find_reduce_reduce([R | Rs], Cxt). 1755 1756%% Since the lookahead sets are disjoint (assured by 1757%% find_action_conflicts), the order between actions can be chosen 1758%% almost arbitrarily. nonassoc has to come last, though (but is later 1759%% discarded!). And shift has to come before reduce. 1760sort_parse_actions([]) -> 1761 []; 1762sort_parse_actions([{N, La_actions} | Tail]) -> 1763 [{N, sort_parse_actions1(La_actions)} | sort_parse_actions(Tail)]. 1764 1765sort_parse_actions1(LaActions) -> 1766 As = filter(fun({_LA, A}) -> A =:= accept end, LaActions), 1767 Ss = filter(fun({_LA, A}) -> is_record(A, shift) end, LaActions), 1768 Rs = filter(fun({_LA, A}) -> is_record(A, reduce) end, LaActions), 1769 Ns = filter(fun({_LA, A}) -> A =:= nonassoc end, LaActions), 1770 As ++ Ss ++ Rs ++ Ns. 1771 1772%% -> {State, StateRepr}. StateRepr has the same set of shift actions 1773%% as State. No code will be output for State if State =/= StateRepr. 1774find_identical_shift_states(StateActions) -> 1775 L1 = [{Actions, State} || {State,Actions} <- StateActions], 1776 {SO, NotSO} = lists:partition(fun({Actions,_States}) -> 1777 shift_actions_only(Actions) 1778 end, family(L1)), 1779 R = [{State, hd(States)} || {_Actions, States} <- SO, State <- States] 1780 ++ 1781 [{State, State} || {_Actions, States} <- NotSO, State <- States], 1782 lists:keysort(1, R). 1783 1784-record(part_data, {name, eq_state, actions, n_actions, states}). 1785 1786%% Replace {SStates,Actions} with {SStates,{Actions,Jump}} where 1787%% Jump describes which clauses that have been extracted from shift 1788%% states so that they can be used from other states. Some space is 1789%% saved. 1790find_partial_shift_states(StateActionsL, StateReprs) -> 1791 L = [{State, Actions} || {{State,Actions}, {State,State}} <- 1792 lists:zip(StateActionsL, StateReprs), 1793 shift_actions_only(Actions)], 1794 StateActions = sofs:family(L, [{state,[action]}]), 1795 StateAction = sofs:family_to_relation(StateActions), 1796 1797 %% Two actions are equal if they occur in the same states: 1798 Parts = sofs:partition(sofs:range(StateActions)), 1799 PartsL = sofs:to_external(Parts), 1800 %% Assign temporary names to the parts of the partition (of actions): 1801 PartNameL = lists:zip(seq1(length(PartsL)), PartsL), 1802 ActPartL = [{Action,PartName} || 1803 {PartName,Actions} <- PartNameL, 1804 Action <- Actions], 1805 ActionPartName = sofs:relation(ActPartL, [{action,partname}]), 1806 StatePartName = sofs:relative_product(StateAction, ActionPartName), 1807 PartInStates = sofs:relation_to_family(sofs:converse(StatePartName)), 1808 1809 %% Parts that equal all actions of a state: 1810 PartActions = sofs:family(PartNameL, [{partname,[action]}]), 1811 PartState = 1812 sofs:relative_product(PartActions, sofs:converse(StateActions)), 1813 PartStates = sofs_family_with_domain(PartState, sofs:domain(PartActions)), 1814 1815 PartDataL = [#part_data{name = Nm, eq_state = EqS, actions = P, 1816 n_actions = length(P), 1817 states = ordsets:from_list(S)} || 1818 {{Nm,P}, {Nm,S}, {Nm,EqS}} <- 1819 lists:zip3(PartNameL, 1820 sofs:to_external(PartInStates), 1821 sofs:to_external(PartStates))], 1822 true = length(PartDataL) =:= length(PartNameL), 1823 Ps = select_parts(PartDataL), 1824 1825 J1 = [{State, Actions, {jump_some,hd(States)}} || 1826 {_W, #part_data{actions = Actions, eq_state = [], 1827 states = States}} <- Ps, 1828 State <- States], 1829 J2 = [{State, Actions, {jump_all,To}} || 1830 {_W, #part_data{actions = Actions, eq_state = EqS, 1831 states = States}} <- Ps, 1832 To <- EqS, 1833 State <- States, 1834 State =/= To], 1835 J = lists:keysort(1, J1 ++ J2), 1836 1837 JumpStates = ordsets:from_list([S || {S,_,_} <- J]), 1838 {JS, NJS} = 1839 sofs:partition(1, sofs:relation(StateActionsL, [{state, actions}]), 1840 sofs:set(JumpStates, [state])), 1841 R = 1842 [{S, {Actions,jump_none}} || {S,Actions} <- sofs:to_external(NJS)] 1843 ++ 1844 [{S, {Actions--Part, {Tag,ToS,Part}}} || 1845 {{S,Actions}, {S,Part,{Tag,ToS}}} <- 1846 lists:zip(sofs:to_external(JS), J)], 1847 true = length(StateActionsL) =:= length(R), 1848 lists:keysort(1, R). 1849 1850%% Very greedy. By no means optimal. 1851select_parts([]) -> 1852 []; 1853select_parts(PartDataL) -> 1854 T1 = [{score(PD), PD} || PD <- PartDataL], 1855 [{W, PD} | Ws] = lists:reverse(lists:keysort(1, T1)), 1856 #part_data{n_actions = NActions, states = S} = PD, 1857 if 1858 W < 8 -> % don't bother 1859 []; 1860 true -> 1861 %% Cannot extract more clauses from the chosen part's states: 1862 NL = [D#part_data{states = NewS} || 1863 {W1, #part_data{states = S0}=D} <- Ws, 1864 W1 > 0, 1865 (NewS = ordsets:subtract(S0, S)) =/= []], 1866 if 1867 length(S) =:= 1; NActions =:= 1 -> 1868 select_parts(NL); 1869 true -> 1870 [{W,PD} | select_parts(NL)] 1871 end 1872 end. 1873 1874%% Does it pay off to extract clauses into a new function? 1875%% Assumptions: 1876%% - a call costs 8 (C = 8); 1877%% - a clause (per action) costs 20 plus 8 (select) (Cl = 28); 1878%% - a new function costs 20 (funinfo) plus 16 (select) (F = 36). 1879%% A is number of actions, S is number of states. 1880%% Special case (the part equals all actions of some state): 1881%% C * (S - 1) < (S - 1) * A * Cl 1882%% Normal case (introduce new function): 1883%% F + A * Cl + C * S < S * A * Cl 1884score(#part_data{n_actions = NActions, eq_state = [], states = S}) -> 1885 (length(S) * NActions * 28) - (36 + NActions * 28 + length(S) * 8); 1886score(#part_data{n_actions = NActions, states = S}) -> 1887 ((length(S) - 1) * NActions * 28) - (8 * (length(S) - 1)). 1888 1889shift_actions_only(Actions) -> 1890 length([foo || {_Ts,{shift,_,_,_,_}} <- Actions]) =:= length(Actions). 1891 1892collect_some_state_info(StateActions, StateReprs) -> 1893 RF = fun({_LA, A}) -> is_record(A, reduce) end, 1894 L = [{State, 1895 begin 1896 RO = lists:all(RF, LaActions), 1897 %% C is currently always ""; identical states are all shift. 1898 C = [io_lib:fwrite(<<" %% ~w\n">>, [State]) || 1899 true <- [RO], Repr =/= State], 1900 #state_info{reduce_only = RO, state_repr = Repr, comment = C} 1901 end} || 1902 {{State, LaActions}, {State, Repr}} <- 1903 lists:zip(StateActions, StateReprs)], 1904 list_to_tuple(L). 1905 1906conflict_error(Conflict, St0) -> 1907 St1 = add_conflict(Conflict, St0), 1908 add_error({conflict, Conflict}, St1). 1909 1910report_conflict(Conflict, St, ActionName, How) -> 1911 if 1912 St#yecc.verbose -> 1913 io:fwrite(<<"~s\n">>, [format_conflict(Conflict)]), 1914 Formated = format_symbol(ActionName), 1915 case How of 1916 prec -> 1917 io:fwrite(<<"Resolved in favor of ~ts.\n\n">>, [Formated]); 1918 default -> 1919 io:fwrite(<<"Conflict resolved in favor of ~ts.\n\n">>, 1920 [Formated]) 1921 end; 1922 true -> 1923 ok 1924 end. 1925 1926add_conflict(Conflict, St) -> 1927 case Conflict of 1928 {Symbol, StateN, _, {reduce, _, _, _}} -> 1929 St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]}; 1930 {Symbol, StateN, _, {accept, _}} -> 1931 St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]}; 1932 {Symbol, StateN, _, {shift, _, _}} -> 1933 St#yecc{shift_reduce = [{StateN,Symbol} | St#yecc.shift_reduce]}; 1934 {_Symbol, _StateN, {one_level_up, _, _}, _Confl} -> 1935 St 1936 end. 1937 1938conflict(#shift{prec = Prec1, rule_nmbr = RuleNmbr1}, 1939 #shift{prec = Prec2, rule_nmbr = RuleNmbr2}, Cxt) -> 1940 %% Conflict due to precedences "one level up". Kludge. 1941 #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt, 1942 {_, L1, RuleN1} = rule(RuleNmbr1, St), 1943 {_, L2, RuleN2} = rule(RuleNmbr2, St), 1944 Confl = {one_level_up, {L1, RuleN1, Prec1}, {L2, RuleN2, Prec2}}, 1945 {Symbol, N, Confl, Confl}; 1946conflict(#reduce{rule_nmbr = RuleNmbr1}, NewAction, Cxt) -> 1947 #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt, 1948 {R1, RuleLocation1, RuleN1} = rule(RuleNmbr1, St), 1949 Confl = case NewAction of 1950 accept -> 1951 {accept, St#yecc.rootsymbol}; 1952 #reduce{rule_nmbr = RuleNmbr2} -> 1953 {R2, RuleLocation2, RuleN2} = rule(RuleNmbr2, St), 1954 {reduce, R2, RuleN2, RuleLocation2}; 1955 #shift{state = NewState} -> 1956 {shift, NewState, last(R1)} 1957 end, 1958 {Symbol, N, {R1, RuleN1, RuleLocation1}, Confl}. 1959 1960format_conflict({Symbol, N, _, {one_level_up, 1961 {L1, RuleN1, {P1, Ass1}}, 1962 {L2, RuleN2, {P2, Ass2}}}}) -> 1963 S1 = io_lib:fwrite(<<"Conflicting precedences of symbols when " 1964 "scanning ~ts in state ~w:\n">>, 1965 [format_symbol(Symbol), N]), 1966 S2 = io_lib:fwrite(<<" ~s ~w (rule ~w ~s)\n" 1967 " vs.\n">>, 1968 [format_assoc(Ass1), P1, RuleN1, rule_pos(L1)]), 1969 S3 = io_lib:fwrite(<<" ~s ~w (rule ~w ~s)\n">>, 1970 [format_assoc(Ass2), P2, RuleN2, rule_pos(L2)]), 1971 [S1, S2, S3]; 1972format_conflict({Symbol, N, Reduce, Confl}) -> 1973 S1 = io_lib:fwrite(<<"Parse action conflict scanning symbol " 1974 "~ts in state ~w:\n">>, [format_symbol(Symbol), N]), 1975 S2 = case Reduce of 1976 {[HR | TR], RuleNmbr, RuleLocation} -> 1977 io_lib:fwrite(<<" Reduce to ~ts from ~ts (rule ~w " 1978 "~s)\n vs.\n">>, 1979 [format_symbol(HR), format_symbols(TR), 1980 RuleNmbr, rule_pos(RuleLocation)]) 1981 end, 1982 S3 = case Confl of 1983 {reduce, [HR2|TR2], RuleNmbr2, RuleLocation2} -> 1984 io_lib:fwrite(<<" reduce to ~ts from ~ts " 1985 "(rule ~w ~s).">>, 1986 [format_symbol(HR2), format_symbols(TR2), 1987 RuleNmbr2, rule_pos(RuleLocation2)]); 1988 {shift, NewState, Sym} -> 1989 io_lib:fwrite(<<" shift to state ~w, adding right " 1990 "sisters to ~ts.">>, 1991 [NewState, format_symbol(Sym)]); 1992 {accept, Rootsymbol} -> 1993 io_lib:fwrite(<<" reduce to rootsymbol ~ts.">>, 1994 [format_symbol(Rootsymbol)]) 1995 end, 1996 [S1, S2, S3]. 1997 1998rule_pos(Line) when is_integer(Line) -> 1999 io_lib:format("at line ~w", [Line]); 2000rule_pos({Line, Column}) when is_integer(Line), is_integer(Column) -> 2001 io_lib:format("at location ~w:~w", [Line, Column]). 2002 2003%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2004% Code generation: 2005 2006%% The version up to and including parsetools-1.3 is called "1.0". 2007%% 2008%% "1.1", parsetools-1.4: 2009%% - the prologue file has been updated; 2010%% - nonassoc is new; 2011%% - different order of clauses; 2012%% - never more than one clause matching a given symbol in a given state; 2013%% - file attributes relate messages to .yrl file; 2014%% - actions put in inlined functions; 2015%% - a few other minor fixes. 2016%% 2017%% "1.2", parsetools-1.4.2: 2018%% - the generated code has been changed as follows: 2019%% - yeccpars2() calls the functions yeccpars2_State(); 2020%% - several states can share yeccpars2_State(), which reduces code size; 2021%% - yeccgoto() has been split on one function per nonterminal; 2022%% - several minor changes have made the loaded code smaller. 2023%% - the include file yeccpre.hrl has been changed incompatibly. 2024%% 2025%% "1.3", parsetools-1.4.4: 2026%% - the generated code has been changed as follows: 2027%% - yeccgoto_T() no longer returns the next state, but calls yeccpars_S(); 2028%% - yeccpars2() is not called when it is known which yeccpars2_S() to call; 2029%% - "__Stack" has been substituted for "Stack"; 2030%% - several states can share yeccpars2_S_cont(), which reduces code size; 2031%% - instead if calling lists:nthtail() matching code is emitted. 2032%% 2033%% "1.4", parsetools-2.0.4: 2034%% - yeccerror() is called when a syntax error is found (as in version 1.1). 2035%% - the include file yeccpre.hrl has been changed. 2036 2037-define(CODE_VERSION, "1.4"). 2038-define(YECC_BUG(M, A), 2039 unicode:characters_to_binary( 2040 [" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",", 2041 io_lib:fwrite(M, A), "}).\n\n"])). 2042 2043%% Returns number of newlines in included files. 2044output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] -> 2045 St5 = output_header(St0), 2046 #yecc{infile = Infile, module = Module} = St5, 2047 St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]), 2048 St20 = 2049 fwrite(St10, 2050 <<"-export([parse/1, parse_and_scan/1, format_error/1]).\n">>, 2051 []), 2052 {St25, N_lines_1, LastErlangCodeLine} = 2053 case St20#yecc.erlang_code of 2054 none -> 2055 {St20, 0, no_erlang_code}; 2056 Next_line -> 2057 St_10 = output_file_directive(St20, Infile, Next_line-1), 2058 Last_line = include1([], Inport, Outport, Infile, 2059 Next_line, St_10), 2060 Nmbr_of_lines = Last_line - Next_line, 2061 {St_10, Nmbr_of_lines, {last_erlang_code_line, Last_line}} 2062 end, 2063 St30 = nl(St25), 2064 IncludeFile = 2065 filename:join([code:lib_dir(parsetools), "include","yeccpre.hrl"]), 2066 %% Maybe one could assume there are no warnings in this file. 2067 St = output_file_directive(St30, IncludeFile, 0), 2068 N_lines_2 = include(St, IncludeFile, Outport), 2069 {St, N_lines_1 + N_lines_2, LastErlangCodeLine}; 2070output_prelude(Outport, Inport, St0) -> 2071 St5 = output_header(St0), 2072 #yecc{infile = Infile, module = Module, includefile = Includefile} = St5, 2073 St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]), 2074 St20 = output_file_directive(St10, Includefile, 0), 2075 N_lines_1 = include(St20, Includefile, Outport), 2076 St30 = nl(St20), 2077 case St30#yecc.erlang_code of 2078 none -> 2079 {St30, N_lines_1, no_erlang_code}; 2080 Next_line -> 2081 St = output_file_directive(St30, Infile, Next_line-1), 2082 Last_line = include1([], Inport, Outport, Infile, Next_line, St), 2083 Nmbr_of_lines = Last_line - Next_line, 2084 {St, Nmbr_of_lines + N_lines_1, {last_erlang_code_line, Last_line}} 2085 end. 2086 2087output_header(St0) -> 2088 lists:foldl(fun(Str, St) -> fwrite(St, <<"~ts\n">>, [Str]) 2089 end, St0, St0#yecc.header). 2090 2091output_goto(St, [{_Nonterminal, []} | Go], StateInfo) -> 2092 output_goto(St, Go, StateInfo); 2093output_goto(St0, [{Nonterminal, List} | Go], StateInfo) -> 2094 F = function_name(St0, yeccgoto, Nonterminal), 2095 St05 = output_nowarn(St0, F, '', 7), 2096 St10 = output_goto1(St05, List, F, StateInfo, true), 2097 St = output_goto_fini(F, Nonterminal, St10), 2098 output_goto(St, Go, StateInfo); 2099output_goto(St, [], _StateInfo) -> 2100 St. 2101 2102output_goto1(St0, [{From, To} | Tail], F, StateInfo, IsFirst) -> 2103 St10 = delim(St0, IsFirst), 2104 {To, ToInfo} = lookup_state(StateInfo, To), 2105 #state_info{reduce_only = RO, state_repr = Repr, comment = C} = ToInfo, 2106 if 2107 RO -> 2108 %% Reduce actions do not use the state, so we just pass 2109 %% the old (now bogus) on: 2110 FromS = io_lib:fwrite("~w=_S", [From]), 2111 ToS = "_S"; 2112 true -> 2113 FromS = io_lib:fwrite("~w", [From]), 2114 ToS = io_lib:fwrite("~w", [To]) 2115 end, 2116 St20 = fwrite(St10, <<"~w(~s, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>, 2117 [F,FromS]), 2118 St30 = fwrite(St20, <<"~s">>, [C]), 2119 %% Short-circuit call to yeccpars2: 2120 St = fwrite(St30, <<" yeccpars2_~w(~s, Cat, Ss, Stack, T, Ts, Tzr)">>, 2121 [Repr, ToS]), 2122 output_goto1(St, Tail, F, StateInfo, false); 2123output_goto1(St, [], _F, _StateInfo, _IsFirst) -> 2124 St. 2125 2126output_goto_fini(F, NT, #yecc{includefile_version = {1,1}}=St0) -> 2127 %% Backward compatibility. 2128 St10 = delim(St0, false), 2129 St = fwrite(St10, <<"~w(State, _Cat, _Ss, _Stack, _T, _Ts, _Tzr) ->\n">>, 2130 [F]), 2131 fwrite(St, 2132 ?YECC_BUG(<<"{~ts, State, missing_in_goto_table}">>, 2133 [quoted_atom(St0, NT)]), 2134 []); 2135output_goto_fini(_F, _NT, St) -> 2136 fwrite(St, <<".\n\n">>, []). 2137 2138%% Find actions having user code. 2139find_user_code(ParseActions, St) -> 2140 [#user_code{state = State, 2141 terminal = Terminal, 2142 funname = inlined_function_name(St, State, Terminal), 2143 action = Action} || 2144 {State, La_actions} <- ParseActions, 2145 {Action, Terminals, RuleNmbr, NmbrOfDaughters} 2146 <- find_user_code2(La_actions), 2147 case tokens(RuleNmbr, St) of 2148 [{var, _, ?PSEUDO_VAR_1}] -> NmbrOfDaughters =/= 1; 2149 _ -> true 2150 end, 2151 Terminal <- Terminals]. 2152 2153find_user_code2([]) -> 2154 []; 2155find_user_code2([{_, #reduce{rule_nmbr = RuleNmbr, 2156 nmbr_of_daughters = NmbrOfDaughters} 2157 =Action}]) -> 2158 %% Same optimization as in output_state_actions. 2159 [{Action, ["Cat"], RuleNmbr, NmbrOfDaughters}]; 2160find_user_code2([{La, #reduce{rule_nmbr = RuleNmbr, 2161 nmbr_of_daughters = NmbrOfDaughters} 2162 =Action} | T]) -> 2163 [{Action,La, RuleNmbr, NmbrOfDaughters} | find_user_code2(T)]; 2164find_user_code2([_ | T]) -> 2165 find_user_code2(T). 2166 2167output_actions(St0, StateJumps, StateInfo) -> 2168 %% Not all the clauses of the dispatcher function yeccpars2() can 2169 %% be reached. Only when shifting, that is, calling yeccpars1(), 2170 %% will yeccpars2() be called. 2171 Y2CL = [NewState || {_State,{Actions,J}} <- StateJumps, 2172 {_LA, #shift{state = NewState}} <- 2173 (Actions 2174 ++ [A || {_Tag,_To,Part} <- [J], A <- Part])], 2175 Y2CS = ordsets:from_list([0 | Y2CL]), 2176 Y2S = ordsets:from_list([S || {S,_} <- StateJumps]), 2177 NY2CS = ordsets:subtract(Y2S, Y2CS), 2178 Sel = [{S,true} || S <- ordsets:to_list(Y2CS)] ++ 2179 [{S,false} || S <- ordsets:to_list(NY2CS)], 2180 2181 SelS = [{State,Called} || 2182 {{State,_JActions}, {State,Called}} <- 2183 lists:zip(StateJumps, lists:keysort(1, Sel))], 2184 St05 = output_nowarn(St0, yeccpars2, '', 7), 2185 St10 = foldl(fun({State, Called}, St_0) -> 2186 {State, #state_info{state_repr = IState}} = 2187 lookup_state(StateInfo, State), 2188 output_state_selection(St_0, State, IState, Called) 2189 end, St05, SelS), 2190 St20 = fwrite(St10, <<"yeccpars2(Other, _, _, _, _, _, _) ->\n">>, []), 2191 St = fwrite(St20, 2192 ?YECC_BUG(<<"{missing_state_in_action_table, Other}">>, []), 2193 []), 2194 foldl(fun({State, JActions}, St_0) -> 2195 {State, #state_info{state_repr = IState}} = 2196 lookup_state(StateInfo, State), 2197 output_state_actions(St_0, State, IState, 2198 JActions, StateInfo) 2199 end, St, StateJumps). 2200 2201output_state_selection(St0, State, IState, Called) -> 2202 Comment = [<<"%% ">> || false <- [Called]], 2203 St = fwrite(St0, <<"~syeccpars2(~w=S, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>, 2204 [Comment, State]), 2205 fwrite(St, 2206 <<"~s yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr);\n">>, 2207 [Comment, IState]). 2208 2209output_state_actions(St, State, State, {Actions,jump_none}, SI) -> 2210 St1 = output_state_actions_begin(St, State, Actions), 2211 output_state_actions1(St1, State, Actions, true, normal, SI); 2212output_state_actions(St0, State, State, {Actions, Jump}, SI) -> 2213 {Tag, To, Common} = Jump, 2214 CS = case Tag of 2215 jump_some -> list_to_atom(lists:concat([cont_, To])); 2216 jump_all -> To 2217 end, 2218 St = output_state_actions1(St0, State, Actions, true, {to, CS}, SI), 2219 if 2220 To =:= State -> 2221 St1 = output_state_actions_begin(St, State, Actions), 2222 output_state_actions1(St1, CS, Common, true, normal, SI); 2223 true -> 2224 St 2225 end; 2226output_state_actions(St, State, JState, _XActions, _SI) -> 2227 fwrite(St, <<"%% yeccpars2_~w: see yeccpars2_~w\n\n">>, [State, JState]). 2228 2229output_state_actions_begin(St, State, _Actions) -> 2230 output_nowarn(St, yeccpars2_, State, 7). 2231 2232output_state_actions1(St, State, [], IsFirst, normal, _SI) -> 2233 output_state_actions_fini(State, IsFirst, St); 2234output_state_actions1(St0, State, [], IsFirst, {to, ToS}, _SI) -> 2235 St = delim(St0, IsFirst), 2236 fwrite(St, 2237 <<"yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr) ->\n" 2238 " yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr).\n\n">>, 2239 [State, ToS]); 2240output_state_actions1(St0, State, [{_, #reduce{}=Action}], 2241 IsFirst, _End, SI) -> 2242 St = output_reduce(St0, State, "Cat", Action, IsFirst, SI), 2243 fwrite(St, <<".\n\n">>, []); 2244output_state_actions1(St0, State, [{Lookahead,Action} | Tail], 2245 IsFirst, End, SI) -> 2246 {_, St} = 2247 foldl(fun(Terminal, {IsFst,St_0}) -> 2248 {false, 2249 output_action(St_0, State, Terminal, Action, IsFst,SI)} 2250 end, {IsFirst,St0}, Lookahead), 2251 output_state_actions1(St, State, Tail, false, End, SI). 2252 2253output_action(St, State, Terminal, #reduce{}=Action, IsFirst, SI) -> 2254 output_reduce(St, State, Terminal, Action, IsFirst, SI); 2255output_action(St0, State, Terminal, #shift{state = NewState}, IsFirst, _SI) -> 2256 St10 = delim(St0, IsFirst), 2257 St = fwrite(St10, <<"yeccpars2_~w(S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>, 2258 [State, quoted_atom(St10, Terminal)]), 2259 output_call_to_includefile(NewState, St); 2260output_action(St0, State, Terminal, accept, IsFirst, _SI) -> 2261 St10 = delim(St0, IsFirst), 2262 St = fwrite(St10, 2263 <<"yeccpars2_~w(_S, ~ts, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>, 2264 [State, quoted_atom(St10, Terminal)]), 2265 fwrite(St, <<" {ok, hd(Stack)}">>, []); 2266output_action(St, _State, _Terminal, nonassoc, _IsFirst, _SI) -> 2267 St. 2268 2269output_call_to_includefile(NewState, #yecc{includefile_version = {1,1}}=St) -> 2270 %% Backward compatibility. 2271 fwrite(St, <<" yeccpars1(Ts, Tzr, ~w, [S | Ss], [T | Stack])">>, 2272 [NewState]); 2273output_call_to_includefile(NewState, St) -> 2274 fwrite(St, <<" yeccpars1(S, ~w, Ss, Stack, T, Ts, Tzr)">>, 2275 [NewState]). 2276 2277output_state_actions_fini(State, IsFirst, St0) -> 2278 %% Backward compatible. 2279 St10 = delim(St0, IsFirst), 2280 St = fwrite(St10, <<"yeccpars2_~w(_, _, _, _, T, _, _) ->\n">>, [State]), 2281 fwrite(St, <<" yeccerror(T).\n\n">>, []). 2282 2283output_reduce(St0, State, Terminal, 2284 #reduce{rule_nmbr = RuleNmbr, 2285 head = Head, 2286 nmbr_of_daughters = NmbrOfDaughters}, 2287 IsFirst, StateInfo) -> 2288 St10 = delim(St0, IsFirst), 2289 QuotedTerminal = if 2290 is_atom(Terminal) -> quoted_atom(St10, Terminal); 2291 true -> Terminal 2292 end, 2293 St20 = fwrite(St10, 2294 <<"yeccpars2_~w(_S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>, 2295 [State, QuotedTerminal]), 2296 St30 = 2297 if 2298 NmbrOfDaughters < 2 -> 2299 Ns = "Ss", 2300 St20; 2301 true -> 2302 Ns = "Nss", 2303 Tmp = lists:join(",", 2304 lists:duplicate(NmbrOfDaughters - 1, "_")), 2305 fwrite(St20, <<" [~s|Nss] = Ss,\n">>, [Tmp]) 2306 end, 2307 St40 = case tokens(RuleNmbr, St30) of 2308 [{var, _, ?PSEUDO_VAR_1}] when NmbrOfDaughters =:= 1 -> 2309 NewStack = "Stack", 2310 St30; 2311 _ -> 2312 NewStack = "NewStack", 2313 fwrite(St30, <<" NewStack = ~w(Stack),\n">>, 2314 [inlined_function_name(St30, State, Terminal)]) 2315 end, 2316 if 2317 NmbrOfDaughters =:= 0 -> 2318 NextState = goto(State, Head, St40), 2319 {NextState, I} = lookup_state(StateInfo, NextState), 2320 #state_info{reduce_only = RO, state_repr = Repr, comment = C} = I, 2321 %% Reduce actions do not use the state, so we just pass 2322 %% the old (now bogus) on: 2323 if 2324 RO -> NextS = "_S"; 2325 true -> NextS = io_lib:fwrite("~w", [NextState]) 2326 end, 2327 St = fwrite(St40, <<"~s">>, [C]), 2328 %% Short-circuit call to yeccpars2: 2329 fwrite(St, 2330 <<" yeccpars2_~w(~s, ~ts, [~w | Ss], ~s, T, Ts, Tzr)">>, 2331 [Repr, NextS, QuotedTerminal, State, NewStack]); 2332 true -> 2333 fwrite(St40, 2334 <<" ~w(hd(~s), ~ts, ~s, ~s, T, Ts, Tzr)">>, 2335 [function_name(St40, yeccgoto, Head), Ns, 2336 QuotedTerminal, Ns, NewStack]) 2337 end. 2338 2339delim(St, true) -> 2340 St; 2341delim(St, false) -> 2342 fwrite(St, <<";\n">>, []). 2343 2344quoted_atom(#yecc{encoding = latin1}, Atom) when is_atom(Atom) -> 2345 io_lib:write_atom_as_latin1(Atom); 2346quoted_atom(_St, Atomic) -> 2347 io_lib:write(Atomic). 2348 2349output_inlined(St, UserCodeActions, Infile) -> 2350 foldl(fun(#user_code{funname = InlinedFunctionName, 2351 action = Action}, St_0) -> 2352 output_inlined(St_0, InlinedFunctionName, 2353 Action, Infile) 2354 end, St, UserCodeActions). 2355 2356%% Each action with user code is placed in a separate inlined function. 2357%% The purpose is to be able to pinpoint errors and warnings correctly. 2358output_inlined(St0, FunctionName, Reduce, Infile) -> 2359 #reduce{rule_nmbr = RuleNmbr, nmbr_of_daughters = N_daughters} = Reduce, 2360 #rule{tokens = Tokens, is_well_formed = WF} = get_rule(RuleNmbr, St0), 2361 Line0 = first_line(Tokens), 2362 NLines = last_line(Tokens) - Line0, 2363 2364 St5 = if 2365 WF -> 2366 St0; 2367 not WF -> 2368 %% The compiler will generate an error message for 2369 %% the inlined function (unless the reason that yecc 2370 %% failed to parse the action was some macro). The 2371 %% line number of the message will be correct since 2372 %% we are keeping track of the current line of the 2373 %% output file... 2374 #yecc{outfile = Outfile, line = CurLine} = St0, 2375 output_file_directive(St0, Outfile, CurLine) 2376 end, 2377 2378 CodeStartLine = lists:max([0, Line0 - 4]), 2379 St10 = fwrite(St5, <<"-compile({inline,~w/1}).\n">>, [FunctionName]), 2380 St15 = output_nowarn(St10, FunctionName, '', 1), 2381 St20 = output_file_directive(St15, Infile, CodeStartLine), 2382 St30 = fwrite(St20, <<"~w(__Stack0) ->\n">>, [FunctionName]), 2383 %% Currently the (old) inliner emits less code if matching the 2384 %% stack inside the body rather than in the head... 2385 St40 = case N_daughters of 2386 0 -> 2387 Stack = "__Stack0", 2388 St30; 2389 _ -> 2390 Stack = "__Stack", 2391 A = concat(flatmap(fun(I) -> [",",?PSEUDO_PREFIX,I] end, 2392 lists:seq(N_daughters, 1, -1))), 2393 fwrite(St30, <<" ~s = __Stack0,\n">>, 2394 [append(["[", tl(A), " | __Stack]"])]) 2395 end, 2396 St = St40#yecc{line = St40#yecc.line + NLines}, 2397 fwrite(St, <<" [begin\n~ts\n end | ~s].\n\n">>, 2398 [pp_tokens(Tokens), Stack]). 2399 2400output_nowarn(St, Function, Suffix, Arity) -> 2401 S = case Suffix of '' -> ""; _ -> integer_to_list(Suffix) end, 2402 St1 = fwrite(St, <<"-dialyzer({nowarn_function, ~w~s/~w}).\n">>, 2403 [Function, S, Arity]), 2404 fwrite(St1, <<"-compile({nowarn_unused_function, ~w~s/~w}).\n">>, 2405 [Function, S, Arity]). 2406 2407inlined_function_name(St, State, Terminal) -> 2408 End = case Terminal of 2409 "Cat" -> []; 2410 _ -> [quoted_atom(St, Terminal)] 2411 end, 2412 list_to_atom(concat([yeccpars2_, State, '_'] ++ End)). 2413 2414-compile({nowarn_unused_function,function_name/3}). 2415function_name(St, Name, Suf) -> 2416 list_to_atom(concat([Name, '_'] ++ [quoted_atom(St, Suf)])). 2417 2418rule(RulePointer, St) -> 2419 #rule{n = N, location = Location, symbols = Symbols} = 2420 maps:get(RulePointer, St#yecc.rule_pointer2rule), 2421 {Symbols, Location, N}. 2422 2423get_rule(RuleNmbr, St) -> 2424 maps:get(RuleNmbr, St#yecc.rule_pointer2rule). 2425 2426tokens(RuleNmbr, St) -> 2427 Rule = maps:get(RuleNmbr, St#yecc.rule_pointer2rule), 2428 Rule#rule.tokens. 2429 2430goto(From, Symbol, St) -> 2431 case ets:lookup(St#yecc.goto_tab, {From, Symbol}) of 2432 [{_, To}] -> 2433 To; 2434 [] -> 2435 erlang:error({error_in_goto_table, From, Symbol}) 2436 end. 2437 2438%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2439% Auxiliaries: 2440 2441-ifdef(SYMBOLS_AS_CODES). 2442 2443%%% Bit mask operations. 2444 2445-compile({inline,[set_empty/0]}). 2446set_empty() -> 2447 0. 2448 2449set_add(I, BM) -> 2450 (1 bsl I) bor BM. 2451 2452-compile({inline,[set_member/2]}). 2453set_member(I, BM) -> 2454 ((1 bsl I) band BM) =/= 0. 2455 2456%% Assumes I is a member... 2457-compile({inline,[set_delete/2]}). 2458set_delete(I, BM) -> 2459 (1 bsl I) bxor BM. 2460 2461-compile({inline,[set_union/2]}). 2462set_union(BM1, BM2) -> 2463 BM1 bor BM2. 2464 2465-compile({inline,[set_is_subset/2]}). 2466set_is_subset(BM1, BM2) -> 2467 (BM1 band BM2) =:= BM1. 2468 2469empty_member(BM) -> 2470 set_member(0, BM). 2471 2472empty_delete(BM) -> 2473 set_delete(0, BM). 2474 2475code_symbols(Ss, SymbolTable) -> 2476 map(fun(S) -> ets:lookup_element(SymbolTable, S, 2) end, Ss). 2477 2478decode_symbol(C, InvSymbolTable) -> 2479 ets:lookup_element(InvSymbolTable, C, 1). 2480 2481code_terminal(T, SymbolTab) -> 2482 set_add(ets:lookup_element(SymbolTab, T, 2), 0). 2483 2484decode_terminals(BM, InvSymbolTab) -> 2485 case get(BM) of 2486 undefined -> 2487 Symbols = decode_terminals(BM, 0, InvSymbolTab), 2488 put(BM, Symbols), 2489 Symbols; 2490 Symbols -> 2491 Symbols 2492 end. 2493 2494decode_terminals(0, _I, _InvSymbolTab) -> 2495 []; 2496decode_terminals(BM, I, InvSymbolTab) -> 2497 case set_member(I, BM) of 2498 true -> 2499 [ets:lookup_element(InvSymbolTab, I, 1) 2500 | decode_terminals(set_delete(I, BM), I+1, InvSymbolTab)]; 2501 false -> 2502 decode_terminals(BM, I+1, InvSymbolTab) 2503 end. 2504 2505set_add_terminal({_Symbol, TerminalNum}, BM) -> 2506 set_add(TerminalNum, BM). 2507 2508-compile({inline,[is_terminal/2]}). 2509is_terminal(_Tab, SymbolCode) -> 2510 SymbolCode >= 0. 2511 2512left_corner_symbol_table(St) -> 2513 St#yecc.inv_symbol_tab. 2514 2515-else. 2516 2517set_empty() -> 2518 []. 2519 2520set_add(Symbol, L) -> 2521 ordsets:union([Symbol], L). 2522 2523set_union(Es1, Es2) -> 2524 ordsets:union(Es1, Es2). 2525 2526set_is_subset(Es1, Es2) -> 2527 ordsets:is_subset(Es1, Es2). 2528 2529code_symbols(Ss, _SymbolTab) -> 2530 Ss. 2531 2532decode_symbol(S, _InvSymbolTab) -> 2533 S. 2534 2535code_terminal(T, _SymbolTab) -> 2536 [T]. 2537 2538decode_terminals(Ts, _InvSymbolTab) -> 2539 Ts. 2540 2541empty_member(['$empty' | _]) -> 2542 true; 2543empty_member(_) -> 2544 false. 2545 2546empty_delete(['$empty' | Terminals]) -> 2547 Terminals. 2548 2549set_add_terminal({Symbol, _TerminalNum}, L) -> 2550 set_add(Symbol, L). 2551 2552is_terminal(Tab, SymbolName) -> 2553 ets:lookup_element(Tab, SymbolName, 2) >= 0. 2554 2555left_corner_symbol_table(St) -> 2556 St#yecc.symbol_tab. 2557 2558-endif. % SYMBOLS_AS_CODES 2559 2560intersect(L1, L2) -> 2561 ordsets:to_list(ordsets:intersection(ordsets:from_list(L1), 2562 ordsets:from_list(L2))). 2563 2564format_symbols([Sym | Syms]) -> 2565 concat([format_symbol(Sym) | format_symbols1(Syms)]). 2566 2567format_symbols1([]) -> 2568 []; 2569format_symbols1([H | T]) -> 2570 [" ", format_symbol(H) | format_symbols1(T)]. 2571 2572include(St, File, Outport) -> 2573 case file:open(File, [read]) of 2574 {error, Reason} -> 2575 throw(add_error(File, none, {file_error, Reason}, St)); 2576 {ok, Inport} -> 2577 _ = epp:set_encoding(Inport), 2578 Line = io:get_line(Inport, ''), 2579 try include1(Line, Inport, Outport, File, 1, St) - 1 2580 after ok = file:close(Inport) 2581 end 2582 end. 2583 2584include1(eof, _, _, _File, L, _St) -> 2585 L; 2586include1({error, _}=_Error, _Inport, _Outport, File, L, St) -> 2587 throw(add_error(File, erl_anno:new(L), cannot_parse, St)); 2588include1(Line, Inport, Outport, File, L, St) -> 2589 Incr = case member($\n, Line) of 2590 true -> 1; 2591 false -> 0 2592 end, 2593 io:put_chars(Outport, Line), 2594 include1(io:get_line(Inport, ''), Inport, Outport, File, L + Incr, St). 2595 2596includefile_version([]) -> 2597 {1,4}; 2598includefile_version(Includefile) -> 2599 case epp:open(Includefile, []) of 2600 {ok, Epp} -> 2601 try 2602 parse_file(Epp) 2603 after 2604 epp:close(Epp) 2605 end; 2606 {error, _Error} -> 2607 {1,1} 2608 end. 2609 2610parse_file(Epp) -> 2611 case epp:parse_erl_form(Epp) of 2612 {ok, {function,_Anno,yeccpars1,7,_Clauses}} -> 2613 {1,4}; 2614 {eof,_Location} -> 2615 {1,1}; 2616 _Form -> 2617 parse_file(Epp) 2618 end. 2619 2620%% Keeps the line breaks and the column numbers of the original code. 2621pp_tokens(Tokens) -> 2622 C = first_column(Tokens), 2623 Indent = lists:duplicate(C - 1, $\s), 2624 Text = lists:append([erl_anno:text(anno(T)) || T <- Tokens]), 2625 [Indent, Text]. 2626 2627set_encoding(#yecc{encoding = none}, Port) -> 2628 ok = io:setopts(Port, [{encoding, epp:default_encoding()}]); 2629set_encoding(#yecc{encoding = E}, Port) -> 2630 ok = io:setopts(Port, [{encoding, E}]). 2631 2632output_encoding_comment(#yecc{encoding = none}=St) -> 2633 St; 2634output_encoding_comment(#yecc{encoding = Encoding}=St) -> 2635 fwrite(St, <<"%% ~s\n">>, [epp:encoding_to_string(Encoding)]). 2636 2637output_file_directive(St, Filename, Line) when St#yecc.file_attrs -> 2638 fwrite(St, <<"-file(~ts, ~w).\n">>, 2639 [format_filename(Filename, St), Line]); 2640output_file_directive(St, _Filename, _Line) -> 2641 St. 2642 2643first_column(Tokens) -> 2644 case erl_anno:column(anno(hd(Tokens))) of 2645 undefined -> 2646 1; 2647 Column -> 2648 Column 2649 end. 2650 2651first_line(Tokens) -> 2652 line(anno(hd(Tokens))). 2653 2654last_line(Tokens) -> 2655 line(anno(lists:last(Tokens))). 2656 2657line(Anno) -> 2658 erl_anno:line(Anno). 2659 2660location(none) -> none; 2661location(Anno) -> 2662 erl_anno:location(Anno). 2663 2664anno(Token) -> 2665 element(2, Token). 2666 2667%% Keep track of the current line in the generated file. 2668fwrite(#yecc{outport = Outport, line = Line}=St, Format, Args) -> 2669 NLines = count_nl(Format), 2670 io:fwrite(Outport, Format, Args), 2671 St#yecc{line = Line + NLines}. 2672 2673%% Assumes \n is used, and never ~n. 2674count_nl(<<$\n,Rest/binary>>) -> 2675 1 + count_nl(Rest); 2676count_nl(<<_,Rest/binary>>) -> 2677 count_nl(Rest); 2678count_nl(<<>>) -> 2679 0. 2680 2681nl(#yecc{outport = Outport, line = Line}=St) -> 2682 io:nl(Outport), 2683 St#yecc{line = Line + 1}. 2684 2685format_filename(Filename0, St) -> 2686 Filename = filename:flatten(Filename0), 2687 case lists:keyfind(encoding, 1, io:getopts(St#yecc.outport)) of 2688 {encoding, unicode} -> io_lib:write_string(Filename); 2689 _ -> io_lib:write_string_as_latin1(Filename) 2690 end. 2691 2692format_assoc(left) -> 2693 "Left"; 2694format_assoc(right) -> 2695 "Right"; 2696format_assoc(unary) -> 2697 "Unary"; 2698format_assoc(nonassoc) -> 2699 "Nonassoc". 2700 2701format_symbol(Symbol) -> 2702 String = concat([Symbol]), 2703 case erl_scan:string(String) of 2704 {ok, [{atom, _, _}], _} -> 2705 io_lib:fwrite(<<"~tw">>, [Symbol]); 2706 {ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' -> 2707 case erl_scan:reserved_word(Word) of 2708 true -> 2709 String; 2710 false -> 2711 io_lib:fwrite(<<"~tw">>, [Symbol]) 2712 end; 2713 {ok, [{var, _, _}], _} -> 2714 String; 2715 _ -> 2716 io_lib:fwrite(<<"~tw">>, [Symbol]) 2717 end. 2718 2719line_of_location(Line) when is_integer(Line) -> 2720 Line; 2721line_of_location({Line, Column}) when is_integer(Line), is_integer(Column) -> 2722 Line. 2723 2724inverse(L) -> 2725 sort([{A,B} || {B,A} <- L]). 2726 2727family(L) -> 2728 sofs:to_external(sofs:relation_to_family(sofs:relation(L))). 2729 2730seq1(To) when To < 1 -> 2731 []; 2732seq1(To) -> 2733 lists:seq(1, To). 2734 2735count(From, L) -> 2736 lists:zip(L, lists:seq(From, length(L)-1+From)). 2737 2738family_with_domain(L, DL) -> 2739 sofs:to_external(sofs_family_with_domain(sofs:relation(L), sofs:set(DL))). 2740 2741sofs_family_with_domain(R0, D) -> 2742 R = sofs:restriction(R0, D), 2743 F = sofs:relation_to_family(R), 2744 FD = sofs:constant_function(D, sofs:from_term([])), 2745 sofs:family_union(F, FD). 2746