1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 1996-2018. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20%% Yacc like LALR-1 parser generator for Erlang.
21%% Ref: Aho & Johnson: "LR Parsing", ACM Computing Surveys, vol. 6:2, 1974.
22%% Auxiliary files: yeccgramm.yrl, yeccparser.erl, yeccpre.hrl, yeccscan.erl.
23%%
24
25-module(yecc).
26
27-export([compile/3, file/1, file/2, format_error/1]).
28
29%% Kept for compatibility with R10B.
30-export([yecc/2, yecc/3, yecc/4]).
31
32-import(lists, [append/1, append/2, concat/1, delete/2, filter/2,
33                flatmap/2, foldl/3, foldr/3, foreach/2, keydelete/3,
34                keysort/2, last/1, map/2, member/2, reverse/1,
35                sort/1, usort/1]).
36
37-include("erl_compile.hrl").
38-include("ms_transform.hrl").
39
40-record(yecc, {
41          infile,
42          outfile,
43          includefile,
44          includefile_version,
45          module,
46          encoding = none,
47          options = [],
48          verbose = false,
49          file_attrs = true,
50          errors = [],
51          warnings = [],
52          conflicts_done = false,
53          shift_reduce = [],
54          reduce_reduce = [],
55          n_states = 0,
56          inport,
57          outport,
58          line,
59
60          parse_actions,
61          symbol_tab,
62          inv_symbol_tab,
63          state_tab,
64          prec_tab,
65          goto_tab,
66
67          terminals = [],
68          nonterminals = [],
69          all_symbols = [],
70          prec = [],
71          rules_list = [],
72          rules, % a tuple of rules_list
73          rule_pointer2rule,
74          rootsymbol = [],
75          endsymbol = [],
76          expect_shift_reduce = [],
77          expect_n_states = [],
78          header = [],
79          erlang_code = none
80         }).
81
82-record(rule, {
83          n,             % rule n in the grammar file
84          anno,
85          symbols,       % the names of symbols
86          tokens,
87          is_guard,      % the action is a guard (not used)
88          is_well_formed % can be parsed (without macro expansion)
89         }).
90
91-record(reduce, {
92          rule_nmbr,
93          head,
94          nmbr_of_daughters,
95          prec,
96          unused % assure that #reduce{} comes before #shift{} when sorting
97         }).
98
99-record(shift, {
100          state,
101          pos,
102          prec,
103          rule_nmbr
104         }).
105
106-record(user_code, {state, terminal, funname, action}).
107
108-record(symbol, {anno = none, name}).
109
110%% ACCEPT is neither an atom nor a non-terminal.
111-define(ACCEPT, {}).
112
113%% During the phase 'compute_states' terminals in lookahead sets are
114%% coded as integers; sets of terminals are integer bit masks. This is
115%% for efficiency only. '$empty' is always given the mask 1. The
116%% behaviour can be turned off by un-defining SYMBOLS_AS_CODES (useful
117%% when debugging).
118
119%% Non-terminals are also given integer codes, starting with -1. The
120%% absolut value of the code is used for indexing a tuple of lists of
121%% rules.
122
123-define(SYMBOLS_AS_CODES, true).
124
125-ifdef(SYMBOLS_AS_CODES).
126-define(EMPTY, 0).
127-else.
128-define(EMPTY, '$empty').
129-endif.
130
131%%%
132%%% Exported functions
133%%%
134
135%%% Interface to erl_compile.
136
137compile(Input0, Output0,
138        #options{warning = WarnLevel, verbose=Verbose, includes=Includes,
139		 specific=Specific}) ->
140    Input = shorten_filename(Input0),
141    Output = shorten_filename(Output0),
142    Includefile = lists:sublist(Includes, 1),
143    Werror = proplists:get_bool(warnings_as_errors, Specific),
144    Opts = [{parserfile,Output}, {includefile,Includefile}, {verbose,Verbose},
145            {report_errors, true}, {report_warnings, WarnLevel > 0},
146	    {warnings_as_errors, Werror}],
147    case file(Input, Opts) of
148        {ok, _OutFile} ->
149            ok;
150        error ->
151            error
152    end.
153
154format_error(bad_declaration) ->
155    io_lib:fwrite("unknown or bad declaration, ignored", []);
156format_error({bad_expect, SymName}) ->
157    io_lib:fwrite("argument ~ts of Expect is not an integer",
158                  [format_symbol(SymName)]);
159format_error({bad_rootsymbol, SymName}) ->
160    io_lib:fwrite("rootsymbol ~ts is not a nonterminal",
161                  [format_symbol(SymName)]);
162format_error({bad_states, SymName}) ->
163    io_lib:fwrite("argument ~ts of States is not an integer",
164                  [format_symbol(SymName)]);
165format_error({conflict, Conflict}) ->
166    format_conflict(Conflict);
167format_error({conflicts, SR, RR}) ->
168    io_lib:fwrite("conflicts: ~w shift/reduce, ~w reduce/reduce", [SR, RR]);
169format_error({duplicate_declaration, Tag}) ->
170    io_lib:fwrite("duplicate declaration of ~s", [atom_to_list(Tag)]);
171format_error({duplicate_nonterminal, Nonterminal}) ->
172    io_lib:fwrite("duplicate non-terminals ~ts",
173                  [format_symbol(Nonterminal)]);
174format_error({duplicate_precedence, Op}) ->
175    io_lib:fwrite("duplicate precedence operator ~ts",
176                  [format_symbol(Op)]);
177format_error({duplicate_terminal, Terminal}) ->
178    io_lib:fwrite("duplicate terminal ~ts",
179                  [format_symbol(Terminal)]);
180format_error({endsymbol_is_nonterminal, Symbol}) ->
181    io_lib:fwrite("endsymbol ~ts is a nonterminal",
182                  [format_symbol(Symbol)]);
183format_error({endsymbol_is_terminal, Symbol}) ->
184    io_lib:fwrite("endsymbol ~ts is a terminal",
185                  [format_symbol(Symbol)]);
186format_error({error, Module, Error}) ->
187    Module:format_error(Error);
188format_error({file_error, Reason}) ->
189    io_lib:fwrite("~ts",[file:format_error(Reason)]);
190format_error(illegal_empty) ->
191    io_lib:fwrite("illegal use of empty symbol", []);
192format_error({internal_error, Error}) ->
193    io_lib:fwrite("internal yecc error: ~w", [Error]);
194format_error({missing_syntax_rule, Nonterminal}) ->
195    io_lib:fwrite("no syntax rule for non-terminal symbol ~ts",
196                  [format_symbol(Nonterminal)]);
197format_error({n_states, Exp, N}) ->
198    io_lib:fwrite("expected ~w states, but got ~p states", [Exp, N]);
199format_error(no_grammar_rules) ->
200    io_lib:fwrite("grammar rules are missing", []);
201format_error(nonterminals_missing) ->
202    io_lib:fwrite("Nonterminals is missing", []);
203format_error({precedence_op_is_endsymbol, SymName}) ->
204    io_lib:fwrite("precedence operator ~ts is endsymbol",
205                  [format_symbol(SymName)]);
206format_error({precedence_op_is_unknown, SymName}) ->
207    io_lib:fwrite("unknown precedence operator ~ts",
208                  [format_symbol(SymName)]);
209format_error({reserved, N}) ->
210    io_lib:fwrite("the use of ~w should be avoided", [N]);
211format_error({symbol_terminal_and_nonterminal, SymName}) ->
212    io_lib:fwrite("symbol ~ts is both a terminal and nonterminal",
213                  [format_symbol(SymName)]);
214format_error(rootsymbol_missing) ->
215    io_lib:fwrite("Rootsymbol is missing", []);
216format_error(terminals_missing) ->
217    io_lib:fwrite("Terminals is missing", []);
218format_error({undefined_nonterminal, Symbol}) ->
219    io_lib:fwrite("undefined nonterminal: ~ts", [format_symbol(Symbol)]);
220format_error({undefined_pseudo_variable, Atom}) ->
221    io_lib:fwrite("undefined pseudo variable ~w", [Atom]);
222format_error({undefined_symbol, SymName}) ->
223    io_lib:fwrite("undefined rhs symbol ~ts", [format_symbol(SymName)]);
224format_error({unused_nonterminal, Nonterminal}) ->
225    io_lib:fwrite("non-terminal symbol ~ts not used",
226                  [format_symbol(Nonterminal)]);
227format_error({unused_terminal, Terminal}) ->
228    io_lib:fwrite("terminal symbol ~ts not used",
229                  [format_symbol(Terminal)]);
230format_error({bad_symbol, String}) ->
231    io_lib:fwrite("bad symbol ~ts", [String]);
232format_error(cannot_parse) ->
233    io_lib:fwrite("cannot parse; possibly encoding mismatch", []).
234
235file(File) ->
236    file(File, [report_errors, report_warnings]).
237
238file(File, Options) ->
239    case is_filename(File) of
240        no -> erlang:error(badarg, [File, Options]);
241        _ -> ok
242    end,
243    case options(Options) of
244        badarg ->
245            erlang:error(badarg, [File, Options]);
246        OptionValues ->
247            Self = self(),
248            Flag = process_flag(trap_exit, false),
249            Pid = spawn_link(fun() -> infile(Self, File, OptionValues) end),
250            receive
251                {Pid, Rep} ->
252                    receive after 1 -> ok end,
253                    process_flag(trap_exit, Flag),
254                    Rep
255            end
256    end.
257
258%% Kept for backward compatibility.
259yecc(Infile, Outfile) ->
260    yecc(Infile, Outfile, false, []).
261
262yecc(Infile, Outfile, Verbose) ->
263    yecc(Infile, Outfile, Verbose, []).
264
265yecc(Infilex, Outfilex, Verbose, Includefilex) ->
266    _ = statistics(runtime),
267    case file(Infilex, [{parserfile, Outfilex},
268                        {verbose, Verbose},
269                        {report, true},
270                        {includefile, Includefilex}]) of
271        {ok, _File} ->
272            statistics(runtime);
273        error ->
274            exit(error)
275    end.
276
277%%%
278%%% Local functions
279%%%
280
281options(Options0) when is_list(Options0) ->
282    try
283        Options = flatmap(fun(return) -> short_option(return, true);
284                             (report) -> short_option(report, true);
285                             ({return,T}) -> short_option(return, T);
286                             ({report,T}) -> short_option(report, T);
287                             (T) -> [T]
288                          end, Options0),
289        options(Options, [file_attributes, includefile, parserfile,
290                          report_errors, report_warnings, warnings_as_errors,
291                          return_errors, return_warnings, time, verbose], [])
292    catch error: _ -> badarg
293    end;
294options(Option) ->
295    options([Option]).
296
297short_option(return, T) ->
298    [{return_errors,T}, {return_warnings,T}];
299short_option(report, T) ->
300    [{report_errors,T}, {report_warnings,T}].
301
302options(Options0, [Key | Keys], L) when is_list(Options0) ->
303    Options = case member(Key, Options0) of
304                  true ->
305                      [atom_option(Key) | delete(Key, Options0)];
306                  false ->
307                      Options0
308              end,
309    V = case lists:keyfind(Key, 1, Options) of
310            {Key, Filename0} when Key =:= includefile;
311                                  Key =:= parserfile ->
312                case is_filename(Filename0) of
313                    no ->
314                        badarg;
315                    Filename ->
316                        {ok, [{Key, Filename}]}
317                end;
318            {Key, Bool} = KB when is_boolean(Bool) ->
319                {ok, [KB]};
320            {Key, _} ->
321                badarg;
322            false ->
323                {ok, [{Key, default_option(Key)}]}
324        end,
325    case V of
326        badarg ->
327            badarg;
328        {ok, KeyValueL} ->
329            NewOptions = keydelete(Key, 1, Options),
330            options(NewOptions, Keys, KeyValueL ++ L)
331    end;
332options([], [], L) ->
333    foldl(fun({_,false}, A) -> A;
334             ({Tag,true}, A) -> [Tag | A];
335             (F, A) -> [F | A]
336          end, [], L);
337options(_Options, _, _L) ->
338    badarg.
339
340default_option(file_attributes) -> true;
341default_option(includefile) -> [];
342default_option(parserfile) -> [];
343default_option(report_errors) -> true;
344default_option(report_warnings) -> true;
345default_option(warnings_as_errors) -> false;
346default_option(return_errors) -> false;
347default_option(return_warnings) -> false;
348default_option(time) -> false;
349default_option(verbose) -> false.
350
351atom_option(file_attributes) -> {file_attributes, true};
352atom_option(report_errors) -> {report_errors, true};
353atom_option(report_warnings) -> {report_warnings, true};
354atom_option(warnings_as_errors) -> {warnings_as_errors,true};
355atom_option(return_errors) -> {return_errors, true};
356atom_option(return_warnings) -> {return_warnings, true};
357atom_option(time) -> {time, true};
358atom_option(verbose) -> {verbose, true};
359atom_option(Key) -> Key.
360
361is_filename(T) ->
362    try filename:flatten(T)
363    catch error: _ -> no
364    end.
365
366shorten_filename(Name0) ->
367    {ok,Cwd} = file:get_cwd(),
368    case string:prefix(Name0, Cwd) of
369        nomatch -> Name0;
370        Rest ->
371            case unicode:characters_to_list(Rest) of
372                "/"++N -> N;
373                N -> N
374            end
375    end.
376
377start(Infilex, Options) ->
378    Infile = assure_extension(Infilex, ".yrl"),
379    {_, Outfilex0} = lists:keyfind(parserfile, 1, Options),
380    {_, Includefilex} = lists:keyfind(includefile, 1, Options),
381    Outfilex = case Outfilex0 of
382                   [] -> filename:rootname(Infilex, ".yrl");
383                   _ -> Outfilex0
384               end,
385    Includefile = case Includefilex of
386                      [] -> [];
387                      _ -> assure_extension(Includefilex,".hrl")
388                  end,
389    IncludefileVersion = includefile_version(Includefile),
390    Outfile = assure_extension(Outfilex, ".erl"),
391    Module = list_to_atom(filename:basename(Outfile, ".erl")),
392    #yecc{infile = Infile,
393          outfile = Outfile,
394          includefile = Includefile,
395          includefile_version = IncludefileVersion,
396          module = Module,
397          options = Options,
398          verbose = member(verbose, Options),
399          file_attrs = member(file_attributes, Options)}.
400
401assure_extension(File, Ext) ->
402    concat([strip_extension(File, Ext), Ext]).
403
404%% Assumes File is a filename.
405strip_extension(File, Ext) ->
406    case filename:extension(File) of
407        Ext -> filename:rootname(File);
408        _Other -> File
409    end.
410
411infile(Parent, Infilex, Options) ->
412    St0 = start(Infilex, Options),
413    St = case file:open(St0#yecc.infile, [read, read_ahead]) of
414             {ok, Inport} ->
415                 try
416                     Encoding = epp:set_encoding(Inport),
417                     St1 = St0#yecc{inport = Inport, encoding = Encoding},
418                     outfile(St1)
419                 after
420                     ok = file:close(Inport)
421                 end;
422             {error, Reason} ->
423                 add_error(St0#yecc.infile, none, {file_error, Reason}, St0)
424         end,
425    case {St#yecc.errors, werror(St)} of
426        {[], false} -> ok;
427        _ -> _ = file:delete(St#yecc.outfile), ok
428    end,
429    Parent ! {self(), yecc_ret(St)}.
430
431werror(St) ->
432    St#yecc.warnings =/= []
433	andalso member(warnings_as_errors, St#yecc.options).
434
435outfile(St0) ->
436    case file:open(St0#yecc.outfile, [write, delayed_write]) of
437        {ok, Outport} ->
438            try
439                %% Set the same encoding as infile:
440                set_encoding(St0, Outport),
441                generate(St0#yecc{outport = Outport, line = 1})
442            catch
443                throw: St1  ->
444                    St1;
445                exit: Reason ->
446                    add_error({internal_error, Reason}, St0)
447            after
448               ok = file:close(Outport)
449            end;
450        {error, Reason} ->
451            add_error(St0#yecc.outfile, none, {file_error, Reason}, St0)
452    end.
453
454os_process_size() ->
455    case os:type() of
456        {unix, sunos} ->
457            Size = os:cmd("ps -o vsz -p " ++ os:getpid() ++ " | tail -1"),
458            list_to_integer(nonl(Size));
459        _ ->
460            0
461    end.
462
463nonl([$\n]) -> [];
464nonl([]) -> [];
465nonl([H|T]) -> [H|nonl(T)].
466
467timeit(Name, Fun, St0) ->
468    Time = runtime,
469    %% Time = wall_clock,
470    {Before, _} = statistics(Time),
471    St = Fun(St0),
472    {After, _} = statistics(Time),
473    Mem0 = erts_debug:flat_size(St)*erlang:system_info(wordsize),
474    Mem = lists:flatten(io_lib:format("~.1f kB", [Mem0/1024])),
475    Sz = lists:flatten(io_lib:format("~.1f MB", [os_process_size()/1024])),
476    io:fwrite(" ~-30w: ~10.2f s ~12s ~10s\n",
477              [Name, (After-Before)/1000, Mem, Sz]),
478    St.
479
480-define(PASS(P), {P, fun P/1}).
481
482generate(St0) ->
483    St1 = output_encoding_comment(St0),
484    Passes = [?PASS(parse_grammar), ?PASS(check_grammar),
485              ?PASS(states_and_goto_table), ?PASS(parse_actions),
486              ?PASS(action_conflicts), ?PASS(write_file)],
487    F = case member(time, St1#yecc.options) of
488            true ->
489                io:fwrite(<<"Generating parser from grammar in ~ts\n">>,
490                          [format_filename(St1#yecc.infile, St1)]),
491                fun timeit/3;
492            false ->
493                fun(_Name, Fn, St) -> Fn(St) end
494        end,
495    Fun = fun({Name, Fun}, St) ->
496                  St2 = F(Name, Fun, St),
497                  if
498                      St2#yecc.errors =:= [] -> St2;
499                      true -> throw(St2)
500                  end
501          end,
502    foldl(Fun, St1, Passes).
503
504parse_grammar(St) ->
505    parse_grammar(St#yecc.inport, 1, St).
506
507parse_grammar(Inport, Line, St) ->
508    {NextLine, Grammar} = read_grammar(Inport, St, Line),
509    parse_grammar(Grammar, Inport, NextLine, St).
510
511parse_grammar(eof, _Inport, _NextLine, St) ->
512    St;
513parse_grammar({#symbol{name = 'Header'}, Ss}, Inport, NextLine, St0) ->
514    St1 = St0#yecc{header = [S || {string,_,S} <- Ss]},
515    parse_grammar(Inport, NextLine, St1);
516parse_grammar({#symbol{name = 'Erlang'}, [#symbol{name = code}]}, _Inport,
517              NextLine, St) ->
518    St#yecc{erlang_code = NextLine};
519parse_grammar(Grammar, Inport, NextLine, St0) ->
520    St = parse_grammar(Grammar, St0),
521    parse_grammar(Inport, NextLine, St).
522
523parse_grammar({error,ErrorLine,Error}, St) ->
524    add_error(erl_anno:new(ErrorLine), Error, St);
525parse_grammar({rule, Rule, Tokens}, St0) ->
526    NmbrOfDaughters = case Rule of
527                          [_, #symbol{name = '$empty'}]  -> 0;
528                          _ -> length(Rule) - 1
529                      end,
530    {IsGuard, IsWellFormed} = check_action(Tokens),
531    {Tokens1, St} = subst_pseudo_vars(Tokens,
532                                      NmbrOfDaughters,
533                                      St0),
534    RuleDef = #rule{symbols = Rule,
535                    tokens = Tokens1,
536                    is_guard = IsGuard,
537                    is_well_formed = IsWellFormed},
538    St#yecc{rules_list = [RuleDef | St#yecc.rules_list]};
539parse_grammar({prec, Prec}, St) ->
540    St#yecc{prec = Prec ++ St#yecc.prec};
541parse_grammar({#symbol{}, [{string,Anno,String}]}, St) ->
542    add_error(Anno, {bad_symbol, String}, St);
543parse_grammar({#symbol{anno = Anno, name = Name}, Symbols}, St) ->
544    CF = fun(I) ->
545                 case element(I, St) of
546                     [] ->
547                         setelement(I, St, Symbols);
548                     _ ->
549                         add_error(Anno, {duplicate_declaration, Name}, St)
550                 end
551         end,
552    OneSymbol = length(Symbols) =:= 1,
553    case Name of
554        'Nonterminals' -> CF(#yecc.nonterminals);
555        'Terminals' -> CF(#yecc.terminals);
556        'Rootsymbol' when OneSymbol -> CF(#yecc.rootsymbol);
557        'Endsymbol' when OneSymbol ->  CF(#yecc.endsymbol);
558        'Expect' when OneSymbol -> CF(#yecc.expect_shift_reduce);
559        'States' when OneSymbol -> CF(#yecc.expect_n_states); % undocumented
560        _ -> add_warning(Anno, bad_declaration, St)
561    end.
562
563read_grammar(Inport, St, Line) ->
564    case yeccscan:scan(Inport, '', Line) of
565        {eof, NextLine} ->
566            {NextLine, eof};
567        {error, {ErrorLine, Mod, What}, NextLine} ->
568            {NextLine, {error, ErrorLine, {error, Mod, What}}};
569        {error, terminated} ->
570            throw(St);
571        {error, _} ->
572            File = St#yecc.infile,
573            throw(add_error(File, none, cannot_parse, St));
574        {ok, Input, NextLine} ->
575            {NextLine, case yeccparser:parse(Input) of
576                           {error, {ErrorLine, Mod, Message}} ->
577                               {error, ErrorLine, {error, Mod, Message}};
578                           {ok, {rule, Rule, {erlang_code, Tokens}}} ->
579                               {rule, Rule, Tokens};
580                           {ok, {#symbol{name=P},
581                                 [#symbol{name=I} | OpL]}=Ss} ->
582                               A = precedence(P),
583                               if
584                                   A =/= unknown,
585                                   is_integer(I),
586                                   OpL =/= [] ->
587                                       Ps = [{Op, I , A} || Op <- OpL],
588                                       {prec, Ps};
589                                   true ->
590                                       Ss
591                               end;
592                           {ok, Ss} ->
593                               Ss
594                       end}
595    end.
596
597precedence('Left') -> left;
598precedence('Right') -> right;
599precedence('Unary') -> unary;
600precedence('Nonassoc') -> nonassoc;
601precedence(_) -> unknown.
602
603%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
604
605check_grammar(St0) ->
606    Empty = #symbol{anno = none, name = '$empty'},
607    AllSymbols = St0#yecc.nonterminals ++ St0#yecc.terminals ++ [Empty],
608    St1 = St0#yecc{all_symbols = AllSymbols},
609    Cs = [fun check_nonterminals/1, fun check_terminals/1,
610          fun check_rootsymbol/1, fun check_endsymbol/1,
611          fun check_expect/1, fun check_states/1,
612          fun check_precedences/1, fun check_rules/1],
613    foldl(fun(F, St) -> F(St) end, St1, Cs).
614
615check_nonterminals(St) ->
616    case St#yecc.nonterminals of
617        [] ->
618            add_error(nonterminals_missing, St);
619        Nonterminals ->
620            {Unique, Dups} = duplicates(names(Nonterminals)),
621            St1 = add_warnings(Dups, duplicate_nonterminal, St),
622            St2 = check_reserved(Unique, St1),
623            St2#yecc{nonterminals = [?ACCEPT | Unique]}
624    end.
625
626check_terminals(St0) ->
627    case St0#yecc.terminals of
628        [] ->
629            add_error(terminals_missing, St0);
630        Terminals ->
631            {Unique, Dups} = duplicates(names(Terminals)),
632            St1 = add_warnings(Dups, duplicate_terminal, St0),
633            Common = intersect(St1#yecc.nonterminals, Unique),
634            St2 = add_errors(Common, symbol_terminal_and_nonterminal, St1),
635            St3 = check_reserved(Unique, St2),
636            St3#yecc{terminals = ['$empty' | Unique]}
637    end.
638
639check_reserved(Names, St) ->
640    add_errors(intersect(Names, ['$empty', '$end', '$undefined']),
641               reserved, St).
642
643check_rootsymbol(St) ->
644    case St#yecc.rootsymbol of
645        [] ->
646            add_error(rootsymbol_missing, St);
647        [#symbol{anno = Anno, name = SymName}] ->
648            case kind_of_symbol(St, SymName) of
649                nonterminal ->
650                    St#yecc{rootsymbol = SymName};
651                _ ->
652                    add_error(Anno, {bad_rootsymbol, SymName}, St)
653            end
654    end.
655
656check_endsymbol(St) ->
657    case St#yecc.endsymbol of
658        [] ->
659            St#yecc{endsymbol = '$end'};
660        [#symbol{anno = Anno, name = SymName}] ->
661            case kind_of_symbol(St, SymName) of
662                nonterminal ->
663                    add_error(Anno, {endsymbol_is_nonterminal, SymName}, St);
664                terminal ->
665                    add_error(Anno, {endsymbol_is_terminal, SymName}, St);
666                _ ->
667                    St#yecc{endsymbol = SymName}
668            end
669    end.
670
671check_expect(St0) ->
672    case St0#yecc.expect_shift_reduce of
673        [] ->
674            St0#yecc{expect_shift_reduce = 0};
675        [#symbol{name = Expect}] when is_integer(Expect) ->
676            St0#yecc{expect_shift_reduce = Expect};
677        [#symbol{anno = Anno, name = Name}] ->
678            St1 = add_error(Anno, {bad_expect, Name}, St0),
679            St1#yecc{expect_shift_reduce = 0}
680    end.
681
682check_states(St) ->
683    case St#yecc.expect_n_states of
684        [] ->
685            St;
686        [#symbol{name = NStates}] when is_integer(NStates) ->
687            St#yecc{expect_n_states = NStates};
688        [#symbol{anno = Anno, name = Name}] ->
689            add_error(Anno, {bad_states, Name}, St)
690    end.
691
692check_precedences(St0) ->
693    {St1, _} =
694        foldr(fun({#symbol{anno = Anno, name = Op},_I,_A}, {St,Ps}) ->
695                      case member(Op, Ps) of
696                          true ->
697                              {add_error(Anno, {duplicate_precedence,Op}, St),
698                               Ps};
699                          false ->
700                              {St, [Op | Ps]}
701                      end
702              end, {St0,[]}, St0#yecc.prec),
703    foldl(fun({#symbol{anno = Anno, name = Op},I,A}, St) ->
704                  case kind_of_symbol(St, Op) of
705                      endsymbol ->
706                          add_error(Anno,{precedence_op_is_endsymbol,Op}, St);
707                      unknown ->
708                          add_error(Anno, {precedence_op_is_unknown, Op}, St);
709                      _ ->
710                          St#yecc{prec = [{Op,I,A} | St#yecc.prec]}
711                  end
712          end, St1#yecc{prec = []}, St1#yecc.prec).
713
714check_rule(Rule0, {St0,Rules}) ->
715    Symbols = Rule0#rule.symbols,
716    #symbol{anno = HeadAnno, name = Head} = hd(Symbols),
717    case member(Head, St0#yecc.nonterminals) of
718        false ->
719            {add_error(HeadAnno, {undefined_nonterminal, Head}, St0), Rules};
720        true ->
721            St = check_rhs(tl(Symbols), St0),
722            Rule = Rule0#rule{anno = HeadAnno, symbols = names(Symbols)},
723            {St, [Rule | Rules]}
724    end.
725
726check_rules(St0) ->
727    {St,Rules0} = foldl(fun check_rule/2, {St0,[]}, St0#yecc.rules_list),
728    case St#yecc.rules_list of
729        [] ->
730            add_error(no_grammar_rules, St);
731        _ ->
732            Rule = #rule{anno = none,
733                         symbols = [?ACCEPT, St#yecc.rootsymbol],
734                         tokens = []},
735            Rules1 = [Rule | Rules0],
736            Rules = map(fun({R,I}) -> R#rule{n = I} end,  count(0, Rules1)),
737            St#yecc{rules_list = Rules, rules = list_to_tuple(Rules)}
738    end.
739
740duplicates(List) ->
741    Unique = usort(List),
742    {Unique, List -- Unique}.
743
744names(Symbols) ->
745    map(fun(Symbol) -> Symbol#symbol.name end, Symbols).
746
747symbol_anno(Name, St) ->
748    #symbol{anno = Anno} = symbol_find(Name, St#yecc.all_symbols),
749    Anno.
750
751symbol_member(Symbol, Symbols) ->
752    symbol_find(Symbol#symbol.name, Symbols) =/= false.
753
754symbol_find(Name, Symbols) ->
755    lists:keyfind(Name, #symbol.name, Symbols).
756
757states_and_goto_table(St0) ->
758    St1 = create_symbol_table(St0),
759    St = compute_states(St1),
760    create_precedence_table(St).
761
762parse_actions(St) ->
763    _ = erase(), % the pd is used when decoding lookahead sets
764    ParseActions = compute_parse_actions(St#yecc.n_states, St, []),
765    _ = erase(),
766    St#yecc{parse_actions = ParseActions, state_tab = []}.
767
768action_conflicts(St0) ->
769    St = find_action_conflicts(St0),
770    St#yecc{conflicts_done = true}.
771
772-record(state_info, {reduce_only, state_repr, comment}).
773
774write_file(St0) ->
775    #yecc{parse_actions = ParseActions, goto_tab = GotoTab} = St0,
776    Sorted = sort_parse_actions(ParseActions),
777    StateReprs = find_identical_shift_states(Sorted),
778    StateInfo = collect_some_state_info(Sorted, StateReprs),
779    StateJumps = find_partial_shift_states(Sorted, StateReprs),
780    UserCodeActions = find_user_code(Sorted, St0),
781    #yecc{infile = Infile, outfile = Outfile,
782          inport = Inport, outport = Outport,
783          nonterminals = Nonterminals} = St0,
784    {St10, N_lines, LastErlangCodeLine} =
785        output_prelude(Outport, Inport, St0),
786    St20 = St10#yecc{line = St10#yecc.line + N_lines},
787    St25 = nl(St20),
788    St30 = output_file_directive(St25, Outfile, St25#yecc.line),
789    St40 = nl(St30),
790    St50 = output_actions(St40, StateJumps, StateInfo),
791    Go0 = [{Symbol,{From,To}} || {{From,Symbol},To} <- ets:tab2list(GotoTab)],
792    Go = family_with_domain(Go0, Nonterminals),
793    St60 = output_goto(St50, Go, StateInfo),
794    St70 = output_inlined(St60, UserCodeActions, Infile),
795    St = nl(St70),
796    case LastErlangCodeLine of
797        %% Just in case warnings or errors are emitted after the last
798        %% line of the file.
799        {last_erlang_code_line, Last_line} ->
800            output_file_directive(St, Infile, Last_line);
801        no_erlang_code ->
802            St
803    end.
804
805yecc_ret(St0) ->
806    St = check_expected(St0),
807    report_errors(St),
808    report_warnings(St),
809    Es = pack_errors(St#yecc.errors),
810    Ws = pack_warnings(St#yecc.warnings),
811    Werror = werror(St),
812    if
813        Werror ->
814            do_error_return(St, Es, Ws);
815        Es =:= [] ->
816            case member(return_warnings, St#yecc.options) of
817                true -> {ok, St#yecc.outfile, Ws};
818                false -> {ok, St#yecc.outfile}
819            end;
820        true ->
821            do_error_return(St, Es, Ws)
822    end.
823
824do_error_return(St, Es, Ws) ->
825    case member(return_errors, St#yecc.options) of
826        true -> {error, Es, Ws};
827        false -> error
828    end.
829
830check_expected(St0) ->
831    #yecc{shift_reduce = SR, reduce_reduce = RR, expect_shift_reduce = ExpSR,
832          n_states = NStates0, expect_n_states = ExpStates,
833          conflicts_done = Done} = St0,
834    N_RR = length(usort(RR)),
835    N_SR = length(usort(SR)),
836    St1 = if
837              not Done ->
838                  St0;
839              N_SR =:= ExpSR, N_RR =:= 0 ->
840                  St0;
841              true ->
842                  add_warning(none, {conflicts, N_SR, N_RR}, St0)
843          end,
844    NStates = NStates0 + 1,
845    if
846        (not Done) or (ExpStates =:= []) or (NStates =:= ExpStates) ->
847            St1;
848        true ->
849            add_warning(none, {n_states, ExpStates, NStates}, St1)
850    end.
851
852pack_errors([{File,_} | _] = Es) ->
853    [{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}];
854pack_errors([]) ->
855    [].
856
857pack_warnings([{File,_} | _] = Ws) ->
858    [{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}];
859pack_warnings([]) ->
860    [].
861
862report_errors(St) ->
863    case member(report_errors, St#yecc.options) of
864        true ->
865            foreach(fun({File,{none,Mod,E}}) ->
866                            io:fwrite(<<"~ts: ~ts\n">>,
867                                      [File,Mod:format_error(E)]);
868                       ({File,{Line,Mod,E}}) ->
869                            io:fwrite(<<"~ts:~w: ~ts\n">>,
870                                      [File,Line,Mod:format_error(E)])
871                    end, sort(St#yecc.errors));
872        false ->
873            ok
874    end.
875
876report_warnings(St) ->
877    Werror = member(warnings_as_errors, St#yecc.options),
878    Prefix = case Werror of
879		 true -> "";
880		 false -> "Warning: "
881	     end,
882    ReportWerror = Werror andalso member(report_errors, St#yecc.options),
883    case member(report_warnings, St#yecc.options) orelse ReportWerror of
884        true ->
885            foreach(fun({File,{none,Mod,W}}) ->
886                            io:fwrite(<<"~ts: ~s~ts\n">>,
887                                      [File,Prefix,
888				       Mod:format_error(W)]);
889                       ({File,{Line,Mod,W}}) ->
890                            io:fwrite(<<"~ts:~w: ~s~ts\n">>,
891                                      [File,Line,Prefix,
892				       Mod:format_error(W)])
893                    end, sort(St#yecc.warnings));
894        false ->
895            ok
896    end.
897
898add_error(E, St) ->
899    add_error(none, E, St).
900
901add_error(Anno, E, St) ->
902    add_error(St#yecc.infile, Anno, E, St).
903
904add_error(File, Anno, E, St) ->
905    Loc = location(Anno),
906    St#yecc{errors = [{File,{Loc,?MODULE,E}}|St#yecc.errors]}.
907
908add_errors(SymNames, E0, St0) ->
909    foldl(fun(SymName, St) ->
910                  add_error(symbol_anno(SymName, St), {E0, SymName}, St)
911          end, St0, SymNames).
912
913add_warning(Anno, W, St) ->
914    Loc = location(Anno),
915    St#yecc{warnings = [{St#yecc.infile,{Loc,?MODULE,W}}|St#yecc.warnings]}.
916
917add_warnings(SymNames, W0, St0) ->
918    foldl(fun(SymName, St) ->
919                  add_warning(symbol_anno(SymName, St), {W0, SymName}, St)
920          end, St0, SymNames).
921
922check_rhs([#symbol{name = '$empty'}], St) ->
923    St;
924check_rhs(Rhs, St0) ->
925    case symbol_find('$empty', Rhs) of
926        #symbol{anno = Anno} ->
927            add_error(Anno, illegal_empty, St0);
928        false ->
929            foldl(fun(Sym, St) ->
930                          case symbol_member(Sym, St#yecc.all_symbols) of
931                              true ->
932                                  St;
933                              false ->
934                                  E = {undefined_symbol,Sym#symbol.name},
935                                  add_error(Sym#symbol.anno, E, St)
936                          end
937                  end, St0, Rhs)
938    end.
939
940check_action(Tokens) ->
941    case erl_parse:parse_exprs(add_roberts_dot(Tokens, erl_anno:new(0))) of
942        {error, _Error} ->
943            {false, false};
944        {ok, [Expr | Exprs]} ->
945            IsGuard = Exprs =:= [] andalso erl_lint:is_guard_test(Expr),
946            {IsGuard, true}
947    end.
948
949add_roberts_dot([], Anno) ->
950    [{'dot', Anno}];
951add_roberts_dot([{'dot', Anno} | _], _) ->
952    [{'dot', Anno}];
953add_roberts_dot([Token | Tokens], _) ->
954    [Token | add_roberts_dot(Tokens, element(2, Token))].
955
956subst_pseudo_vars([], _, St) ->
957    {[], St};
958subst_pseudo_vars([H0 | T0], NmbrOfDaughters, St0) ->
959    {H, St1} = subst_pseudo_vars(H0, NmbrOfDaughters, St0),
960    {T, St} = subst_pseudo_vars(T0, NmbrOfDaughters, St1),
961    {[H | T], St};
962subst_pseudo_vars({atom, Anno, Atom}, NmbrOfDaughters, St0) ->
963    case atom_to_list(Atom) of
964        [$$ | Rest] ->
965            try list_to_integer(Rest) of
966                N when N > 0, N =< NmbrOfDaughters ->
967                    {{var, Anno, list_to_atom(append("__", Rest))}, St0};
968                _ ->
969                    St = add_error(Anno,
970                                   {undefined_pseudo_variable, Atom},
971                                   St0),
972                    {{atom, Anno, '$undefined'}, St}
973            catch
974                error: _ -> {{atom, Anno, Atom}, St0}
975            end;
976        _ ->
977            {{atom, Anno, Atom}, St0}
978    end;
979subst_pseudo_vars(Tuple, NmbrOfDaughters, St0) when is_tuple(Tuple) ->
980    {L, St} = subst_pseudo_vars(tuple_to_list(Tuple), NmbrOfDaughters, St0),
981    {list_to_tuple(L), St};
982subst_pseudo_vars(Something_else, _, St) ->
983    {Something_else, St}.
984
985kind_of_symbol(St, SymName) ->
986    case member(SymName, St#yecc.nonterminals) of
987        false ->
988            case member(SymName, St#yecc.terminals) of
989                false ->
990                    case St#yecc.endsymbol of
991                        SymName ->
992                            endsymbol;
993                        _ ->
994                            unknown
995                    end;
996                true ->
997                    terminal
998            end;
999        true ->
1000            nonterminal
1001    end.
1002
1003%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1004% Computing parse states and goto table from grammar.
1005% Start item: {0, [Endsymbol]} <->
1006% (['ACCEPT' '.', Rootsymbol], {'$'}) in Aho & Johnson
1007% where '$end' is the default end of input symbol of the
1008% scanner if no 'Endsymbol' has been declared in the syntax file.
1009
1010-record(tabs, {
1011          symbols,      % ETS-set, keypos 1: {SymbolName, SymbolCode}
1012          inv_symbols,  % ETS-set, keypos 2: {SymbolName, SymbolCode}
1013          state_id,     % ETS-bag, keypos 1: {StateId, StateNum}
1014                        % StateId is not unique for a state.
1015          rp_rhs,       % rule pointer -> the remaining rhs symbols
1016          rp_info,      % rule pointer -> expanding rules and lookahead
1017          goto          % ETS-bag, keypos 1: first
1018                        % {{FromStateNum, Symbol, ToStateNum}}, then
1019                        % {{FromStateNum, Symbol}, ToStateNum}
1020         }).
1021
1022-record(item, { % what states are made of
1023          rule_pointer,
1024          look_ahead,
1025          rhs
1026         }).
1027
1028compute_states(St0) ->
1029    SymbolTab = St0#yecc.symbol_tab,
1030    CodedRules = map(fun(#rule{symbols = Syms} = R) ->
1031                             R#rule{symbols = code_symbols(Syms, SymbolTab)}
1032                     end, St0#yecc.rules_list),
1033    CodedNonterminals = code_symbols(St0#yecc.nonterminals, SymbolTab),
1034    %% Only coded in this phase; StC is thrown away.
1035    StC = St0#yecc{rules_list = CodedRules,
1036                   rules = list_to_tuple(CodedRules),
1037                   nonterminals = CodedNonterminals},
1038    {RuleIndex, RulePointer2Rule} =
1039        make_rule_index(StC, St0#yecc.rules_list),
1040    StateTab0 = {},
1041    StateIdTab = ets:new(yecc_state_id, [set]),
1042    GotoTab = ets:new(yecc_goto, [bag]),
1043    RulePointerRhs = make_rhs_index(StC#yecc.rules_list),
1044    RulePointerInfo = make_rule_pointer_info(StC, RulePointerRhs, RuleIndex),
1045
1046    Tables = #tabs{symbols = SymbolTab,
1047                   state_id = StateIdTab,
1048                   rp_rhs = RulePointerRhs,
1049                   rp_info = RulePointerInfo,
1050                   goto = GotoTab},
1051
1052    _ = erase(),
1053    EndsymCode = code_terminal(StC#yecc.endsymbol, StC#yecc.symbol_tab),
1054    {StateId, State0} = compute_state([{EndsymCode, 1}], Tables),
1055
1056    StateNum0 = first_state(),
1057    FirstState = {StateNum0, State0},
1058    StateTab1 = insert_state(Tables, StateTab0, FirstState, StateId),
1059    {StateTab, N} =
1060        compute_states1([{StateNum0, get_current_symbols(State0)}],
1061                        FirstState, StateTab1, Tables),
1062    true = ets:delete(StateIdTab),
1063    St = St0#yecc{state_tab = StateTab, goto_tab = GotoTab, n_states = N,
1064                  rule_pointer2rule = RulePointer2Rule},
1065    decode_goto(GotoTab, St#yecc.inv_symbol_tab),
1066    check_usage(St).
1067
1068first_state() ->
1069    0.
1070
1071decode_goto(GotoTab, InvSymTab) ->
1072    G = ets:tab2list(GotoTab),
1073    ets:delete_all_objects(GotoTab),
1074    ets:insert(GotoTab,
1075               map(fun({{From, Sym, Next}}) ->
1076                           {{From, decode_symbol(Sym, InvSymTab)}, Next}
1077                   end, G)).
1078
1079check_usage(St0) ->
1080    SelSyms = ets:fun2ms(fun({{_,Sym},_}) -> Sym end),
1081    UsedSymbols = ets:select(St0#yecc.goto_tab, SelSyms),
1082    Syms = ordsets:from_list([?ACCEPT, '$empty' | UsedSymbols]),
1083    NonTerms = ordsets:from_list(St0#yecc.nonterminals),
1084    UnusedNonTerms = ordsets:to_list(ordsets:subtract(NonTerms, Syms)),
1085    St1 = add_warnings(UnusedNonTerms, unused_nonterminal, St0),
1086    Terms = ordsets:from_list(St0#yecc.terminals),
1087    St2 = add_warnings(ordsets:to_list(ordsets:subtract(Terms, Syms)),
1088                       unused_terminal, St1),
1089    DefinedNonTerminals = map(fun(#rule{symbols = [Name | _]}) ->
1090                                            Name
1091                              end, St2#yecc.rules_list),
1092    DefNonTerms = ordsets:from_list(DefinedNonTerminals),
1093    UndefNonTerms = ordsets:subtract(NonTerms, DefNonTerms),
1094    add_errors(ordsets:to_list(ordsets:subtract(UndefNonTerms,
1095                                                UnusedNonTerms)),
1096               missing_syntax_rule, St2).
1097
1098%% States are sometimes big, should not be copied to ETS tables.
1099%% Here an "extendible" tuple is used.
1100lookup_state(StateTab, N) ->
1101    element(N+1, StateTab).
1102
1103insert_state(#tabs{state_id = StateIdTab}, StateTab0, State, StateId) ->
1104    {N, _Items} = State,
1105    insert_state_id(StateIdTab, N, StateId),
1106    StateTab = if
1107                   tuple_size(StateTab0) > N ->
1108                       StateTab0;
1109                   true ->
1110                       list_to_tuple(tuple_to_list(StateTab0) ++
1111                                     lists:duplicate(round(1 + N * 1.5), []))
1112               end,
1113    setelement(N+1, StateTab, State).
1114
1115insert_state_id(StateIdTab, N, StateId) ->
1116    true = ets:insert(StateIdTab, {StateId, N}).
1117
1118compute_states1([], {N, _}=_CurrState, StateTab0, _Tables) ->
1119    {StateTab0, N};
1120compute_states1([{N, Symbols} | Try], CurrState, StateTab, Tables) ->
1121    {_N, S} = lookup_state(StateTab, N),
1122    Seeds = state_seeds(S, Symbols),
1123    compute_states2(Seeds, N, Try, CurrState, StateTab, Tables).
1124
1125compute_states2([], _N, Try, CurrState, StateTab, Tables) ->
1126    compute_states1(Try, CurrState, StateTab, Tables);
1127compute_states2([{Sym,Seed} | Seeds], N, Try, CurrState, StateTab, Tables) ->
1128    {StateId, NewState} = compute_state(Seed, Tables),
1129    case check_states(NewState, StateId, StateTab, Tables) of
1130        add ->
1131            {M, _} = CurrState,
1132            %% io:fwrite(<<"Adding state ~w\n">>, [M + 1]),
1133            CurrentSymbols = get_current_symbols(NewState),
1134            Next = M + 1,
1135            NextState = {Next, NewState},
1136            NewStateTab = insert_state(Tables, StateTab, NextState, StateId),
1137            insert_goto(Tables, N, Sym, Next),
1138            compute_states2(Seeds, N, [{Next, CurrentSymbols} | Try],
1139                            NextState, NewStateTab, Tables);
1140        {old, M} ->
1141            %% io:fwrite(<<"Identical to old state ~w\n">>, [M]),
1142            insert_goto(Tables, N, Sym, M),
1143            compute_states2(Seeds, N, Try, CurrState, StateTab, Tables);
1144        {merge, M, NewCurrent} ->
1145            %% io:fwrite(<<"Merging with state ~w\n">>, [M]),
1146            Try1 = case lists:keyfind(M, 1, Try) of
1147                       false ->
1148                           [{M, NewCurrent} | Try];
1149                       {_, OldCurrent} ->
1150                           case ordsets:is_subset(NewCurrent, OldCurrent) of
1151                               true ->
1152                                   Try;
1153                               false ->
1154                                   [{M, ordsets:union(NewCurrent, OldCurrent)}
1155                                    | keydelete(M, 1, Try)]
1156                           end
1157                   end,
1158            NewStateTab = merge_states(NewState, StateTab, Tables, M,StateId),
1159            insert_goto(Tables, N, Sym, M),
1160            compute_states2(Seeds, N, Try1, CurrState, NewStateTab, Tables)
1161    end.
1162
1163insert_goto(Tables, From, Sym, To) ->
1164    true = ets:insert(Tables#tabs.goto, {{From, Sym, To}}).
1165
1166%% Create an ets table for faster lookups.
1167create_symbol_table(St) ->
1168    #yecc{terminals = Terminals, endsymbol = Endsymbol} = St,
1169    SymbolTab = ets:new(yecc_symbols, [{keypos,1}]),
1170    %% '$empty' is always assigned 0
1171    Ts = ['$empty', Endsymbol | delete('$empty', Terminals)],
1172    TsC = count(0, Ts),
1173    NTsC = map(fun({NT,I}) -> {NT,-I} end, count(1, St#yecc.nonterminals)),
1174    Cs = TsC++NTsC,
1175    true = ets:insert(SymbolTab, Cs),
1176
1177    InvSymTable = ets:new(yecc_inverted_terminals, [{keypos,2}]),
1178    true = ets:insert(InvSymTable, Cs),
1179
1180    St#yecc{symbol_tab = SymbolTab, inv_symbol_tab = InvSymTable}.
1181
1182get_current_symbols(State) ->
1183    usort(get_current_symbols1(State, [])).
1184
1185get_current_symbols1([], Syms) ->
1186    Syms;
1187get_current_symbols1([#item{rhs = Rhs} | Items], Syms) ->
1188    case Rhs of
1189        [] ->
1190            get_current_symbols1(Items, Syms);
1191        [Symbol | _] ->
1192            get_current_symbols1(Items, [Symbol | Syms])
1193    end.
1194
1195state_seeds(Items, Symbols) ->
1196    L = [{S,{LA,RP + 1}} || #item{rule_pointer = RP, look_ahead = LA,
1197                                  rhs = [S | _]} <- Items],
1198    state_seeds1(keysort(1, L), Symbols).
1199
1200state_seeds1(_L, []) ->
1201    [];
1202state_seeds1(L, [Symbol | Symbols]) ->
1203    state_seeds(L, Symbol, Symbols, []).
1204
1205state_seeds([{Symbol, Item} | L], Symbol, Symbols, Is) ->
1206    state_seeds(L, Symbol, Symbols, [Item | Is]);
1207state_seeds([{S, _Item} | L], Symbol, Symbols, Is) when S < Symbol ->
1208    state_seeds(L, Symbol, Symbols, Is);
1209state_seeds(L, Symbol, Symbols, Is) ->
1210    [{Symbol, Is} | state_seeds1(L, Symbols)].
1211
1212compute_state(Seed, Tables) ->
1213    RpInfo = Tables#tabs.rp_info,
1214    foreach(fun({LA, RulePointer}) -> put(RulePointer, LA) end, Seed),
1215    foreach(fun({LA, RP}) -> compute_closure(LA, RP, RpInfo) end, Seed),
1216    Closure = keysort(1, erase()),
1217    state_items(Closure, [], [], Tables#tabs.rp_rhs).
1218
1219%% Collects a uniqe id for the state (all rule pointers).
1220state_items([{RP, LA} | L], Is, Id, RpRhs) ->
1221    I = #item{rule_pointer = RP, look_ahead = LA, rhs = element(RP, RpRhs)},
1222    state_items(L, [I | Is], [RP | Id], RpRhs);
1223state_items(_, Is, Id, _RpRhs) ->
1224    {Id, Is}.
1225
1226-compile({inline,[compute_closure/3]}).
1227compute_closure(Lookahead, RulePointer, RpInfo) ->
1228    case element(RulePointer, RpInfo) of
1229        []=Void -> % no followers, or terminal
1230            Void;
1231        {no_union, ExpandingRules, NewLookahead} ->
1232            compute_closure1(ExpandingRules, NewLookahead, RpInfo);
1233        {union, ExpandingRules, Lookahead0} ->
1234            NewLookahead = set_union(Lookahead0, Lookahead),
1235            compute_closure1(ExpandingRules, NewLookahead, RpInfo);
1236        ExpandingRules ->
1237            compute_closure1(ExpandingRules, Lookahead, RpInfo)
1238    end.
1239
1240compute_closure1([RulePointer | Tail], NewLookahead, RpInfo) ->
1241    compute_closure1(Tail, NewLookahead, RpInfo),
1242    case get(RulePointer) of
1243        undefined -> % New
1244            put(RulePointer, NewLookahead),
1245            compute_closure(NewLookahead, RulePointer, RpInfo);
1246        Lookahead2 ->
1247            Lookahead = set_union(Lookahead2, NewLookahead),
1248            if
1249                Lookahead =:= Lookahead2 -> % Old
1250                    Lookahead2; % void()
1251                true -> % Merge
1252                    put(RulePointer, Lookahead),
1253                    compute_closure(NewLookahead, RulePointer, RpInfo)
1254            end
1255    end;
1256compute_closure1(Nil, _, _RpInfo) ->
1257    Nil.
1258
1259%% Check if some old state is a superset of our NewState
1260check_states(NewState, StateId, StateTab, #tabs{state_id = StateIdTab}) ->
1261    try ets:lookup_element(StateIdTab, StateId, 2) of
1262        N ->
1263            {_N, OldState} = lookup_state(StateTab, N),
1264            check_state1(NewState, OldState, [], N)
1265    catch error:_ -> add
1266    end.
1267
1268check_state1([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1],
1269             [#item{look_ahead = Lookahead2} | Items2], Symbols, N) ->
1270    case set_is_subset(Lookahead1, Lookahead2) of
1271        true ->
1272            check_state1(Items1, Items2, Symbols, N);
1273        false ->
1274            case Rhs of
1275                [] ->
1276                    check_state2(Items1, Items2, Symbols, N);
1277                [Symbol | _] ->
1278                    check_state2(Items1, Items2, [Symbol | Symbols], N)
1279            end
1280    end;
1281check_state1([], [], _Symbols, N) ->
1282    {old, N}.
1283
1284check_state2([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1],
1285             [#item{look_ahead = Lookahead2} | Items2], Symbols, N) ->
1286    case set_is_subset(Lookahead1, Lookahead2) of
1287        true ->
1288            check_state2(Items1, Items2, Symbols, N);
1289        false ->
1290            case Rhs of
1291                [] ->
1292                    check_state2(Items1, Items2, Symbols, N);
1293                [Symbol | _] ->
1294                    check_state2(Items1, Items2, [Symbol | Symbols], N)
1295            end
1296    end;
1297check_state2([], [], Symbols, N) ->
1298    {merge, N, usort(Symbols)}.
1299
1300merge_states(NewState, StateTab, Tables, M, StateId) ->
1301    {_M, Old_state} = lookup_state(StateTab, M),
1302    MergedState = merge_states1(NewState, Old_state),
1303    insert_state(Tables, StateTab, {M, MergedState}, StateId).
1304
1305merge_states1([Item1 | Items1], [Item2 | Items2]) ->
1306    LA1 = Item1#item.look_ahead,
1307    LA2 = Item2#item.look_ahead,
1308    if
1309        LA1 =:= LA2 ->
1310            [Item1 | merge_states1(Items1, Items2)];
1311        true ->
1312            [Item1#item{look_ahead = set_union(LA1, LA2)}
1313             | merge_states1(Items1, Items2)]
1314    end;
1315merge_states1(_, _) ->
1316    [].
1317
1318%% RulePointer -> Rhs. Every position Rhs in has its unique "rule pointer".
1319make_rhs_index(RulesList) ->
1320    Index = flatmap(fun(#rule{symbols = [_Non | Daughters]}) ->
1321                            suffixes0(Daughters)
1322                    end, RulesList),
1323    list_to_tuple(Index).
1324
1325suffixes0([?EMPTY]) ->
1326    [[], []];
1327suffixes0(L) ->
1328    suffixes(L).
1329
1330suffixes([]=L) ->
1331    [L];
1332suffixes([_ | T]=L) ->
1333    [L | suffixes(T)].
1334
1335%% Setup info about lookahead and expanding rules for each point
1336%% ("rule pointer") in the grammar.
1337make_rule_pointer_info(StC, RpRhs, RuleIndex) ->
1338    SymbolTab = StC#yecc.symbol_tab,
1339    LcTab = make_left_corner_table(StC),
1340    LA_index = map(fun(Syms) ->
1341                           rp_info(Syms, SymbolTab, LcTab, RuleIndex)
1342                   end, tuple_to_list(RpRhs)),
1343    list_to_tuple(LA_index).
1344
1345rp_info([], _SymbolTab, _LcTab, _RuleIndex) ->
1346    [];
1347rp_info([Category | Followers], SymbolTab, LcTab, RuleIndex) ->
1348    case maps:find(Category, RuleIndex) of
1349        error -> % terminal
1350            [];
1351        {ok, ExpandingRules} when Followers =:= [] ->
1352            ExpandingRules;
1353        {ok, ExpandingRules} ->
1354            case make_lookahead(Followers, SymbolTab, LcTab, set_empty()) of
1355                {empty, LA} ->
1356                    {union, ExpandingRules, LA};
1357                LA ->
1358                    {no_union, ExpandingRules, LA}
1359            end
1360    end.
1361
1362%% Lookahead computation is complicated by the possible existence
1363%% of null string rewriting rules, such as A -> '$empty'.
1364make_lookahead([], _, _, LA) ->
1365    {empty, LA};
1366make_lookahead([Symbol | Symbols], SymbolTab, LcTab, LA) ->
1367    case maps:find(Symbol, LcTab) of
1368        {ok, LeftCorner} -> % nonterminal
1369            case empty_member(LeftCorner) of
1370                true ->
1371                    make_lookahead(Symbols, SymbolTab, LcTab,
1372                                   set_union(empty_delete(LeftCorner), LA));
1373                false ->
1374                    set_union(LeftCorner, LA)
1375            end;
1376        error -> % terminal
1377            set_add(Symbol, LA)
1378    end.
1379
1380%% -> map-of({Nonterminal, [Terminal]}).
1381%% The algorithm FIRST/1 from the Dragon Book.
1382%% Left corner table, all terminals (including '$empty') that can
1383%% begin strings generated by Nonterminal.
1384make_left_corner_table(#yecc{rules_list = RulesList} = St) ->
1385    SymbolTab = left_corner_symbol_table(St),
1386    Rules = map(fun(#rule{symbols = [Lhs | Rhs]}) ->
1387                        {Lhs,{Lhs, Rhs}}
1388                end, RulesList),
1389    LeftHandTab = maps:from_list(family(Rules)),
1390    X0 = [{S,H} || {H,{H,Rhs}} <- Rules,
1391                   S <- Rhs,
1392                   not is_terminal(SymbolTab, S)],
1393    XL = family_with_domain(X0, St#yecc.nonterminals),
1394    X = maps:from_list(XL),
1395    Xref = fun(NT) -> maps:get(NT, X) end,
1396    E = set_empty(),
1397    LC0 = maps:from_list([{H, E} || {H,_} <- XL]),
1398    %% Handle H -> a S, where a is a terminal ('$empty' inclusive).
1399    {Q, LC1} =
1400        foldl(fun({H,{H,[S | _]}}, {Q0, LC}) ->
1401                      case ets:lookup(SymbolTab, S) of
1402                          [{_,Num}=SymbolAndNum] when Num >= 0 ->
1403                              F = set_add_terminal(SymbolAndNum, E),
1404                              {[Xref(H) | Q0], upd_first(H, F, LC)};
1405                          _ ->
1406                              {Q0, LC}
1407                      end
1408              end, {[], LC0}, Rules),
1409    left_corners(Q, LC1, LeftHandTab, SymbolTab, Xref).
1410
1411left_corners(Q0, LC0, LeftHandTab, SymbolTab, Xref) ->
1412    case usort(append(Q0)) of
1413        [] ->
1414            LC0;
1415        Q1 ->
1416            Rs = flatmap(fun(NT) -> maps:get(NT, LeftHandTab) end, Q1),
1417            {LC, Q} = left_corners2(Rs, LC0, [], SymbolTab, Xref),
1418            left_corners(Q, LC, LeftHandTab, SymbolTab, Xref)
1419    end.
1420
1421left_corners2([], LC, Q, _SymbolTab, _Xref) ->
1422    {LC, Q};
1423left_corners2([{Head,Rhs} | Rs], LC, Q0, SymbolTab, Xref) ->
1424    Ts = left_corner_rhs(Rhs, Head, LC, set_empty(), SymbolTab),
1425    First0 = maps:get(Head, LC),
1426    case set_is_subset(Ts, First0) of
1427        true ->
1428            left_corners2(Rs, LC, Q0, SymbolTab, Xref);
1429        false ->
1430            LC1 = upd_first(Head, Ts, LC),
1431            left_corners2(Rs, LC1, [Xref(Head) | Q0], SymbolTab, Xref)
1432    end.
1433
1434upd_first(NT, Ts, LC) ->
1435    maps:update_with(NT, fun(First) -> set_union(First, Ts) end, LC).
1436
1437left_corner_rhs([S | Ss], Head, LC, Ts, SymbolTab) ->
1438    case ets:lookup(SymbolTab, S) of
1439        [{_,Num}=SymbolAndNum] when Num >= 0 ->
1440            set_add_terminal(SymbolAndNum, Ts);
1441        [_NonTerminalSymbol] ->
1442            First = maps:get(S, LC),
1443            case empty_member(First) of
1444                true ->
1445                    NTs = set_union(empty_delete(First), Ts),
1446                    left_corner_rhs(Ss, Head, LC, NTs, SymbolTab);
1447                false ->
1448                    set_union(First, Ts)
1449            end
1450    end;
1451left_corner_rhs([], _Head, _LC, Ts, _SymbolTab) ->
1452    set_add(?EMPTY, Ts).
1453
1454%% For every non-terminal return a list of "rule pointers" for rules
1455%% expanding the non-terminal.
1456%% Also assigns a unique number to each point in the grammar, "rule pointer".
1457make_rule_index(#yecc{nonterminals = Nonterminals,
1458                      rules_list = RulesList}, RulesListNoCodes) ->
1459    {RulesL, _N} =
1460        lists:mapfoldl(fun(#rule{symbols = [Nonterminal | Daughters]}, I) ->
1461                               I1 = I + length(Daughters)+1,
1462                               {{Nonterminal, I}, I1}
1463                       end, 1, RulesList),
1464    IndexedTab = family_with_domain(RulesL, Nonterminals),
1465
1466    Symbol2Rule = [{Foo,R} || #rule{symbols = Symbols}=R <- RulesListNoCodes,
1467                              Foo <- Symbols],
1468    Pointer2Rule = [{I, R} || {{_Foo,R},I} <- count(1, Symbol2Rule)],
1469    {maps:from_list(IndexedTab), maps:from_list(Pointer2Rule)}.
1470
1471%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1472% Computing parse action table from list of states and goto table:
1473
1474compute_parse_actions(N, St, StateActions) ->
1475    case N < first_state() of
1476        true ->
1477            StateActions;
1478        false ->
1479            {N, StateN} = lookup_state(St#yecc.state_tab, N),
1480            %% There can be duplicates in Actions.
1481            Actions = compute_parse_actions1(StateN, N, St),
1482            compute_parse_actions(N - 1, St, [{N, Actions} | StateActions])
1483    end.
1484
1485compute_parse_actions1([], _, _) ->
1486    [];
1487compute_parse_actions1([#item{rule_pointer = RulePointer,
1488                              look_ahead = Lookahead0,
1489                              rhs = Rhs} | Items], N, St) ->
1490    case Rhs of
1491        [] ->
1492            Lookahead = decode_terminals(Lookahead0, St#yecc.inv_symbol_tab),
1493            case rule(RulePointer, St) of
1494                {[?ACCEPT | _], _RuleLine, _} ->
1495                    [{Lookahead, accept}
1496                     | compute_parse_actions1(Items, N, St)];
1497                %% Head is placed after the daughters when finding the
1498                %% precedence. This is how giving precedence to
1499                %% non-terminals takes effect.
1500                {[Head | Daughters0], _RuleLine, _} ->
1501                    Daughters = delete('$empty', Daughters0),
1502                    [{Lookahead,
1503                      #reduce{rule_nmbr = RulePointer, head = Head,
1504                              nmbr_of_daughters = length(Daughters),
1505                              prec = get_prec(Daughters ++ [Head], St)}}
1506                     | compute_parse_actions1(Items, N, St)]
1507            end;
1508        [Symbol | Daughters] ->
1509            case is_terminal(St#yecc.symbol_tab, Symbol) of
1510                true ->
1511                    DecSymbol = decode_symbol(Symbol, St#yecc.inv_symbol_tab),
1512                    {[Head | _], _RuleLine, _} = rule(RulePointer, St),
1513                    %% A bogus shift-shift conflict can be introduced
1514                    %% here if some terminal occurs in different rules
1515                    %% which have been given precedence "one level up".
1516                    Prec1 = case Daughters of
1517                                [] -> get_prec([DecSymbol, Head], St);
1518                                _ -> get_prec([DecSymbol], St)
1519                            end,
1520                    Pos = case Daughters of
1521                              [] -> z;
1522                              _ -> a
1523                          end,
1524                    [{[DecSymbol],
1525                      #shift{state = goto(N, DecSymbol, St),
1526                             pos = Pos,
1527                             prec = Prec1,
1528                             rule_nmbr = RulePointer}}
1529                     | compute_parse_actions1(Items, N, St)];
1530                false ->
1531                    compute_parse_actions1(Items, N, St)
1532            end
1533    end.
1534
1535get_prec(Symbols, St) ->
1536    get_prec1(Symbols, St#yecc.prec_tab, {0, none}).
1537
1538get_prec1([], _, P) ->
1539    P;
1540get_prec1([Symbol | T], PrecTab, P) ->
1541    case ets:lookup(PrecTab, Symbol) of
1542        [] ->
1543            get_prec1(T, PrecTab, P);
1544        [{_, N, Ass}] ->
1545            get_prec1(T, PrecTab, {N, Ass})
1546    end.
1547
1548create_precedence_table(St) ->
1549    PrecTab = ets:new(yecc_precedences, []),
1550    true = ets:insert(PrecTab, St#yecc.prec),
1551    St#yecc{prec_tab = PrecTab}.
1552
1553-record(cxt, {terminal, state_n, yecc, res}).
1554
1555%% Detects shift-reduce and reduce-reduce conflicts.
1556%% Also removes all but one conflicting action. As a consequence the
1557%% lookahead sets for a state are always disjoint.
1558%% Reduce/reduce conflicts are considered errors.
1559find_action_conflicts(St0) ->
1560    Cxt0 = #cxt{yecc = St0, res = []},
1561    {#cxt{yecc = St, res = Res}, NewParseActions0} =
1562        foldl(fun({N, Actions0}, {Cxt1, StateActions}) ->
1563                      L = [{Terminal, Act} || {Lookahead, Act} <- Actions0,
1564                                              Terminal <- Lookahead],
1565                      {Cxt, Actions} =
1566                          foldl(fun({Terminal, As}, {Cxt2,Acts0}) ->
1567                                        Cxt3 = Cxt2#cxt{terminal = Terminal,
1568                                                        state_n = N},
1569                                        {Action, Cxt} =
1570                                            find_action_conflicts2(As, Cxt3),
1571                                        {Cxt,[{Action,Terminal} | Acts0]}
1572                                end, {Cxt1,[]}, family(L)),
1573                      {Cxt,[{N,inverse(family(Actions))} | StateActions]}
1574              end, {Cxt0, []}, St0#yecc.parse_actions),
1575    if
1576        length(Res) > 0, St#yecc.verbose ->
1577            io:fwrite(<<"\n*** Conflicts resolved by operator "
1578                        "precedences:\n\n">>),
1579            foreach(fun({Confl, Name}) ->
1580                            report_conflict(Confl, St, Name, prec)
1581                    end, reverse(Res)),
1582            io:fwrite(<<"*** End of resolved conflicts\n\n">>);
1583        true ->
1584            ok
1585    end,
1586    NewParseActions = reverse(NewParseActions0),
1587    St#yecc{parse_actions = NewParseActions}.
1588
1589find_action_conflicts2([Action], Cxt) ->
1590    {Action, Cxt};
1591find_action_conflicts2([#shift{state = St, pos = Pos, prec = Prec},
1592                        #shift{state = St}=S | As],
1593                       Cxt) when Pos =:= a; Prec =:= {0,none} ->
1594    %% This is a kludge to remove the bogus shift-shift conflict
1595    %% introduced in compute_parse_actions1().
1596    find_action_conflicts2([S | As], Cxt);
1597find_action_conflicts2([#shift{state = NewState, pos = z}=S1,
1598                        #shift{state = NewState}=S2 | _], Cxt) ->
1599    %% This is even worse than last clause. Give up.
1600    Confl = conflict(S1, S2, Cxt),
1601    #cxt{yecc = St0} = Cxt,
1602    St = conflict_error(Confl, St0),
1603    {S1, Cxt#cxt{yecc = St}}; % return any action
1604find_action_conflicts2([#shift{prec = {P1, Ass1}}=S | Rs], Cxt0) ->
1605    {R, Cxt1} = find_reduce_reduce(Rs, Cxt0),
1606    #cxt{res = Res0, yecc = St0} = Cxt1,
1607    #reduce{prec = {P2, Ass2}} = R,
1608    Confl = conflict(R, S, Cxt1),
1609    if
1610        P1 > P2 ->
1611            {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}};
1612        P2 > P1 ->
1613            {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}};
1614        Ass1 =:= left, Ass2 =:= left ->
1615            {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}};
1616        Ass1 =:= right, Ass2 =:= right ->
1617            {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}};
1618        Ass1 =:= nonassoc, Ass2 =:= nonassoc ->
1619            {nonassoc, Cxt1};
1620        P1 =:= 0, P2 =:= 0 ->
1621            report_conflict(Confl, St0, shift, default),
1622            St = add_conflict(Confl, St0),
1623            {S, Cxt1#cxt{yecc = St}};
1624        true ->
1625            St = conflict_error(Confl, St0),
1626            {S, Cxt1#cxt{yecc = St}} % return any action
1627    end;
1628find_action_conflicts2(Rs, Cxt0) ->
1629    find_reduce_reduce(Rs, Cxt0).
1630
1631find_reduce_reduce([R], Cxt) ->
1632    {R, Cxt};
1633find_reduce_reduce([accept=A, #reduce{}=R | Rs], Cxt0) ->
1634    Confl = conflict(R, A, Cxt0),
1635    St = conflict_error(Confl, Cxt0#cxt.yecc),
1636    Cxt = Cxt0#cxt{yecc = St},
1637    find_reduce_reduce([R | Rs], Cxt);
1638find_reduce_reduce([#reduce{head = Categ1, prec = {P1, _}}=R1,
1639                    #reduce{head = Categ2, prec = {P2, _}}=R2 | Rs], Cxt0) ->
1640    #cxt{res = Res0, yecc = St0} = Cxt0,
1641    Confl = conflict(R1, R2, Cxt0),
1642    {R, Res, St} =
1643        if
1644            P1 > P2 ->
1645                {R1, [{Confl, Categ1} | Res0], St0};
1646            P2 > P1 ->
1647                {R2, [{Confl, Categ2} | Res0], St0};
1648            true ->
1649                St1 = conflict_error(Confl, St0),
1650                {R1, Res0, St1}
1651        end,
1652    Cxt = Cxt0#cxt{res = Res, yecc = St},
1653    find_reduce_reduce([R | Rs], Cxt).
1654
1655%% Since the lookahead sets are disjoint (assured by
1656%% find_action_conflicts), the order between actions can be chosen
1657%% almost arbitrarily. nonassoc has to come last, though (but is later
1658%% discarded!). And shift has to come before reduce.
1659sort_parse_actions([]) ->
1660    [];
1661sort_parse_actions([{N, La_actions} | Tail]) ->
1662    [{N, sort_parse_actions1(La_actions)} | sort_parse_actions(Tail)].
1663
1664sort_parse_actions1(LaActions) ->
1665    As = filter(fun({_LA, A}) -> A =:= accept end, LaActions),
1666    Ss = filter(fun({_LA, A}) -> is_record(A, shift) end, LaActions),
1667    Rs = filter(fun({_LA, A}) -> is_record(A, reduce) end, LaActions),
1668    Ns = filter(fun({_LA, A}) -> A =:= nonassoc end, LaActions),
1669    As ++ Ss ++ Rs ++ Ns.
1670
1671%% -> {State, StateRepr}. StateRepr has the same set of shift actions
1672%% as State. No code will be output for State if State =/= StateRepr.
1673find_identical_shift_states(StateActions) ->
1674    L1 = [{Actions, State} || {State,Actions} <- StateActions],
1675    {SO, NotSO} = lists:partition(fun({Actions,_States}) ->
1676                                          shift_actions_only(Actions)
1677                                  end, family(L1)),
1678    R = [{State, hd(States)} || {_Actions, States} <- SO, State <- States]
1679        ++
1680        [{State, State} || {_Actions, States} <- NotSO, State <- States],
1681    lists:keysort(1, R).
1682
1683-record(part_data, {name, eq_state, actions, n_actions, states}).
1684
1685%% Replace {SStates,Actions} with {SStates,{Actions,Jump}} where
1686%% Jump describes which clauses that have been extracted from shift
1687%% states so that they can be used from other states. Some space is
1688%% saved.
1689find_partial_shift_states(StateActionsL, StateReprs) ->
1690    L = [{State, Actions} || {{State,Actions}, {State,State}} <-
1691                                 lists:zip(StateActionsL, StateReprs),
1692                             shift_actions_only(Actions)],
1693    StateActions = sofs:family(L, [{state,[action]}]),
1694    StateAction = sofs:family_to_relation(StateActions),
1695
1696    %% Two actions are equal if they occur in the same states:
1697    Parts = sofs:partition(sofs:range(StateActions)),
1698    PartsL = sofs:to_external(Parts),
1699    %% Assign temporary names to the parts of the partition (of actions):
1700    PartNameL = lists:zip(seq1(length(PartsL)), PartsL),
1701    ActPartL = [{Action,PartName} ||
1702                   {PartName,Actions} <- PartNameL,
1703                   Action <- Actions],
1704    ActionPartName = sofs:relation(ActPartL, [{action,partname}]),
1705    StatePartName = sofs:relative_product(StateAction, ActionPartName),
1706    PartInStates = sofs:relation_to_family(sofs:converse(StatePartName)),
1707
1708    %% Parts that equal all actions of a state:
1709    PartActions = sofs:family(PartNameL, [{partname,[action]}]),
1710    PartState =
1711        sofs:relative_product(PartActions, sofs:converse(StateActions)),
1712    PartStates = sofs_family_with_domain(PartState, sofs:domain(PartActions)),
1713
1714    PartDataL = [#part_data{name = Nm, eq_state = EqS, actions = P,
1715                            n_actions = length(P),
1716                            states = ordsets:from_list(S)} ||
1717                    {{Nm,P}, {Nm,S}, {Nm,EqS}} <-
1718                        lists:zip3(PartNameL,
1719                                   sofs:to_external(PartInStates),
1720                                   sofs:to_external(PartStates))],
1721    true = length(PartDataL) =:= length(PartNameL),
1722    Ps = select_parts(PartDataL),
1723
1724    J1 = [{State, Actions, {jump_some,hd(States)}} ||
1725             {_W, #part_data{actions = Actions, eq_state = [],
1726                             states = States}} <- Ps,
1727             State <- States],
1728    J2 = [{State, Actions, {jump_all,To}} ||
1729             {_W, #part_data{actions = Actions, eq_state = EqS,
1730                             states = States}} <- Ps,
1731             To <- EqS,
1732             State <- States,
1733             State =/= To],
1734    J = lists:keysort(1, J1 ++ J2),
1735
1736    JumpStates = ordsets:from_list([S || {S,_,_} <- J]),
1737    {JS, NJS} =
1738        sofs:partition(1, sofs:relation(StateActionsL, [{state, actions}]),
1739                       sofs:set(JumpStates, [state])),
1740    R =
1741        [{S, {Actions,jump_none}} || {S,Actions} <- sofs:to_external(NJS)]
1742        ++
1743        [{S, {Actions--Part, {Tag,ToS,Part}}} ||
1744            {{S,Actions}, {S,Part,{Tag,ToS}}} <-
1745                lists:zip(sofs:to_external(JS), J)],
1746    true = length(StateActionsL) =:= length(R),
1747    lists:keysort(1, R).
1748
1749%% Very greedy. By no means optimal.
1750select_parts([]) ->
1751    [];
1752select_parts(PartDataL) ->
1753    T1 = [{score(PD), PD} || PD <- PartDataL],
1754    [{W, PD} | Ws] = lists:reverse(lists:keysort(1, T1)),
1755    #part_data{n_actions = NActions, states = S} = PD,
1756    if
1757        W < 8 -> % don't bother
1758            [];
1759        true ->
1760            %% Cannot extract more clauses from the chosen part's states:
1761            NL = [D#part_data{states = NewS} ||
1762                     {W1, #part_data{states = S0}=D} <- Ws,
1763                     W1 > 0,
1764                     (NewS = ordsets:subtract(S0, S)) =/= []],
1765            if
1766                length(S) =:= 1; NActions =:= 1 ->
1767                    select_parts(NL);
1768                true ->
1769                    [{W,PD} | select_parts(NL)]
1770            end
1771    end.
1772
1773%% Does it pay off to extract clauses into a new function?
1774%% Assumptions:
1775%% - a call costs 8 (C = 8);
1776%% - a clause (per action) costs 20 plus 8 (select) (Cl = 28);
1777%% - a new function costs 20 (funinfo) plus 16 (select) (F = 36).
1778%% A is number of actions, S is number of states.
1779%% Special case (the part equals all actions of some state):
1780%% C * (S - 1) < (S - 1) * A * Cl
1781%% Normal case (introduce new function):
1782%% F + A * Cl + C * S < S * A * Cl
1783score(#part_data{n_actions = NActions, eq_state = [], states = S}) ->
1784    (length(S) * NActions * 28) - (36 + NActions * 28 + length(S) * 8);
1785score(#part_data{n_actions = NActions, states = S}) ->
1786    ((length(S) - 1) * NActions * 28) - (8 * (length(S) - 1)).
1787
1788shift_actions_only(Actions) ->
1789    length([foo || {_Ts,{shift,_,_,_,_}} <- Actions]) =:= length(Actions).
1790
1791collect_some_state_info(StateActions, StateReprs) ->
1792    RF = fun({_LA, A}) -> is_record(A, reduce) end,
1793    L = [{State,
1794          begin
1795              RO = lists:all(RF, LaActions),
1796              %% C is currently always ""; identical states are all shift.
1797              C = [io_lib:fwrite(<<" %% ~w\n">>, [State]) ||
1798                      true <- [RO], Repr =/= State],
1799              #state_info{reduce_only = RO, state_repr = Repr, comment = C}
1800          end} ||
1801            {{State, LaActions}, {State, Repr}} <-
1802                lists:zip(StateActions, StateReprs)],
1803    list_to_tuple(L).
1804
1805conflict_error(Conflict, St0) ->
1806    St1 = add_conflict(Conflict, St0),
1807    add_error({conflict, Conflict}, St1).
1808
1809report_conflict(Conflict, St, ActionName, How) ->
1810    if
1811        St#yecc.verbose ->
1812            io:fwrite(<<"~s\n">>, [format_conflict(Conflict)]),
1813            Formated = format_symbol(ActionName),
1814            case How of
1815                prec ->
1816                    io:fwrite(<<"Resolved in favor of ~ts.\n\n">>, [Formated]);
1817                default ->
1818                    io:fwrite(<<"Conflict resolved in favor of ~ts.\n\n">>,
1819                              [Formated])
1820            end;
1821        true ->
1822            ok
1823    end.
1824
1825add_conflict(Conflict, St) ->
1826    case Conflict of
1827        {Symbol, StateN, _, {reduce, _, _, _}} ->
1828            St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]};
1829        {Symbol, StateN, _, {accept, _}} ->
1830            St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]};
1831        {Symbol, StateN, _, {shift, _, _}} ->
1832            St#yecc{shift_reduce = [{StateN,Symbol} | St#yecc.shift_reduce]};
1833        {_Symbol, _StateN, {one_level_up, _, _}, _Confl} ->
1834            St
1835    end.
1836
1837conflict(#shift{prec = Prec1, rule_nmbr = RuleNmbr1},
1838         #shift{prec = Prec2, rule_nmbr = RuleNmbr2}, Cxt) ->
1839    %% Conflict due to precedences "one level up". Kludge.
1840    #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt,
1841    {_, L1, RuleN1} = rule(RuleNmbr1, St),
1842    {_, L2, RuleN2} = rule(RuleNmbr2, St),
1843    Confl = {one_level_up, {L1, RuleN1, Prec1}, {L2, RuleN2, Prec2}},
1844    {Symbol, N, Confl, Confl};
1845conflict(#reduce{rule_nmbr = RuleNmbr1}, NewAction, Cxt) ->
1846    #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt,
1847    {R1, RuleLine1, RuleN1} = rule(RuleNmbr1, St),
1848    Confl = case NewAction of
1849                accept ->
1850                    {accept, St#yecc.rootsymbol};
1851                #reduce{rule_nmbr = RuleNmbr2} ->
1852                    {R2, RuleLine2, RuleN2} = rule(RuleNmbr2, St),
1853                    {reduce, R2, RuleN2, RuleLine2};
1854                #shift{state = NewState} ->
1855                    {shift, NewState, last(R1)}
1856            end,
1857    {Symbol, N, {R1, RuleN1, RuleLine1}, Confl}.
1858
1859format_conflict({Symbol, N, _, {one_level_up,
1860                                {L1, RuleN1, {P1, Ass1}},
1861                                {L2, RuleN2, {P2, Ass2}}}}) ->
1862    S1 = io_lib:fwrite(<<"Conflicting precedences of symbols when "
1863                         "scanning ~ts in state ~w:\n">>,
1864                       [format_symbol(Symbol), N]),
1865    S2 = io_lib:fwrite(<<"   ~s ~w (rule ~w at line ~w)\n"
1866                          "      vs.\n">>,
1867                       [format_assoc(Ass1), P1, RuleN1, L1]),
1868    S3 = io_lib:fwrite(<<"   ~s ~w (rule ~w at line ~w)\n">>,
1869                       [format_assoc(Ass2), P2, RuleN2, L2]),
1870    [S1, S2, S3];
1871format_conflict({Symbol, N, Reduce, Confl}) ->
1872    S1 = io_lib:fwrite(<<"Parse action conflict scanning symbol "
1873                         "~ts in state ~w:\n">>, [format_symbol(Symbol), N]),
1874    S2 = case Reduce of
1875             {[HR | TR], RuleNmbr, RuleLine} ->
1876                 io_lib:fwrite(<<"   Reduce to ~ts from ~ts (rule ~w at "
1877                                 "line ~w)\n      vs.\n">>,
1878                               [format_symbol(HR), format_symbols(TR),
1879                                RuleNmbr, RuleLine])
1880         end,
1881    S3 = case Confl of
1882             {reduce, [HR2|TR2], RuleNmbr2, RuleLine2} ->
1883                 io_lib:fwrite(<<"   reduce to ~ts from ~ts "
1884                                 "(rule ~w at line ~w).">>,
1885                               [format_symbol(HR2), format_symbols(TR2),
1886                                RuleNmbr2, RuleLine2]);
1887             {shift, NewState, Sym} ->
1888                 io_lib:fwrite(<<"   shift to state ~w, adding right "
1889                                 "sisters to ~ts.">>,
1890                               [NewState, format_symbol(Sym)]);
1891             {accept, Rootsymbol} ->
1892                 io_lib:fwrite(<<"   reduce to rootsymbol ~ts.">>,
1893                               [format_symbol(Rootsymbol)])
1894         end,
1895    [S1, S2, S3].
1896
1897%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1898% Code generation:
1899
1900%% The version up to and including parsetools-1.3 is called "1.0".
1901%%
1902%% "1.1", parsetools-1.4:
1903%% - the prologue file has been updated;
1904%% - nonassoc is new;
1905%% - different order of clauses;
1906%% - never more than one clause matching a given symbol in a given state;
1907%% - file attributes relate messages to .yrl file;
1908%% - actions put in inlined functions;
1909%% - a few other minor fixes.
1910%%
1911%% "1.2", parsetools-1.4.2:
1912%% - the generated code has been changed as follows:
1913%%   - yeccpars2() calls the functions yeccpars2_State();
1914%%   - several states can share yeccpars2_State(), which reduces code size;
1915%%   - yeccgoto() has been split on one function per nonterminal;
1916%%   - several minor changes have made the loaded code smaller.
1917%% - the include file yeccpre.hrl has been changed incompatibly.
1918%%
1919%% "1.3", parsetools-1.4.4:
1920%% - the generated code has been changed as follows:
1921%%   - yeccgoto_T() no longer returns the next state, but calls yeccpars_S();
1922%%   - yeccpars2() is not called when it is known which yeccpars2_S() to call;
1923%%   - "__Stack" has been substituted for "Stack";
1924%%   - several states can share yeccpars2_S_cont(), which reduces code size;
1925%%   - instead if calling lists:nthtail() matching code is emitted.
1926%%
1927%% "1.4", parsetools-2.0.4:
1928%% - yeccerror() is called when a syntax error is found (as in version 1.1).
1929%% - the include file yeccpre.hrl has been changed.
1930
1931-define(CODE_VERSION, "1.4").
1932-define(YECC_BUG(M, A),
1933        unicode:characters_to_binary(
1934          [" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",",
1935           io_lib:fwrite(M, A), "}).\n\n"])).
1936
1937%% Returns number of newlines in included files.
1938output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] ->
1939    St5 = output_header(St0),
1940    #yecc{infile = Infile, module = Module} = St5,
1941    St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]),
1942    St20 =
1943        fwrite(St10,
1944               <<"-export([parse/1, parse_and_scan/1, format_error/1]).\n">>,
1945               []),
1946    {St25, N_lines_1, LastErlangCodeLine} =
1947        case St20#yecc.erlang_code of
1948            none ->
1949                {St20, 0, no_erlang_code};
1950            Next_line ->
1951                St_10 = output_file_directive(St20, Infile, Next_line-1),
1952                Last_line = include1([], Inport, Outport, Infile,
1953                                     Next_line, St_10),
1954                Nmbr_of_lines = Last_line - Next_line,
1955                {St_10, Nmbr_of_lines, {last_erlang_code_line, Last_line}}
1956    end,
1957    St30 = nl(St25),
1958    IncludeFile =
1959        filename:join([code:lib_dir(parsetools), "include","yeccpre.hrl"]),
1960    %% Maybe one could assume there are no warnings in this file.
1961    St = output_file_directive(St30, IncludeFile, 0),
1962    N_lines_2 = include(St, IncludeFile, Outport),
1963    {St, N_lines_1 + N_lines_2, LastErlangCodeLine};
1964output_prelude(Outport, Inport, St0) ->
1965    St5 = output_header(St0),
1966    #yecc{infile = Infile, module = Module, includefile = Includefile} = St5,
1967    St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]),
1968    St20 = output_file_directive(St10, Includefile, 0),
1969    N_lines_1 = include(St20, Includefile, Outport),
1970    St30 = nl(St20),
1971    case St30#yecc.erlang_code of
1972        none ->
1973            {St30, N_lines_1, no_erlang_code};
1974        Next_line ->
1975            St = output_file_directive(St30, Infile, Next_line-1),
1976            Last_line = include1([], Inport, Outport, Infile, Next_line, St),
1977            Nmbr_of_lines = Last_line - Next_line,
1978            {St, Nmbr_of_lines + N_lines_1, {last_erlang_code_line, Last_line}}
1979    end.
1980
1981output_header(St0) ->
1982    lists:foldl(fun(Str, St) -> fwrite(St, <<"~ts\n">>, [Str])
1983                end, St0, St0#yecc.header).
1984
1985output_goto(St, [{_Nonterminal, []} | Go], StateInfo) ->
1986    output_goto(St, Go, StateInfo);
1987output_goto(St0, [{Nonterminal, List} | Go], StateInfo) ->
1988    F = function_name(St0, yeccgoto, Nonterminal),
1989    St05 = fwrite(St0, <<"-dialyzer({nowarn_function, ~w/7}).\n">>, [F]),
1990    St10 = output_goto1(St05, List, F, StateInfo, true),
1991    St = output_goto_fini(F, Nonterminal, St10),
1992    output_goto(St, Go, StateInfo);
1993output_goto(St, [], _StateInfo) ->
1994    St.
1995
1996output_goto1(St0, [{From, To} | Tail], F, StateInfo, IsFirst) ->
1997    St10 = delim(St0, IsFirst),
1998    {To, ToInfo} = lookup_state(StateInfo, To),
1999    #state_info{reduce_only = RO, state_repr = Repr, comment = C} = ToInfo,
2000    if
2001        RO ->
2002            %% Reduce actions do not use the state, so we just pass
2003            %% the old (now bogus) on:
2004            FromS = io_lib:fwrite("~w=_S", [From]),
2005            ToS = "_S";
2006        true ->
2007            FromS = io_lib:fwrite("~w", [From]),
2008            ToS = io_lib:fwrite("~w", [To])
2009    end,
2010    St20 = fwrite(St10, <<"~w(~s, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>,
2011                  [F,FromS]),
2012    St30 = fwrite(St20, <<"~s">>, [C]),
2013    %% Short-circuit call to yeccpars2:
2014    St = fwrite(St30, <<" yeccpars2_~w(~s, Cat, Ss, Stack, T, Ts, Tzr)">>,
2015                [Repr, ToS]),
2016    output_goto1(St, Tail, F, StateInfo, false);
2017output_goto1(St, [], _F, _StateInfo, _IsFirst) ->
2018    St.
2019
2020output_goto_fini(F, NT, #yecc{includefile_version = {1,1}}=St0) ->
2021    %% Backward compatibility.
2022    St10 = delim(St0, false),
2023    St = fwrite(St10, <<"~w(State, _Cat, _Ss, _Stack, _T, _Ts, _Tzr) ->\n">>,
2024                [F]),
2025    fwrite(St,
2026           ?YECC_BUG(<<"{~ts, State, missing_in_goto_table}">>,
2027                     [quoted_atom(St0, NT)]),
2028           []);
2029output_goto_fini(_F, _NT, St) ->
2030    fwrite(St, <<".\n\n">>, []).
2031
2032%% Find actions having user code.
2033find_user_code(ParseActions, St) ->
2034    [#user_code{state = State,
2035                terminal = Terminal,
2036                funname = inlined_function_name(St, State, Terminal),
2037                action = Action} ||
2038        {State, La_actions} <- ParseActions,
2039        {Action, Terminals, RuleNmbr, NmbrOfDaughters}
2040            <- find_user_code2(La_actions),
2041        case tokens(RuleNmbr, St) of
2042            [{var, _, '__1'}] -> NmbrOfDaughters =/= 1;
2043            _ -> true
2044        end,
2045        Terminal <- Terminals].
2046
2047find_user_code2([]) ->
2048    [];
2049find_user_code2([{_, #reduce{rule_nmbr = RuleNmbr,
2050                             nmbr_of_daughters = NmbrOfDaughters}
2051                  =Action}]) ->
2052    %% Same optimization as in output_state_actions.
2053    [{Action, ["Cat"], RuleNmbr, NmbrOfDaughters}];
2054find_user_code2([{La, #reduce{rule_nmbr = RuleNmbr,
2055                              nmbr_of_daughters = NmbrOfDaughters}
2056                  =Action} | T]) ->
2057    [{Action,La, RuleNmbr, NmbrOfDaughters} | find_user_code2(T)];
2058find_user_code2([_ | T]) ->
2059    find_user_code2(T).
2060
2061output_actions(St0, StateJumps, StateInfo) ->
2062    %% Not all the clauses of the dispatcher function yeccpars2() can
2063    %% be reached. Only when shifting, that is, calling yeccpars1(),
2064    %% will yeccpars2() be called.
2065    Y2CL = [NewState || {_State,{Actions,J}} <- StateJumps,
2066                        {_LA, #shift{state = NewState}} <-
2067                            (Actions
2068                             ++ [A || {_Tag,_To,Part} <- [J], A <- Part])],
2069    Y2CS = ordsets:from_list([0 | Y2CL]),
2070    Y2S = ordsets:from_list([S || {S,_} <- StateJumps]),
2071    NY2CS = ordsets:subtract(Y2S, Y2CS),
2072    Sel = [{S,true} || S <- ordsets:to_list(Y2CS)] ++
2073          [{S,false} || S <- ordsets:to_list(NY2CS)],
2074
2075    SelS = [{State,Called} ||
2076               {{State,_JActions}, {State,Called}} <-
2077                   lists:zip(StateJumps, lists:keysort(1, Sel))],
2078    St05 =
2079        fwrite(St0, <<"-dialyzer({nowarn_function, yeccpars2/7}).\n">>, []),
2080    St10 = foldl(fun({State, Called}, St_0) ->
2081                         {State, #state_info{state_repr = IState}} =
2082                             lookup_state(StateInfo, State),
2083                         output_state_selection(St_0, State, IState, Called)
2084            end, St05, SelS),
2085    St20 = fwrite(St10, <<"yeccpars2(Other, _, _, _, _, _, _) ->\n">>, []),
2086    St = fwrite(St20,
2087                ?YECC_BUG(<<"{missing_state_in_action_table, Other}">>, []),
2088                []),
2089    foldl(fun({State, JActions}, St_0) ->
2090                  {State, #state_info{state_repr = IState}} =
2091                      lookup_state(StateInfo, State),
2092                  output_state_actions(St_0, State, IState,
2093                                       JActions, StateInfo)
2094          end, St, StateJumps).
2095
2096output_state_selection(St0, State, IState, Called) ->
2097    Comment = [<<"%% ">> || false <- [Called]],
2098    St = fwrite(St0, <<"~syeccpars2(~w=S, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>,
2099                [Comment, State]),
2100    fwrite(St,
2101           <<"~s yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr);\n">>,
2102           [Comment, IState]).
2103
2104output_state_actions(St, State, State, {Actions,jump_none}, SI) ->
2105    St1 = output_state_actions_begin(St, State, Actions),
2106    output_state_actions1(St1, State, Actions, true, normal, SI);
2107output_state_actions(St0, State, State, {Actions, Jump}, SI) ->
2108    {Tag, To, Common} = Jump,
2109    CS = case Tag of
2110             jump_some -> list_to_atom(lists:concat([cont_, To]));
2111             jump_all -> To
2112         end,
2113    St = output_state_actions1(St0, State, Actions, true, {to, CS}, SI),
2114    if
2115        To =:= State ->
2116            St1 = output_state_actions_begin(St, State, Actions),
2117            output_state_actions1(St1, CS, Common, true, normal, SI);
2118        true ->
2119            St
2120    end;
2121output_state_actions(St, State, JState, _XActions, _SI) ->
2122    fwrite(St, <<"%% yeccpars2_~w: see yeccpars2_~w\n\n">>, [State, JState]).
2123
2124output_state_actions_begin(St, State, Actions) ->
2125    case [yes || {_, #reduce{}} <- Actions] of
2126        [] ->
2127            fwrite(St, <<"-dialyzer({nowarn_function, yeccpars2_~w/7}).\n">>,
2128                   [State]); % Only when yeccerror(T) is output.
2129        _ -> St
2130    end.
2131
2132output_state_actions1(St, State, [], IsFirst, normal, _SI) ->
2133    output_state_actions_fini(State, IsFirst, St);
2134output_state_actions1(St0, State, [], IsFirst, {to, ToS}, _SI) ->
2135    St = delim(St0, IsFirst),
2136    fwrite(St,
2137           <<"yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr) ->\n"
2138            " yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr).\n\n">>,
2139           [State, ToS]);
2140output_state_actions1(St0, State, [{_, #reduce{}=Action}],
2141                      IsFirst, _End, SI) ->
2142    St = output_reduce(St0, State, "Cat", Action, IsFirst, SI),
2143    fwrite(St, <<".\n\n">>, []);
2144output_state_actions1(St0, State, [{Lookahead,Action} | Tail],
2145                      IsFirst, End, SI) ->
2146    {_, St} =
2147        foldl(fun(Terminal, {IsFst,St_0}) ->
2148                      {false,
2149                       output_action(St_0, State, Terminal, Action, IsFst,SI)}
2150              end, {IsFirst,St0}, Lookahead),
2151    output_state_actions1(St, State, Tail, false, End, SI).
2152
2153output_action(St, State, Terminal, #reduce{}=Action, IsFirst, SI) ->
2154    output_reduce(St, State, Terminal, Action, IsFirst, SI);
2155output_action(St0, State, Terminal, #shift{state = NewState}, IsFirst, _SI) ->
2156    St10 = delim(St0, IsFirst),
2157    St = fwrite(St10, <<"yeccpars2_~w(S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>,
2158                [State, quoted_atom(St10, Terminal)]),
2159    output_call_to_includefile(NewState, St);
2160output_action(St0, State, Terminal, accept, IsFirst, _SI) ->
2161    St10 = delim(St0, IsFirst),
2162    St = fwrite(St10,
2163                <<"yeccpars2_~w(_S, ~ts, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>,
2164                [State, quoted_atom(St10, Terminal)]),
2165    fwrite(St, <<" {ok, hd(Stack)}">>, []);
2166output_action(St, _State, _Terminal, nonassoc, _IsFirst, _SI) ->
2167    St.
2168
2169output_call_to_includefile(NewState, #yecc{includefile_version = {1,1}}=St) ->
2170    %% Backward compatibility.
2171    fwrite(St, <<" yeccpars1(Ts, Tzr, ~w, [S | Ss], [T | Stack])">>,
2172           [NewState]);
2173output_call_to_includefile(NewState, St) ->
2174    fwrite(St, <<" yeccpars1(S, ~w, Ss, Stack, T, Ts, Tzr)">>,
2175           [NewState]).
2176
2177output_state_actions_fini(State, IsFirst, St0) ->
2178    %% Backward compatible.
2179    St10 = delim(St0, IsFirst),
2180    St = fwrite(St10, <<"yeccpars2_~w(_, _, _, _, T, _, _) ->\n">>, [State]),
2181    fwrite(St, <<" yeccerror(T).\n\n">>, []).
2182
2183output_reduce(St0, State, Terminal,
2184              #reduce{rule_nmbr = RuleNmbr,
2185                      head = Head,
2186                      nmbr_of_daughters = NmbrOfDaughters},
2187              IsFirst, StateInfo) ->
2188    St10 = delim(St0, IsFirst),
2189    QuotedTerminal = if
2190                         is_atom(Terminal) -> quoted_atom(St10, Terminal);
2191                         true -> Terminal
2192                     end,
2193    St20 = fwrite(St10,
2194                  <<"yeccpars2_~w(_S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>,
2195                  [State, QuotedTerminal]),
2196    St30 =
2197        if
2198            NmbrOfDaughters < 2 ->
2199                Ns = "Ss",
2200                St20;
2201            true ->
2202                Ns = "Nss",
2203                Tmp = lists:join(",",
2204                                  lists:duplicate(NmbrOfDaughters - 1, "_")),
2205                fwrite(St20, <<" [~s|Nss] = Ss,\n">>, [Tmp])
2206        end,
2207    St40 = case tokens(RuleNmbr, St30) of
2208               [{var, _, '__1'}] when NmbrOfDaughters =:= 1 ->
2209                   NewStack = "Stack",
2210                   St30;
2211               _ ->
2212                   NewStack = "NewStack",
2213                   fwrite(St30, <<" NewStack = ~w(Stack),\n">>,
2214                          [inlined_function_name(St30, State, Terminal)])
2215               end,
2216    if
2217        NmbrOfDaughters =:= 0 ->
2218            NextState = goto(State, Head, St40),
2219            {NextState, I} = lookup_state(StateInfo, NextState),
2220            #state_info{reduce_only = RO, state_repr = Repr, comment = C} = I,
2221            %% Reduce actions do not use the state, so we just pass
2222            %% the old (now bogus) on:
2223            if
2224                RO -> NextS = "_S";
2225                true -> NextS = io_lib:fwrite("~w", [NextState])
2226            end,
2227            St = fwrite(St40, <<"~s">>, [C]),
2228            %% Short-circuit call to yeccpars2:
2229            fwrite(St,
2230                   <<" yeccpars2_~w(~s, ~ts, [~w | Ss], ~s, T, Ts, Tzr)">>,
2231                   [Repr, NextS, QuotedTerminal, State, NewStack]);
2232        true ->
2233            fwrite(St40,
2234                   <<" ~w(hd(~s), ~ts, ~s, ~s, T, Ts, Tzr)">>,
2235                   [function_name(St40, yeccgoto, Head), Ns,
2236                    QuotedTerminal, Ns, NewStack])
2237    end.
2238
2239delim(St, true) ->
2240    St;
2241delim(St, false) ->
2242    fwrite(St, <<";\n">>, []).
2243
2244quoted_atom(#yecc{encoding = latin1}, Atom) when is_atom(Atom) ->
2245    io_lib:write_atom_as_latin1(Atom);
2246quoted_atom(_St, Atomic) ->
2247    io_lib:write(Atomic).
2248
2249output_inlined(St, UserCodeActions, Infile) ->
2250    foldl(fun(#user_code{funname = InlinedFunctionName,
2251                         action = Action}, St_0) ->
2252                  output_inlined(St_0, InlinedFunctionName,
2253                                 Action, Infile)
2254          end, St, UserCodeActions).
2255
2256%% Each action with user code is placed in a separate inlined function.
2257%% The purpose is to be able to pinpoint errors and warnings correctly.
2258output_inlined(St0, FunctionName, Reduce, Infile) ->
2259    #reduce{rule_nmbr = RuleNmbr, nmbr_of_daughters = N_daughters} = Reduce,
2260    #rule{tokens = Tokens, is_well_formed = WF} = get_rule(RuleNmbr, St0),
2261    Line0 = first_line(Tokens),
2262    NLines = last_line(Tokens) - Line0,
2263
2264    St5 = if
2265              WF ->
2266                  St0;
2267              not WF ->
2268                  %% The compiler will generate an error message for
2269                  %% the inlined function (unless the reason that yecc
2270                  %% failed to parse the action was some macro). The
2271                  %% line number of the message will be correct since
2272                  %% we are keeping track of the current line of the
2273                  %% output file...
2274                  #yecc{outfile = Outfile, line = CurLine} = St0,
2275                  output_file_directive(St0, Outfile, CurLine)
2276          end,
2277
2278    CodeStartLine = lists:max([0, Line0 - 4]),
2279    St10 = fwrite(St5, <<"-compile({inline,~w/1}).\n">>, [FunctionName]),
2280    St20 = output_file_directive(St10, Infile, CodeStartLine),
2281    St30 = fwrite(St20, <<"~w(__Stack0) ->\n">>, [FunctionName]),
2282    %% Currently the (old) inliner emits less code if matching the
2283    %% stack inside the body rather than in the head...
2284    St40 = case N_daughters of
2285               0 ->
2286                   Stack = "__Stack0",
2287                   St30;
2288               _ ->
2289                   Stack = "__Stack",
2290                   A = concat(flatmap(fun(I) -> [",__",I] end,
2291                                      lists:seq(N_daughters, 1, -1))),
2292                   fwrite(St30, <<" ~s = __Stack0,\n">>,
2293                          [append(["[", tl(A), " | __Stack]"])])
2294           end,
2295    St = St40#yecc{line = St40#yecc.line + NLines},
2296    fwrite(St, <<" [begin\n  ~ts\n  end | ~s].\n\n">>,
2297           [pp_tokens(Tokens, Line0, St#yecc.encoding), Stack]).
2298
2299inlined_function_name(St, State, Terminal) ->
2300    End = case Terminal of
2301              "Cat" -> [];
2302              _ -> [quoted_atom(St, Terminal)]
2303          end,
2304    list_to_atom(concat([yeccpars2_, State, '_'] ++ End)).
2305
2306-compile({nowarn_unused_function,function_name/3}).
2307function_name(St, Name, Suf) ->
2308    list_to_atom(concat([Name, '_'] ++ [quoted_atom(St, Suf)])).
2309
2310rule(RulePointer, St) ->
2311    #rule{n = N, anno = Anno, symbols = Symbols} =
2312        maps:get(RulePointer, St#yecc.rule_pointer2rule),
2313    {Symbols, Anno, N}.
2314
2315get_rule(RuleNmbr, St) ->
2316    maps:get(RuleNmbr, St#yecc.rule_pointer2rule).
2317
2318tokens(RuleNmbr, St) ->
2319    Rule = maps:get(RuleNmbr, St#yecc.rule_pointer2rule),
2320    Rule#rule.tokens.
2321
2322goto(From, Symbol, St) ->
2323    case ets:lookup(St#yecc.goto_tab, {From, Symbol}) of
2324        [{_, To}] ->
2325            To;
2326        [] ->
2327            erlang:error({error_in_goto_table, From, Symbol})
2328    end.
2329
2330%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2331% Auxiliaries:
2332
2333-ifdef(SYMBOLS_AS_CODES).
2334
2335%%% Bit mask operations.
2336
2337-compile({inline,[set_empty/0]}).
2338set_empty() ->
2339    0.
2340
2341set_add(I, BM) ->
2342    (1 bsl I) bor BM.
2343
2344-compile({inline,[set_member/2]}).
2345set_member(I, BM) ->
2346    ((1 bsl I) band BM) =/= 0.
2347
2348%% Assumes I is a member...
2349-compile({inline,[set_delete/2]}).
2350set_delete(I, BM) ->
2351    (1 bsl I) bxor BM.
2352
2353-compile({inline,[set_union/2]}).
2354set_union(BM1, BM2) ->
2355    BM1 bor BM2.
2356
2357-compile({inline,[set_is_subset/2]}).
2358set_is_subset(BM1, BM2) ->
2359    (BM1 band BM2) =:= BM1.
2360
2361empty_member(BM) ->
2362    set_member(0, BM).
2363
2364empty_delete(BM) ->
2365    set_delete(0, BM).
2366
2367code_symbols(Ss, SymbolTable) ->
2368    map(fun(S) -> ets:lookup_element(SymbolTable, S, 2) end, Ss).
2369
2370decode_symbol(C, InvSymbolTable) ->
2371    ets:lookup_element(InvSymbolTable, C, 1).
2372
2373code_terminal(T, SymbolTab) ->
2374    set_add(ets:lookup_element(SymbolTab, T, 2), 0).
2375
2376decode_terminals(BM, InvSymbolTab) ->
2377    case get(BM) of
2378        undefined ->
2379            Symbols = decode_terminals(BM, 0, InvSymbolTab),
2380            put(BM, Symbols),
2381            Symbols;
2382        Symbols ->
2383            Symbols
2384    end.
2385
2386decode_terminals(0, _I, _InvSymbolTab) ->
2387    [];
2388decode_terminals(BM, I, InvSymbolTab) ->
2389    case set_member(I, BM) of
2390        true ->
2391            [ets:lookup_element(InvSymbolTab, I, 1)
2392             | decode_terminals(set_delete(I, BM), I+1, InvSymbolTab)];
2393        false ->
2394            decode_terminals(BM, I+1, InvSymbolTab)
2395    end.
2396
2397set_add_terminal({_Symbol, TerminalNum}, BM) ->
2398    set_add(TerminalNum, BM).
2399
2400-compile({inline,[is_terminal/2]}).
2401is_terminal(_Tab, SymbolCode) ->
2402    SymbolCode >= 0.
2403
2404left_corner_symbol_table(St) ->
2405    St#yecc.inv_symbol_tab.
2406
2407-else.
2408
2409set_empty() ->
2410    [].
2411
2412set_add(Symbol, L) ->
2413    ordsets:union([Symbol], L).
2414
2415set_union(Es1, Es2) ->
2416    ordsets:union(Es1, Es2).
2417
2418set_is_subset(Es1, Es2) ->
2419    ordsets:is_subset(Es1, Es2).
2420
2421code_symbols(Ss, _SymbolTab) ->
2422    Ss.
2423
2424decode_symbol(S, _InvSymbolTab) ->
2425    S.
2426
2427code_terminal(T, _SymbolTab) ->
2428    [T].
2429
2430decode_terminals(Ts, _InvSymbolTab) ->
2431    Ts.
2432
2433empty_member(['$empty' | _]) ->
2434    true;
2435empty_member(_) ->
2436    false.
2437
2438empty_delete(['$empty' | Terminals]) ->
2439    Terminals.
2440
2441set_add_terminal({Symbol, _TerminalNum}, L) ->
2442    set_add(Symbol, L).
2443
2444is_terminal(Tab, SymbolName) ->
2445   ets:lookup_element(Tab, SymbolName, 2) >= 0.
2446
2447left_corner_symbol_table(St) ->
2448    St#yecc.symbol_tab.
2449
2450-endif. % SYMBOLS_AS_CODES
2451
2452intersect(L1, L2) ->
2453    ordsets:to_list(ordsets:intersection(ordsets:from_list(L1),
2454                                         ordsets:from_list(L2))).
2455
2456format_symbols([Sym | Syms]) ->
2457    concat([format_symbol(Sym) | format_symbols1(Syms)]).
2458
2459format_symbols1([]) ->
2460    [];
2461format_symbols1([H | T]) ->
2462    [" ", format_symbol(H) | format_symbols1(T)].
2463
2464include(St, File, Outport) ->
2465    case file:open(File, [read]) of
2466        {error, Reason} ->
2467            throw(add_error(File, none, {file_error, Reason}, St));
2468        {ok, Inport} ->
2469            _ = epp:set_encoding(Inport),
2470            Line = io:get_line(Inport, ''),
2471            try include1(Line, Inport, Outport, File, 1, St) - 1
2472            after ok = file:close(Inport)
2473            end
2474    end.
2475
2476include1(eof, _, _, _File, L, _St) ->
2477    L;
2478include1({error, _}=_Error, _Inport, _Outport, File, L, St) ->
2479    throw(add_error(File, erl_anno:new(L), cannot_parse, St));
2480include1(Line, Inport, Outport, File, L, St) ->
2481    Incr = case member($\n, Line) of
2482               true -> 1;
2483               false -> 0
2484           end,
2485    io:put_chars(Outport, Line),
2486    include1(io:get_line(Inport, ''), Inport, Outport, File, L + Incr, St).
2487
2488includefile_version([]) ->
2489    {1,4};
2490includefile_version(Includefile) ->
2491    case epp:open(Includefile, []) of
2492        {ok, Epp} ->
2493            try
2494                parse_file(Epp)
2495            after
2496                epp:close(Epp)
2497            end;
2498        {error, _Error} ->
2499            {1,1}
2500    end.
2501
2502parse_file(Epp) ->
2503    case epp:parse_erl_form(Epp) of
2504        {ok, {function,_Anno,yeccpars1,7,_Clauses}} ->
2505            {1,4};
2506        {eof,_Line} ->
2507            {1,1};
2508        _Form ->
2509            parse_file(Epp)
2510    end.
2511
2512%% Keeps the line breaks of the original code.
2513pp_tokens(Tokens, Line0, Enc) ->
2514    concat(pp_tokens1(Tokens, Line0, Enc, [])).
2515
2516pp_tokens1([], _Line0, _Enc, _T0) ->
2517    [];
2518pp_tokens1([T | Ts], Line0, Enc, T0) ->
2519    Line = location(anno(T)),
2520    [pp_sep(Line, Line0, T0), pp_symbol(T, Enc)|pp_tokens1(Ts, Line, Enc, T)].
2521
2522pp_symbol({var,_,Var}, _Enc) -> Var;
2523pp_symbol({string,_,String}, latin1) ->
2524    io_lib:write_string_as_latin1(String);
2525pp_symbol({string,_,String}, _Enc) -> io_lib:write_string(String);
2526pp_symbol({_,_,Symbol}, latin1) -> io_lib:fwrite(<<"~p">>, [Symbol]);
2527pp_symbol({_,_,Symbol}, _Enc) -> io_lib:fwrite(<<"~tp">>, [Symbol]);
2528pp_symbol({Symbol, _}, _Enc) -> Symbol.
2529
2530pp_sep(Line, Line0, T0) when Line > Line0 ->
2531    ["\n   " | pp_sep(Line - 1, Line0, T0)];
2532pp_sep(_Line, _Line0, {'.',_}) ->
2533    "";
2534pp_sep(_Line, _Line0, _T0) ->
2535    " ".
2536
2537set_encoding(#yecc{encoding = none}, Port) ->
2538    ok = io:setopts(Port, [{encoding, epp:default_encoding()}]);
2539set_encoding(#yecc{encoding = E}, Port) ->
2540    ok = io:setopts(Port, [{encoding, E}]).
2541
2542output_encoding_comment(#yecc{encoding = none}=St) ->
2543    St;
2544output_encoding_comment(#yecc{encoding = Encoding}=St) ->
2545    fwrite(St, <<"%% ~s\n">>, [epp:encoding_to_string(Encoding)]).
2546
2547output_file_directive(St, Filename, Line) when St#yecc.file_attrs ->
2548    fwrite(St, <<"-file(~ts, ~w).\n">>,
2549           [format_filename(Filename, St), Line]);
2550output_file_directive(St, _Filename, _Line) ->
2551    St.
2552
2553first_line(Tokens) ->
2554    location(anno(hd(Tokens))).
2555
2556last_line(Tokens) ->
2557    location(anno(lists:last(Tokens))).
2558
2559location(none) -> none;
2560location(Anno) ->
2561    erl_anno:line(Anno).
2562
2563anno(Token) ->
2564    element(2, Token).
2565
2566%% Keep track of the current line in the generated file.
2567fwrite(#yecc{outport = Outport, line = Line}=St, Format, Args) ->
2568    NLines = count_nl(Format),
2569    io:fwrite(Outport, Format, Args),
2570    St#yecc{line = Line + NLines}.
2571
2572%% Assumes \n is used, and never ~n.
2573count_nl(<<$\n,Rest/binary>>) ->
2574    1 + count_nl(Rest);
2575count_nl(<<_,Rest/binary>>) ->
2576    count_nl(Rest);
2577count_nl(<<>>) ->
2578    0.
2579
2580nl(#yecc{outport = Outport, line = Line}=St) ->
2581    io:nl(Outport),
2582    St#yecc{line = Line + 1}.
2583
2584format_filename(Filename0, St) ->
2585    Filename = filename:flatten(Filename0),
2586    case lists:keyfind(encoding, 1, io:getopts(St#yecc.outport)) of
2587        {encoding, unicode} -> io_lib:write_string(Filename);
2588        _ ->                   io_lib:write_string_as_latin1(Filename)
2589    end.
2590
2591format_assoc(left) ->
2592    "Left";
2593format_assoc(right) ->
2594    "Right";
2595format_assoc(unary) ->
2596    "Unary";
2597format_assoc(nonassoc) ->
2598    "Nonassoc".
2599
2600format_symbol(Symbol) ->
2601    String = concat([Symbol]),
2602    case erl_scan:string(String) of
2603        {ok, [{atom, _, _}], _} ->
2604            io_lib:fwrite(<<"~tw">>, [Symbol]);
2605        {ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' ->
2606            case erl_scan:reserved_word(Word) of
2607                true ->
2608                    String;
2609                false ->
2610                    io_lib:fwrite(<<"~tw">>, [Symbol])
2611            end;
2612        {ok, [{var, _, _}], _} ->
2613            String;
2614        _ ->
2615            io_lib:fwrite(<<"~tw">>, [Symbol])
2616    end.
2617
2618inverse(L) ->
2619    sort([{A,B} || {B,A} <- L]).
2620
2621family(L) ->
2622    sofs:to_external(sofs:relation_to_family(sofs:relation(L))).
2623
2624seq1(To) when To < 1 ->
2625    [];
2626seq1(To) ->
2627    lists:seq(1, To).
2628
2629count(From, L) ->
2630    lists:zip(L, lists:seq(From, length(L)-1+From)).
2631
2632family_with_domain(L, DL) ->
2633    sofs:to_external(sofs_family_with_domain(sofs:relation(L), sofs:set(DL))).
2634
2635sofs_family_with_domain(R0, D) ->
2636    R = sofs:restriction(R0, D),
2637    F = sofs:relation_to_family(R),
2638    FD = sofs:constant_function(D, sofs:from_term([])),
2639    sofs:family_union(F, FD).
2640