1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 1996-2020. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20%% Yacc like LALR-1 parser generator for Erlang.
21%% Ref: Aho & Johnson: "LR Parsing", ACM Computing Surveys, vol. 6:2, 1974.
22%% Auxiliary files: yeccgramm.yrl, yeccparser.erl, yeccpre.hrl, yeccscan.erl.
23%%
24
25-module(yecc).
26
27-export([compile/3, file/1, file/2, format_error/1]).
28
29%% Kept for compatibility with R10B.
30-export([yecc/2, yecc/3, yecc/4]).
31
32-import(lists, [append/1, append/2, concat/1, delete/2, filter/2,
33                flatmap/2, foldl/3, foldr/3, foreach/2, keydelete/3,
34                keysort/2, last/1, map/2, member/2, reverse/1,
35                sort/1, usort/1]).
36
37-include("erl_compile.hrl").
38-include("ms_transform.hrl").
39
40-record(yecc, {
41          infile,
42          outfile,
43          includefile,
44          includefile_version,
45          module,
46          encoding = none,
47          options = [],
48          verbose = false,
49          file_attrs = true,
50          errors = [],
51          warnings = [],
52          conflicts_done = false,
53          shift_reduce = [],
54          reduce_reduce = [],
55          n_states = 0,
56          inport,
57          outport,
58          line,
59
60          parse_actions,
61          symbol_tab,
62          inv_symbol_tab,
63          state_tab,
64          prec_tab,
65          goto_tab,
66
67          terminals = [],
68          nonterminals = [],
69          all_symbols = [],
70          prec = [],
71          rules_list = [],
72          rules, % a tuple of rules_list
73          rule_pointer2rule,
74          rootsymbol = [],
75          endsymbol = [],
76          expect_shift_reduce = [],
77          expect_n_states = [],
78          header = [],
79          erlang_code = none
80         }).
81
82-record(rule, {
83          n,             % rule n in the grammar file
84          location,
85          symbols,       % the names of symbols
86          tokens,
87          is_guard,      % the action is a guard (not used)
88          is_well_formed % can be parsed (without macro expansion)
89         }).
90
91-record(reduce, {
92          rule_nmbr,
93          head,
94          nmbr_of_daughters,
95          prec,
96          unused % assure that #reduce{} comes before #shift{} when sorting
97         }).
98
99-record(shift, {
100          state,
101          pos,
102          prec,
103          rule_nmbr
104         }).
105
106-record(user_code, {state, terminal, funname, action}).
107
108-record(symbol, {anno = none, name}).
109
110%% ACCEPT is neither an atom nor a non-terminal.
111-define(ACCEPT, {}).
112
113%% During the phase 'compute_states' terminals in lookahead sets are
114%% coded as integers; sets of terminals are integer bit masks. This is
115%% for efficiency only. '$empty' is always given the mask 1. The
116%% behaviour can be turned off by un-defining SYMBOLS_AS_CODES (useful
117%% when debugging).
118
119%% Non-terminals are also given integer codes, starting with -1. The
120%% absolut value of the code is used for indexing a tuple of lists of
121%% rules.
122
123-define(SYMBOLS_AS_CODES, true).
124
125-ifdef(SYMBOLS_AS_CODES).
126-define(EMPTY, 0).
127-else.
128-define(EMPTY, '$empty').
129-endif.
130
131%%%
132%%% Exported functions
133%%%
134
135%%% Interface to erl_compile.
136
137compile(Input0, Output0,
138        #options{warning = WarnLevel, verbose=Verbose, includes=Includes,
139		 specific=Specific}) ->
140    Input = shorten_filename(Input0),
141    Output = shorten_filename(Output0),
142    Includefile = lists:sublist(Includes, 1),
143    Werror = proplists:get_bool(warnings_as_errors, Specific),
144    Opts = [{parserfile,Output}, {includefile,Includefile}, {verbose,Verbose},
145            {report_errors, true}, {report_warnings, WarnLevel > 0},
146	    {warnings_as_errors, Werror}],
147    case file(Input, Opts) of
148        {ok, _OutFile} ->
149            ok;
150        error ->
151            error
152    end.
153
154-spec format_error(ErrorDescriptor) -> io_lib:chars() when
155      ErrorDescriptor :: term().
156
157format_error(bad_declaration) ->
158    io_lib:fwrite("unknown or bad declaration, ignored", []);
159format_error({bad_expect, SymName}) ->
160    io_lib:fwrite("argument ~ts of Expect is not an integer",
161                  [format_symbol(SymName)]);
162format_error({bad_rootsymbol, SymName}) ->
163    io_lib:fwrite("rootsymbol ~ts is not a nonterminal",
164                  [format_symbol(SymName)]);
165format_error({bad_states, SymName}) ->
166    io_lib:fwrite("argument ~ts of States is not an integer",
167                  [format_symbol(SymName)]);
168format_error({conflict, Conflict}) ->
169    format_conflict(Conflict);
170format_error({conflicts, SR, RR}) ->
171    io_lib:fwrite("conflicts: ~w shift/reduce, ~w reduce/reduce", [SR, RR]);
172format_error({duplicate_declaration, Tag}) ->
173    io_lib:fwrite("duplicate declaration of ~s", [atom_to_list(Tag)]);
174format_error({duplicate_nonterminal, Nonterminal}) ->
175    io_lib:fwrite("duplicate non-terminals ~ts",
176                  [format_symbol(Nonterminal)]);
177format_error({duplicate_precedence, Op}) ->
178    io_lib:fwrite("duplicate precedence operator ~ts",
179                  [format_symbol(Op)]);
180format_error({duplicate_terminal, Terminal}) ->
181    io_lib:fwrite("duplicate terminal ~ts",
182                  [format_symbol(Terminal)]);
183format_error({endsymbol_is_nonterminal, Symbol}) ->
184    io_lib:fwrite("endsymbol ~ts is a nonterminal",
185                  [format_symbol(Symbol)]);
186format_error({endsymbol_is_terminal, Symbol}) ->
187    io_lib:fwrite("endsymbol ~ts is a terminal",
188                  [format_symbol(Symbol)]);
189format_error({error, Module, Error}) ->
190    Module:format_error(Error);
191format_error({file_error, Reason}) ->
192    io_lib:fwrite("~ts",[file:format_error(Reason)]);
193format_error(illegal_empty) ->
194    io_lib:fwrite("illegal use of empty symbol", []);
195format_error({internal_error, Error}) ->
196    io_lib:fwrite("internal yecc error: ~w", [Error]);
197format_error({missing_syntax_rule, Nonterminal}) ->
198    io_lib:fwrite("no syntax rule for non-terminal symbol ~ts",
199                  [format_symbol(Nonterminal)]);
200format_error({n_states, Exp, N}) ->
201    io_lib:fwrite("expected ~w states, but got ~p states", [Exp, N]);
202format_error(no_grammar_rules) ->
203    io_lib:fwrite("grammar rules are missing", []);
204format_error(nonterminals_missing) ->
205    io_lib:fwrite("Nonterminals is missing", []);
206format_error({precedence_op_is_endsymbol, SymName}) ->
207    io_lib:fwrite("precedence operator ~ts is endsymbol",
208                  [format_symbol(SymName)]);
209format_error({precedence_op_is_unknown, SymName}) ->
210    io_lib:fwrite("unknown precedence operator ~ts",
211                  [format_symbol(SymName)]);
212format_error({reserved, N}) ->
213    io_lib:fwrite("the use of ~w should be avoided", [N]);
214format_error({symbol_terminal_and_nonterminal, SymName}) ->
215    io_lib:fwrite("symbol ~ts is both a terminal and nonterminal",
216                  [format_symbol(SymName)]);
217format_error(rootsymbol_missing) ->
218    io_lib:fwrite("Rootsymbol is missing", []);
219format_error(terminals_missing) ->
220    io_lib:fwrite("Terminals is missing", []);
221format_error({undefined_nonterminal, Symbol}) ->
222    io_lib:fwrite("undefined nonterminal: ~ts", [format_symbol(Symbol)]);
223format_error({undefined_pseudo_variable, Atom}) ->
224    io_lib:fwrite("undefined pseudo variable ~w", [Atom]);
225format_error({undefined_symbol, SymName}) ->
226    io_lib:fwrite("undefined rhs symbol ~ts", [format_symbol(SymName)]);
227format_error({unused_nonterminal, Nonterminal}) ->
228    io_lib:fwrite("non-terminal symbol ~ts not used",
229                  [format_symbol(Nonterminal)]);
230format_error({unused_terminal, Terminal}) ->
231    io_lib:fwrite("terminal symbol ~ts not used",
232                  [format_symbol(Terminal)]);
233format_error({bad_symbol, String}) ->
234    io_lib:fwrite("bad symbol ~ts", [String]);
235format_error(cannot_parse) ->
236    io_lib:fwrite("cannot parse; possibly encoding mismatch", []).
237
238-type error_info() :: {erl_anno:location() | 'none',
239                       module(), ErrorDescriptor :: term()}.
240-type errors() :: [{file:filename(), [error_info()]}].
241-type warnings() :: [{file:filename(), [error_info()]}].
242-type ok_ret() :: {'ok', Parserfile :: file:filename()}
243                | {'ok', Parserfile :: file:filename(), warnings()}.
244-type error_ret() :: 'error'
245                  | {'error', Errors :: errors(), Warnings :: warnings()}.
246-type yecc_ret() :: ok_ret() | error_ret().
247
248-spec file(FileName) -> yecc_ret() when
249      FileName :: file:filename().
250
251file(GrammarFile) ->
252    file(GrammarFile, [report_errors, report_warnings]).
253
254-spec file(Grammarfile, Options) -> yecc_ret() when
255      Grammarfile :: file:filename(),
256      Options :: Option | [Option],
257      Option :: {'error_location', 'column' | 'line'}
258              | {'includefile', Includefile :: file:filename()}
259              | {'report_errors', boolean()}
260              | {'report_warnings', boolean()}
261              | {'report', boolean()}
262              | {'return_errors', boolean()}
263              | {'return_warnings', boolean()}
264              | {'return', boolean()}
265              | {'parserfile', Parserfile :: file:filename()}
266              | {'verbose', boolean()}
267              | {'warnings_as_errors', boolean()}
268              | 'report_errors' | 'report_warnings' | 'report'
269              | 'return_errors' | 'return_warnings' | 'return'
270              | 'verbose' | 'warnings_as_errors'.
271
272file(File, Options0) when is_list(Options0) ->
273    case is_filename(File) of
274        no -> erlang:error(badarg, [File, Options0]);
275        _ -> ok
276    end,
277    EnvOpts0 = env_default_opts(),
278    EnvOpts = select_recognized_opts(EnvOpts0),
279    Options = Options0 ++ EnvOpts,
280    case options(Options) of
281        badarg ->
282            erlang:error(badarg, [File, Options]);
283        OptionValues ->
284            Self = self(),
285            Flag = process_flag(trap_exit, false),
286            Pid = spawn_link(fun() -> infile(Self, File, OptionValues) end),
287            receive
288                {Pid, Rep} ->
289                    receive after 1 -> ok end,
290                    process_flag(trap_exit, Flag),
291                    Rep
292            end
293    end;
294file(File, Option) ->
295    file(File, [Option]).
296
297%% Kept for backward compatibility.
298yecc(Infile, Outfile) ->
299    yecc(Infile, Outfile, false, []).
300
301yecc(Infile, Outfile, Verbose) ->
302    yecc(Infile, Outfile, Verbose, []).
303
304yecc(Infilex, Outfilex, Verbose, Includefilex) ->
305    _ = statistics(runtime),
306    case file(Infilex, [{parserfile, Outfilex},
307                        {verbose, Verbose},
308                        {report, true},
309                        {includefile, Includefilex}]) of
310        {ok, _File} ->
311            statistics(runtime);
312        error ->
313            exit(error)
314    end.
315
316%%%
317%%% Local functions
318%%%
319
320%% Copied from compile.erl.
321env_default_opts() ->
322    Key = "ERL_COMPILER_OPTIONS",
323    case os:getenv(Key) of
324	false -> [];
325	Str when is_list(Str) ->
326	    case erl_scan:string(Str) of
327		{ok,Tokens,_} ->
328                    Dot = {dot, erl_anno:new(1)},
329		    case erl_parse:parse_term(Tokens ++ [Dot]) of
330			{ok,List} when is_list(List) -> List;
331			{ok,Term} -> [Term];
332			{error,_Reason} ->
333			    io:format("Ignoring bad term in ~s\n", [Key]),
334			    []
335		    end;
336		{error, {_,_,_Reason}, _} ->
337		    io:format("Ignoring bad term in ~s\n", [Key]),
338		    []
339	    end
340    end.
341
342select_recognized_opts(Options0) ->
343    Options = preprocess_options(Options0),
344    AllOptions = all_options(),
345    [Option ||
346        {Name, _} = Option <- Options,
347        lists:member(Name, AllOptions)].
348
349options(Options0) ->
350    Options1 = preprocess_options(Options0),
351    AllOptions = all_options(),
352    case check_options(Options1, AllOptions, []) of
353        badarg ->
354            badarg;
355        OptionValues  ->
356            AllOptionValues =
357                [case lists:keyfind(Option, 1, OptionValues) of
358                     false ->
359                         {Option, default_option(Option)};
360                     OptionValue ->
361                         OptionValue
362                 end || Option <- AllOptions],
363            foldr(fun({_, false}, L) -> L;
364                     ({Option, true}, L) -> [Option | L];
365                     (OptionValue, L) -> [OptionValue | L]
366                  end, [], AllOptionValues)
367    end.
368
369preprocess_options(Options) ->
370    foldr(fun preproc_opt/2, [], Options).
371
372preproc_opt(return, Os) ->
373    [{return_errors, true}, {return_warnings, true} | Os];
374preproc_opt(report, Os) ->
375    [{report_errors, true}, {report_warnings, true} | Os];
376preproc_opt({return, T}, Os) ->
377    [{return_errors, T}, {return_warnings, T} | Os];
378preproc_opt({report, T}, Os) ->
379    [{report_errors, T}, {report_warnings, T} | Os];
380preproc_opt(Option, Os) ->
381    [try atom_option(Option) catch error:_ -> Option end | Os].
382
383check_options([{Option, FileName0} | Options], AllOptions, L)
384          when Option =:= includefile; Option =:= parserfile ->
385    case is_filename(FileName0) of
386        no ->
387            badarg;
388        Filename ->
389            check_options(Options, AllOptions, [{Option, Filename} | L])
390    end;
391check_options([{error_location, ELoc}=OptionValue | Options], AllOptions, L)
392          when ELoc =:= column; ELoc =:= line ->
393    check_options(Options, AllOptions, [OptionValue | L]);
394check_options([{Option, Boolean}=OptionValue | Options], AllOptions, L)
395          when is_boolean(Boolean) ->
396    case lists:member(Option, AllOptions) of
397        true ->
398            check_options(Options, AllOptions, [OptionValue | L]);
399        false ->
400            badarg
401        end;
402check_options([], _AllOptions, L) ->
403    L;
404check_options(_Options, _, _L) ->
405    badarg.
406
407all_options() ->
408    [error_location, file_attributes, includefile, parserfile,
409     report_errors, report_warnings, return_errors, return_warnings,
410     time, verbose, warnings_as_errors].
411
412default_option(error_location) -> column;
413default_option(file_attributes) -> true;
414default_option(includefile) -> [];
415default_option(parserfile) -> [];
416default_option(report_errors) -> true;
417default_option(report_warnings) -> true;
418default_option(return_errors) -> false;
419default_option(return_warnings) -> false;
420default_option(time) -> false;
421default_option(verbose) -> false;
422default_option(warnings_as_errors) -> false.
423
424atom_option(file_attributes) -> {file_attributes, true};
425atom_option(report_errors) -> {report_errors, true};
426atom_option(report_warnings) -> {report_warnings, true};
427atom_option(return_errors) -> {return_errors, true};
428atom_option(return_warnings) -> {return_warnings, true};
429atom_option(time) -> {time, true};
430atom_option(verbose) -> {verbose, true};
431atom_option(warnings_as_errors) -> {warnings_as_errors, true};
432atom_option(Key) -> Key.
433
434is_filename(T) ->
435    try filename:flatten(T)
436    catch error: _ -> no
437    end.
438
439shorten_filename(Name0) ->
440    {ok,Cwd} = file:get_cwd(),
441    case string:prefix(Name0, Cwd) of
442        nomatch -> Name0;
443        Rest ->
444            case unicode:characters_to_list(Rest) of
445                "/"++N -> N;
446                N -> N
447            end
448    end.
449
450start(Infilex, Options) ->
451    Infile = assure_extension(Infilex, ".yrl"),
452    {_, Outfilex0} = lists:keyfind(parserfile, 1, Options),
453    {_, Includefilex} = lists:keyfind(includefile, 1, Options),
454    Outfilex = case Outfilex0 of
455                   [] -> filename:rootname(Infilex, ".yrl");
456                   _ -> Outfilex0
457               end,
458    Includefile = case Includefilex of
459                      [] -> [];
460                      _ -> assure_extension(Includefilex,".hrl")
461                  end,
462    IncludefileVersion = includefile_version(Includefile),
463    Outfile = assure_extension(Outfilex, ".erl"),
464    Module = list_to_atom(filename:basename(Outfile, ".erl")),
465    #yecc{infile = Infile,
466          outfile = Outfile,
467          includefile = Includefile,
468          includefile_version = IncludefileVersion,
469          module = Module,
470          options = Options,
471          verbose = member(verbose, Options),
472          file_attrs = member(file_attributes, Options)}.
473
474assure_extension(File, Ext) ->
475    concat([strip_extension(File, Ext), Ext]).
476
477%% Assumes File is a filename.
478strip_extension(File, Ext) ->
479    case filename:extension(File) of
480        Ext -> filename:rootname(File);
481        _Other -> File
482    end.
483
484infile(Parent, Infilex, Options) ->
485    St0 = start(Infilex, Options),
486    St = case file:open(St0#yecc.infile, [read, read_ahead]) of
487             {ok, Inport} ->
488                 try
489                     Encoding = epp:set_encoding(Inport),
490                     St1 = St0#yecc{inport = Inport, encoding = Encoding},
491                     outfile(St1)
492                 after
493                     ok = file:close(Inport)
494                 end;
495             {error, Reason} ->
496                 add_error(St0#yecc.infile, none, {file_error, Reason}, St0)
497         end,
498    case {St#yecc.errors, werror(St)} of
499        {[], false} -> ok;
500        _ -> _ = file:delete(St#yecc.outfile), ok
501    end,
502    Parent ! {self(), yecc_ret(St)}.
503
504werror(St) ->
505    St#yecc.warnings =/= []
506	andalso member(warnings_as_errors, St#yecc.options).
507
508outfile(St0) ->
509    case file:open(St0#yecc.outfile, [write, delayed_write]) of
510        {ok, Outport} ->
511            try
512                %% Set the same encoding as infile:
513                set_encoding(St0, Outport),
514                generate(St0#yecc{outport = Outport, line = 1})
515            catch
516                throw: St1  ->
517                    St1;
518                exit: Reason ->
519                    add_error({internal_error, Reason}, St0)
520            after
521               ok = file:close(Outport)
522            end;
523        {error, Reason} ->
524            add_error(St0#yecc.outfile, none, {file_error, Reason}, St0)
525    end.
526
527os_process_size() ->
528    case os:type() of
529        {unix, sunos} ->
530            Size = os:cmd("ps -o vsz -p " ++ os:getpid() ++ " | tail -1"),
531            list_to_integer(nonl(Size));
532        _ ->
533            0
534    end.
535
536nonl([$\n]) -> [];
537nonl([]) -> [];
538nonl([H|T]) -> [H|nonl(T)].
539
540timeit(Name, Fun, St0) ->
541    Time = runtime,
542    %% Time = wall_clock,
543    {Before, _} = statistics(Time),
544    St = Fun(St0),
545    {After, _} = statistics(Time),
546    Mem0 = erts_debug:flat_size(St)*erlang:system_info(wordsize),
547    Mem = lists:flatten(io_lib:format("~.1f kB", [Mem0/1024])),
548    Sz = lists:flatten(io_lib:format("~.1f MB", [os_process_size()/1024])),
549    io:fwrite(" ~-30w: ~10.2f s ~12s ~10s\n",
550              [Name, (After-Before)/1000, Mem, Sz]),
551    St.
552
553-define(PASS(P), {P, fun P/1}).
554
555generate(St0) ->
556    St1 = output_encoding_comment(St0),
557    Passes = [?PASS(parse_grammar), ?PASS(check_grammar),
558              ?PASS(states_and_goto_table), ?PASS(parse_actions),
559              ?PASS(action_conflicts), ?PASS(write_file)],
560    F = case member(time, St1#yecc.options) of
561            true ->
562                io:fwrite(<<"Generating parser from grammar in ~ts\n">>,
563                          [format_filename(St1#yecc.infile, St1)]),
564                fun timeit/3;
565            false ->
566                fun(_Name, Fn, St) -> Fn(St) end
567        end,
568    Fun = fun({Name, Fun}, St) ->
569                  St2 = F(Name, Fun, St),
570                  if
571                      St2#yecc.errors =:= [] -> St2;
572                      true -> throw(St2)
573                  end
574          end,
575    foldl(Fun, St1, Passes).
576
577parse_grammar(St) ->
578    StartLocation =
579        case lists:keyfind(error_location, 1, St#yecc.options) of
580            {error_location, column} ->
581                {1, 1};
582            _ ->
583                1
584        end,
585    parse_grammar(St#yecc.inport, StartLocation, St).
586
587parse_grammar(Inport, Location, St) ->
588    {NextLocation, Grammar} = read_grammar(Inport, St, Location),
589    parse_grammar(Grammar, Inport, NextLocation, St).
590
591parse_grammar(eof, _Inport, _NextLocation, St) ->
592    St;
593parse_grammar({#symbol{name = 'Header'}, Ss}, Inport, NextLocation, St0) ->
594    St1 = St0#yecc{header = [S || {string,_,S} <- Ss]},
595    parse_grammar(Inport, NextLocation, St1);
596parse_grammar({#symbol{name = 'Erlang'}, [#symbol{name = code}]}, _Inport,
597              NextLocation, St) ->
598    St#yecc{erlang_code = line_of_location(NextLocation)};
599parse_grammar(Grammar, Inport, NextLocation, St0) ->
600    St = parse_grammar(Grammar, St0),
601    parse_grammar(Inport, NextLocation, St).
602
603parse_grammar({error,ErrorLocation,Error}, St) ->
604    add_error(erl_anno:new(ErrorLocation), Error, St);
605parse_grammar({rule, Rule, Tokens, AllTokens}, St0) ->
606    NmbrOfDaughters = case Rule of
607                          [_, #symbol{name = '$empty'}]  -> 0;
608                          _ -> length(Rule) - 1
609                      end,
610    {IsGuard, IsWellFormed} = check_action(Tokens),
611    {Tokens1, St} = subst_pseudo_vars(AllTokens,
612                                      NmbrOfDaughters,
613                                      St0),
614    RuleDef = #rule{symbols = Rule,
615                    tokens = Tokens1,
616                    is_guard = IsGuard,
617                    is_well_formed = IsWellFormed},
618    St#yecc{rules_list = [RuleDef | St#yecc.rules_list]};
619parse_grammar({prec, Prec}, St) ->
620    St#yecc{prec = Prec ++ St#yecc.prec};
621parse_grammar({#symbol{}, [{string,Anno,String}]}, St) ->
622    add_error(Anno, {bad_symbol, String}, St);
623parse_grammar({#symbol{anno = Anno, name = Name}, Symbols}, St) ->
624    CF = fun(I) ->
625                 case element(I, St) of
626                     [] ->
627                         setelement(I, St, Symbols);
628                     _ ->
629                         add_error(Anno, {duplicate_declaration, Name}, St)
630                 end
631         end,
632    OneSymbol = length(Symbols) =:= 1,
633    case Name of
634        'Nonterminals' -> CF(#yecc.nonterminals);
635        'Terminals' -> CF(#yecc.terminals);
636        'Rootsymbol' when OneSymbol -> CF(#yecc.rootsymbol);
637        'Endsymbol' when OneSymbol ->  CF(#yecc.endsymbol);
638        'Expect' when OneSymbol -> CF(#yecc.expect_shift_reduce);
639        'States' when OneSymbol -> CF(#yecc.expect_n_states); % undocumented
640        _ -> add_warning(Anno, bad_declaration, St)
641    end.
642
643read_grammar(Inport, St, Location) ->
644    case yeccscan:scan(Inport, '', Location) of
645        {eof, NextLocation} ->
646            {NextLocation, eof};
647        {error, {ErrorLocation, Mod, What}, NextLocation} ->
648            {NextLocation, {error, ErrorLocation, {error, Mod, What}}};
649        {error, terminated} ->
650            throw(St);
651        {error, _} ->
652            File = St#yecc.infile,
653            throw(add_error(File, none, cannot_parse, St));
654        {ok, AllInput, Input, NextLocation} ->
655            {NextLocation,
656             case yeccparser:parse(Input) of
657                 {error, {ErrorLocation, Mod, Message}} ->
658                     {error, ErrorLocation, {error, Mod, Message}};
659                 {ok, {rule, Rule, {erlang_code, Tokens}}} ->
660                     AllTokens0 =
661                         lists:dropwhile(fun(T) -> element(1, T) =/= ':' end,
662                                         AllInput),
663                     AllTokens = case AllTokens0 of
664                                     [] ->
665                                         Tokens;
666                                     _ -> % Remove ':' and dot.
667                                         tl(AllTokens0) -- [last(AllTokens0)]
668                                 end,
669                     {rule, Rule, Tokens, AllTokens};
670                 {ok, {#symbol{name=P},
671                       [#symbol{name=I} | OpL]}=Ss} ->
672                     A = precedence(P),
673                     if
674                         A =/= unknown,
675                         is_integer(I),
676                         OpL =/= [] ->
677                             Ps = [{Op, I , A} || Op <- OpL],
678                             {prec, Ps};
679                         true ->
680                             Ss
681                     end;
682                 {ok, Ss} ->
683                     Ss
684             end}
685    end.
686
687precedence('Left') -> left;
688precedence('Right') -> right;
689precedence('Unary') -> unary;
690precedence('Nonassoc') -> nonassoc;
691precedence(_) -> unknown.
692
693%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
694
695check_grammar(St0) ->
696    Empty = #symbol{anno = none, name = '$empty'},
697    AllSymbols = St0#yecc.nonterminals ++ St0#yecc.terminals ++ [Empty],
698    St1 = St0#yecc{all_symbols = AllSymbols},
699    Cs = [fun check_nonterminals/1, fun check_terminals/1,
700          fun check_rootsymbol/1, fun check_endsymbol/1,
701          fun check_expect/1, fun check_states/1,
702          fun check_precedences/1, fun check_rules/1],
703    foldl(fun(F, St) -> F(St) end, St1, Cs).
704
705check_nonterminals(St) ->
706    case St#yecc.nonterminals of
707        [] ->
708            add_error(nonterminals_missing, St);
709        Nonterminals ->
710            {Unique, Dups} = duplicates(names(Nonterminals)),
711            St1 = add_warnings(Dups, duplicate_nonterminal, St),
712            St2 = check_reserved(Unique, St1),
713            St2#yecc{nonterminals = [?ACCEPT | Unique]}
714    end.
715
716check_terminals(St0) ->
717    case St0#yecc.terminals of
718        [] ->
719            add_error(terminals_missing, St0);
720        Terminals ->
721            {Unique, Dups} = duplicates(names(Terminals)),
722            St1 = add_warnings(Dups, duplicate_terminal, St0),
723            Common = intersect(St1#yecc.nonterminals, Unique),
724            St2 = add_errors(Common, symbol_terminal_and_nonterminal, St1),
725            St3 = check_reserved(Unique, St2),
726            St3#yecc{terminals = ['$empty' | Unique]}
727    end.
728
729check_reserved(Names, St) ->
730    add_errors(intersect(Names, ['$empty', '$end', '$undefined']),
731               reserved, St).
732
733check_rootsymbol(St) ->
734    case St#yecc.rootsymbol of
735        [] ->
736            add_error(rootsymbol_missing, St);
737        [#symbol{anno = Anno, name = SymName}] ->
738            case kind_of_symbol(St, SymName) of
739                nonterminal ->
740                    St#yecc{rootsymbol = SymName};
741                _ ->
742                    add_error(Anno, {bad_rootsymbol, SymName}, St)
743            end
744    end.
745
746check_endsymbol(St) ->
747    case St#yecc.endsymbol of
748        [] ->
749            St#yecc{endsymbol = '$end'};
750        [#symbol{anno = Anno, name = SymName}] ->
751            case kind_of_symbol(St, SymName) of
752                nonterminal ->
753                    add_error(Anno, {endsymbol_is_nonterminal, SymName}, St);
754                terminal ->
755                    add_error(Anno, {endsymbol_is_terminal, SymName}, St);
756                _ ->
757                    St#yecc{endsymbol = SymName}
758            end
759    end.
760
761check_expect(St0) ->
762    case St0#yecc.expect_shift_reduce of
763        [] ->
764            St0#yecc{expect_shift_reduce = 0};
765        [#symbol{name = Expect}] when is_integer(Expect) ->
766            St0#yecc{expect_shift_reduce = Expect};
767        [#symbol{anno = Anno, name = Name}] ->
768            St1 = add_error(Anno, {bad_expect, Name}, St0),
769            St1#yecc{expect_shift_reduce = 0}
770    end.
771
772check_states(St) ->
773    case St#yecc.expect_n_states of
774        [] ->
775            St;
776        [#symbol{name = NStates}] when is_integer(NStates) ->
777            St#yecc{expect_n_states = NStates};
778        [#symbol{anno = Anno, name = Name}] ->
779            add_error(Anno, {bad_states, Name}, St)
780    end.
781
782check_precedences(St0) ->
783    {St1, _} =
784        foldr(fun({#symbol{anno = Anno, name = Op},_I,_A}, {St,Ps}) ->
785                      case member(Op, Ps) of
786                          true ->
787                              {add_error(Anno, {duplicate_precedence,Op}, St),
788                               Ps};
789                          false ->
790                              {St, [Op | Ps]}
791                      end
792              end, {St0,[]}, St0#yecc.prec),
793    foldl(fun({#symbol{anno = Anno, name = Op},I,A}, St) ->
794                  case kind_of_symbol(St, Op) of
795                      endsymbol ->
796                          add_error(Anno,{precedence_op_is_endsymbol,Op}, St);
797                      unknown ->
798                          add_error(Anno, {precedence_op_is_unknown, Op}, St);
799                      _ ->
800                          St#yecc{prec = [{Op,I,A} | St#yecc.prec]}
801                  end
802          end, St1#yecc{prec = []}, St1#yecc.prec).
803
804check_rule(Rule0, {St0,Rules}) ->
805    Symbols = Rule0#rule.symbols,
806    #symbol{anno = HeadAnno, name = Head} = hd(Symbols),
807    case member(Head, St0#yecc.nonterminals) of
808        false ->
809            {add_error(HeadAnno, {undefined_nonterminal, Head}, St0), Rules};
810        true ->
811            St = check_rhs(tl(Symbols), St0),
812            Rule = Rule0#rule{location = location(HeadAnno),
813                              symbols = names(Symbols)},
814            {St, [Rule | Rules]}
815    end.
816
817check_rules(St0) ->
818    {St,Rules0} = foldl(fun check_rule/2, {St0,[]}, St0#yecc.rules_list),
819    case St#yecc.rules_list of
820        [] ->
821            add_error(no_grammar_rules, St);
822        _ ->
823            Rule = #rule{location = none,
824                         symbols = [?ACCEPT, St#yecc.rootsymbol],
825                         tokens = []},
826            Rules1 = [Rule | Rules0],
827            Rules = map(fun({R,I}) -> R#rule{n = I} end,  count(0, Rules1)),
828            St#yecc{rules_list = Rules, rules = list_to_tuple(Rules)}
829    end.
830
831duplicates(List) ->
832    Unique = usort(List),
833    {Unique, List -- Unique}.
834
835names(Symbols) ->
836    map(fun(Symbol) -> Symbol#symbol.name end, Symbols).
837
838symbol_anno(Name, St) ->
839    #symbol{anno = Anno} = symbol_find(Name, St#yecc.all_symbols),
840    Anno.
841
842symbol_member(Symbol, Symbols) ->
843    symbol_find(Symbol#symbol.name, Symbols) =/= false.
844
845symbol_find(Name, Symbols) ->
846    lists:keyfind(Name, #symbol.name, Symbols).
847
848states_and_goto_table(St0) ->
849    St1 = create_symbol_table(St0),
850    St = compute_states(St1),
851    create_precedence_table(St).
852
853parse_actions(St) ->
854    _ = erase(), % the pd is used when decoding lookahead sets
855    ParseActions = compute_parse_actions(St#yecc.n_states, St, []),
856    _ = erase(),
857    St#yecc{parse_actions = ParseActions, state_tab = []}.
858
859action_conflicts(St0) ->
860    St = find_action_conflicts(St0),
861    St#yecc{conflicts_done = true}.
862
863-record(state_info, {reduce_only, state_repr, comment}).
864
865write_file(St0) ->
866    #yecc{parse_actions = ParseActions, goto_tab = GotoTab} = St0,
867    Sorted = sort_parse_actions(ParseActions),
868    StateReprs = find_identical_shift_states(Sorted),
869    StateInfo = collect_some_state_info(Sorted, StateReprs),
870    StateJumps = find_partial_shift_states(Sorted, StateReprs),
871    UserCodeActions = find_user_code(Sorted, St0),
872    #yecc{infile = Infile, outfile = Outfile,
873          inport = Inport, outport = Outport,
874          nonterminals = Nonterminals} = St0,
875    {St10, N_lines, LastErlangCodeLine} =
876        output_prelude(Outport, Inport, St0),
877    St20 = St10#yecc{line = St10#yecc.line + N_lines},
878    St25 = nl(St20),
879    St30 = output_file_directive(St25, Outfile, St25#yecc.line),
880    St40 = nl(St30),
881    St50 = output_actions(St40, StateJumps, StateInfo),
882    Go0 = [{Symbol,{From,To}} || {{From,Symbol},To} <- ets:tab2list(GotoTab)],
883    Go = family_with_domain(Go0, Nonterminals),
884    St60 = output_goto(St50, Go, StateInfo),
885    St70 = output_inlined(St60, UserCodeActions, Infile),
886    St = nl(St70),
887    case LastErlangCodeLine of
888        %% Just in case warnings or errors are emitted after the last
889        %% line of the file.
890        {last_erlang_code_line, Last_line} ->
891            output_file_directive(St, Infile, Last_line);
892        no_erlang_code ->
893            St
894    end.
895
896yecc_ret(St0) ->
897    St = check_expected(St0),
898    report_errors(St),
899    report_warnings(St),
900    Es = pack_errors(St#yecc.errors),
901    Ws = pack_warnings(St#yecc.warnings),
902    Werror = werror(St),
903    if
904        Werror ->
905            do_error_return(St, Es, Ws);
906        Es =:= [] ->
907            case member(return_warnings, St#yecc.options) of
908                true -> {ok, St#yecc.outfile, Ws};
909                false -> {ok, St#yecc.outfile}
910            end;
911        true ->
912            do_error_return(St, Es, Ws)
913    end.
914
915do_error_return(St, Es, Ws) ->
916    case member(return_errors, St#yecc.options) of
917        true -> {error, Es, Ws};
918        false -> error
919    end.
920
921check_expected(St0) ->
922    #yecc{shift_reduce = SR, reduce_reduce = RR, expect_shift_reduce = ExpSR,
923          n_states = NStates0, expect_n_states = ExpStates,
924          conflicts_done = Done} = St0,
925    N_RR = length(usort(RR)),
926    N_SR = length(usort(SR)),
927    St1 = if
928              not Done ->
929                  St0;
930              N_SR =:= ExpSR, N_RR =:= 0 ->
931                  St0;
932              true ->
933                  add_warning(none, {conflicts, N_SR, N_RR}, St0)
934          end,
935    NStates = NStates0 + 1,
936    if
937        (not Done) or (ExpStates =:= []) or (NStates =:= ExpStates) ->
938            St1;
939        true ->
940            add_warning(none, {n_states, ExpStates, NStates}, St1)
941    end.
942
943pack_errors([{File,_} | _] = Es) ->
944    [{File, flatmap(fun({_,E}) -> [E] end, sort(Es))}];
945pack_errors([]) ->
946    [].
947
948pack_warnings([{File,_} | _] = Ws) ->
949    [{File, flatmap(fun({_,W}) -> [W] end, sort(Ws))}];
950pack_warnings([]) ->
951    [].
952
953report_errors(St) ->
954    case member(report_errors, St#yecc.options) of
955        true ->
956            foreach(fun({File,{none,Mod,E}}) ->
957                            io:fwrite(<<"~ts: ~ts\n">>,
958                                      [File,Mod:format_error(E)]);
959                       ({File,{Location,Mod,E}}) ->
960                            io:fwrite(<<"~ts:~s: ~ts\n">>,
961                                      [File,pos(Location),Mod:format_error(E)])
962                    end, sort(St#yecc.errors));
963        false ->
964            ok
965    end.
966
967report_warnings(St) ->
968    Werror = member(warnings_as_errors, St#yecc.options),
969    Prefix = case Werror of
970		 true -> "";
971		 false -> "Warning: "
972	     end,
973    ReportWerror = Werror andalso member(report_errors, St#yecc.options),
974    case member(report_warnings, St#yecc.options) orelse ReportWerror of
975        true ->
976            foreach(fun({File,{none,Mod,W}}) ->
977                            io:fwrite(<<"~ts: ~s~ts\n">>,
978                                      [File,Prefix,
979				       Mod:format_error(W)]);
980                       ({File,{Location,Mod,W}}) ->
981                            io:fwrite(<<"~ts:~s: ~s~ts\n">>,
982                                      [File,pos(Location),Prefix,
983				       Mod:format_error(W)])
984                    end, sort(St#yecc.warnings));
985        false ->
986            ok
987    end.
988
989pos(Line) when is_integer(Line) ->
990    io_lib:format("~w", [Line]);
991pos({Line, Column}) when is_integer(Line), is_integer(Column) ->
992    io_lib:format("~w:~w", [Line, Column]).
993
994add_error(E, St) ->
995    add_error(none, E, St).
996
997add_error(Anno, E, St) ->
998    add_error(St#yecc.infile, Anno, E, St).
999
1000add_error(File, Anno, E, St) ->
1001    Loc = location(Anno),
1002    St#yecc{errors = [{File,{Loc,?MODULE,E}}|St#yecc.errors]}.
1003
1004add_errors(SymNames, E0, St0) ->
1005    foldl(fun(SymName, St) ->
1006                  add_error(symbol_anno(SymName, St), {E0, SymName}, St)
1007          end, St0, SymNames).
1008
1009add_warning(Anno, W, St) ->
1010    Loc = location(Anno),
1011    St#yecc{warnings = [{St#yecc.infile,{Loc,?MODULE,W}}|St#yecc.warnings]}.
1012
1013add_warnings(SymNames, W0, St0) ->
1014    foldl(fun(SymName, St) ->
1015                  add_warning(symbol_anno(SymName, St), {W0, SymName}, St)
1016          end, St0, SymNames).
1017
1018check_rhs([#symbol{name = '$empty'}], St) ->
1019    St;
1020check_rhs(Rhs, St0) ->
1021    case symbol_find('$empty', Rhs) of
1022        #symbol{anno = Anno} ->
1023            add_error(Anno, illegal_empty, St0);
1024        false ->
1025            foldl(fun(Sym, St) ->
1026                          case symbol_member(Sym, St#yecc.all_symbols) of
1027                              true ->
1028                                  St;
1029                              false ->
1030                                  E = {undefined_symbol,Sym#symbol.name},
1031                                  add_error(Sym#symbol.anno, E, St)
1032                          end
1033                  end, St0, Rhs)
1034    end.
1035
1036check_action(Tokens) ->
1037    case erl_parse:parse_exprs(add_roberts_dot(Tokens, erl_anno:new(0))) of
1038        {error, _Error} ->
1039            {false, false};
1040        {ok, [Expr | Exprs]} ->
1041            IsGuard = Exprs =:= [] andalso erl_lint:is_guard_test(Expr),
1042            {IsGuard, true}
1043    end.
1044
1045add_roberts_dot([], Anno) ->
1046    [{'dot', Anno}];
1047add_roberts_dot([{'dot', Anno} | _], _) ->
1048    [{'dot', Anno}];
1049add_roberts_dot([Token | Tokens], _) ->
1050    [Token | add_roberts_dot(Tokens, element(2, Token))].
1051
1052-define(PSEUDO_PREFIX, "___").
1053-define(PSEUDO_VAR_1, '___1').
1054
1055subst_pseudo_vars([], _, St) ->
1056    {[], St};
1057subst_pseudo_vars([H0 | T0], NmbrOfDaughters, St0) ->
1058    {H, St1} = subst_pseudo_vars(H0, NmbrOfDaughters, St0),
1059    {T, St} = subst_pseudo_vars(T0, NmbrOfDaughters, St1),
1060    {[H | T], St};
1061subst_pseudo_vars({atom, Anno, Atom}, NmbrOfDaughters, St0) ->
1062    case atom_to_list(Atom) of
1063        [$$ | Rest] ->
1064            try list_to_integer(Rest) of
1065                N when N > 0, N =< NmbrOfDaughters ->
1066                    PseudoVarName = append(?PSEUDO_PREFIX, Rest),
1067                    NewAnno = erl_anno:set_text(PseudoVarName, Anno),
1068                    {{var, NewAnno, list_to_atom(PseudoVarName)}, St0};
1069                _ ->
1070                    St = add_error(Anno,
1071                                   {undefined_pseudo_variable, Atom},
1072                                   St0),
1073                    {{atom, Anno, '$undefined'}, St}
1074            catch
1075                error: _ -> {{atom, Anno, Atom}, St0}
1076            end;
1077        _ ->
1078            {{atom, Anno, Atom}, St0}
1079    end;
1080subst_pseudo_vars(Tuple, NmbrOfDaughters, St0) when is_tuple(Tuple) ->
1081    {L, St} = subst_pseudo_vars(tuple_to_list(Tuple), NmbrOfDaughters, St0),
1082    {list_to_tuple(L), St};
1083subst_pseudo_vars(Something_else, _, St) ->
1084    {Something_else, St}.
1085
1086kind_of_symbol(St, SymName) ->
1087    case member(SymName, St#yecc.nonterminals) of
1088        false ->
1089            case member(SymName, St#yecc.terminals) of
1090                false ->
1091                    case St#yecc.endsymbol of
1092                        SymName ->
1093                            endsymbol;
1094                        _ ->
1095                            unknown
1096                    end;
1097                true ->
1098                    terminal
1099            end;
1100        true ->
1101            nonterminal
1102    end.
1103
1104%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1105% Computing parse states and goto table from grammar.
1106% Start item: {0, [Endsymbol]} <->
1107% (['ACCEPT' '.', Rootsymbol], {'$'}) in Aho & Johnson
1108% where '$end' is the default end of input symbol of the
1109% scanner if no 'Endsymbol' has been declared in the syntax file.
1110
1111-record(tabs, {
1112          symbols,      % ETS-set, keypos 1: {SymbolName, SymbolCode}
1113          inv_symbols,  % ETS-set, keypos 2: {SymbolName, SymbolCode}
1114          state_id,     % ETS-bag, keypos 1: {StateId, StateNum}
1115                        % StateId is not unique for a state.
1116          rp_rhs,       % rule pointer -> the remaining rhs symbols
1117          rp_info,      % rule pointer -> expanding rules and lookahead
1118          goto          % ETS-bag, keypos 1: first
1119                        % {{FromStateNum, Symbol, ToStateNum}}, then
1120                        % {{FromStateNum, Symbol}, ToStateNum}
1121         }).
1122
1123-record(item, { % what states are made of
1124          rule_pointer,
1125          look_ahead,
1126          rhs
1127         }).
1128
1129compute_states(St0) ->
1130    SymbolTab = St0#yecc.symbol_tab,
1131    CodedRules = map(fun(#rule{symbols = Syms} = R) ->
1132                             R#rule{symbols = code_symbols(Syms, SymbolTab)}
1133                     end, St0#yecc.rules_list),
1134    CodedNonterminals = code_symbols(St0#yecc.nonterminals, SymbolTab),
1135    %% Only coded in this phase; StC is thrown away.
1136    StC = St0#yecc{rules_list = CodedRules,
1137                   rules = list_to_tuple(CodedRules),
1138                   nonterminals = CodedNonterminals},
1139    {RuleIndex, RulePointer2Rule} =
1140        make_rule_index(StC, St0#yecc.rules_list),
1141    StateTab0 = {},
1142    StateIdTab = ets:new(yecc_state_id, [set]),
1143    GotoTab = ets:new(yecc_goto, [bag]),
1144    RulePointerRhs = make_rhs_index(StC#yecc.rules_list),
1145    RulePointerInfo = make_rule_pointer_info(StC, RulePointerRhs, RuleIndex),
1146
1147    Tables = #tabs{symbols = SymbolTab,
1148                   state_id = StateIdTab,
1149                   rp_rhs = RulePointerRhs,
1150                   rp_info = RulePointerInfo,
1151                   goto = GotoTab},
1152
1153    _ = erase(),
1154    EndsymCode = code_terminal(StC#yecc.endsymbol, StC#yecc.symbol_tab),
1155    {StateId, State0} = compute_state([{EndsymCode, 1}], Tables),
1156
1157    StateNum0 = first_state(),
1158    FirstState = {StateNum0, State0},
1159    StateTab1 = insert_state(Tables, StateTab0, FirstState, StateId),
1160    {StateTab, N} =
1161        compute_states1([{StateNum0, get_current_symbols(State0)}],
1162                        FirstState, StateTab1, Tables),
1163    true = ets:delete(StateIdTab),
1164    St = St0#yecc{state_tab = StateTab, goto_tab = GotoTab, n_states = N,
1165                  rule_pointer2rule = RulePointer2Rule},
1166    decode_goto(GotoTab, St#yecc.inv_symbol_tab),
1167    check_usage(St).
1168
1169first_state() ->
1170    0.
1171
1172decode_goto(GotoTab, InvSymTab) ->
1173    G = ets:tab2list(GotoTab),
1174    ets:delete_all_objects(GotoTab),
1175    ets:insert(GotoTab,
1176               map(fun({{From, Sym, Next}}) ->
1177                           {{From, decode_symbol(Sym, InvSymTab)}, Next}
1178                   end, G)).
1179
1180check_usage(St0) ->
1181    SelSyms = ets:fun2ms(fun({{_,Sym},_}) -> Sym end),
1182    UsedSymbols = ets:select(St0#yecc.goto_tab, SelSyms),
1183    Syms = ordsets:from_list([?ACCEPT, '$empty' | UsedSymbols]),
1184    NonTerms = ordsets:from_list(St0#yecc.nonterminals),
1185    UnusedNonTerms = ordsets:to_list(ordsets:subtract(NonTerms, Syms)),
1186    St1 = add_warnings(UnusedNonTerms, unused_nonterminal, St0),
1187    Terms = ordsets:from_list(St0#yecc.terminals),
1188    St2 = add_warnings(ordsets:to_list(ordsets:subtract(Terms, Syms)),
1189                       unused_terminal, St1),
1190    DefinedNonTerminals = map(fun(#rule{symbols = [Name | _]}) ->
1191                                            Name
1192                              end, St2#yecc.rules_list),
1193    DefNonTerms = ordsets:from_list(DefinedNonTerminals),
1194    UndefNonTerms = ordsets:subtract(NonTerms, DefNonTerms),
1195    add_errors(ordsets:to_list(ordsets:subtract(UndefNonTerms,
1196                                                UnusedNonTerms)),
1197               missing_syntax_rule, St2).
1198
1199%% States are sometimes big, should not be copied to ETS tables.
1200%% Here an "extendible" tuple is used.
1201lookup_state(StateTab, N) ->
1202    element(N+1, StateTab).
1203
1204insert_state(#tabs{state_id = StateIdTab}, StateTab0, State, StateId) ->
1205    {N, _Items} = State,
1206    insert_state_id(StateIdTab, N, StateId),
1207    StateTab = if
1208                   tuple_size(StateTab0) > N ->
1209                       StateTab0;
1210                   true ->
1211                       list_to_tuple(tuple_to_list(StateTab0) ++
1212                                     lists:duplicate(round(1 + N * 1.5), []))
1213               end,
1214    setelement(N+1, StateTab, State).
1215
1216insert_state_id(StateIdTab, N, StateId) ->
1217    true = ets:insert(StateIdTab, {StateId, N}).
1218
1219compute_states1([], {N, _}=_CurrState, StateTab0, _Tables) ->
1220    {StateTab0, N};
1221compute_states1([{N, Symbols} | Try], CurrState, StateTab, Tables) ->
1222    {_N, S} = lookup_state(StateTab, N),
1223    Seeds = state_seeds(S, Symbols),
1224    compute_states2(Seeds, N, Try, CurrState, StateTab, Tables).
1225
1226compute_states2([], _N, Try, CurrState, StateTab, Tables) ->
1227    compute_states1(Try, CurrState, StateTab, Tables);
1228compute_states2([{Sym,Seed} | Seeds], N, Try, CurrState, StateTab, Tables) ->
1229    {StateId, NewState} = compute_state(Seed, Tables),
1230    case check_states(NewState, StateId, StateTab, Tables) of
1231        add ->
1232            {M, _} = CurrState,
1233            %% io:fwrite(<<"Adding state ~w\n">>, [M + 1]),
1234            CurrentSymbols = get_current_symbols(NewState),
1235            Next = M + 1,
1236            NextState = {Next, NewState},
1237            NewStateTab = insert_state(Tables, StateTab, NextState, StateId),
1238            insert_goto(Tables, N, Sym, Next),
1239            compute_states2(Seeds, N, [{Next, CurrentSymbols} | Try],
1240                            NextState, NewStateTab, Tables);
1241        {old, M} ->
1242            %% io:fwrite(<<"Identical to old state ~w\n">>, [M]),
1243            insert_goto(Tables, N, Sym, M),
1244            compute_states2(Seeds, N, Try, CurrState, StateTab, Tables);
1245        {merge, M, NewCurrent} ->
1246            %% io:fwrite(<<"Merging with state ~w\n">>, [M]),
1247            Try1 = case lists:keyfind(M, 1, Try) of
1248                       false ->
1249                           [{M, NewCurrent} | Try];
1250                       {_, OldCurrent} ->
1251                           case ordsets:is_subset(NewCurrent, OldCurrent) of
1252                               true ->
1253                                   Try;
1254                               false ->
1255                                   [{M, ordsets:union(NewCurrent, OldCurrent)}
1256                                    | keydelete(M, 1, Try)]
1257                           end
1258                   end,
1259            NewStateTab = merge_states(NewState, StateTab, Tables, M,StateId),
1260            insert_goto(Tables, N, Sym, M),
1261            compute_states2(Seeds, N, Try1, CurrState, NewStateTab, Tables)
1262    end.
1263
1264insert_goto(Tables, From, Sym, To) ->
1265    true = ets:insert(Tables#tabs.goto, {{From, Sym, To}}).
1266
1267%% Create an ets table for faster lookups.
1268create_symbol_table(St) ->
1269    #yecc{terminals = Terminals, endsymbol = Endsymbol} = St,
1270    SymbolTab = ets:new(yecc_symbols, [{keypos,1}]),
1271    %% '$empty' is always assigned 0
1272    Ts = ['$empty', Endsymbol | delete('$empty', Terminals)],
1273    TsC = count(0, Ts),
1274    NTsC = map(fun({NT,I}) -> {NT,-I} end, count(1, St#yecc.nonterminals)),
1275    Cs = TsC++NTsC,
1276    true = ets:insert(SymbolTab, Cs),
1277
1278    InvSymTable = ets:new(yecc_inverted_terminals, [{keypos,2}]),
1279    true = ets:insert(InvSymTable, Cs),
1280
1281    St#yecc{symbol_tab = SymbolTab, inv_symbol_tab = InvSymTable}.
1282
1283get_current_symbols(State) ->
1284    usort(get_current_symbols1(State, [])).
1285
1286get_current_symbols1([], Syms) ->
1287    Syms;
1288get_current_symbols1([#item{rhs = Rhs} | Items], Syms) ->
1289    case Rhs of
1290        [] ->
1291            get_current_symbols1(Items, Syms);
1292        [Symbol | _] ->
1293            get_current_symbols1(Items, [Symbol | Syms])
1294    end.
1295
1296state_seeds(Items, Symbols) ->
1297    L = [{S,{LA,RP + 1}} || #item{rule_pointer = RP, look_ahead = LA,
1298                                  rhs = [S | _]} <- Items],
1299    state_seeds1(keysort(1, L), Symbols).
1300
1301state_seeds1(_L, []) ->
1302    [];
1303state_seeds1(L, [Symbol | Symbols]) ->
1304    state_seeds(L, Symbol, Symbols, []).
1305
1306state_seeds([{Symbol, Item} | L], Symbol, Symbols, Is) ->
1307    state_seeds(L, Symbol, Symbols, [Item | Is]);
1308state_seeds([{S, _Item} | L], Symbol, Symbols, Is) when S < Symbol ->
1309    state_seeds(L, Symbol, Symbols, Is);
1310state_seeds(L, Symbol, Symbols, Is) ->
1311    [{Symbol, Is} | state_seeds1(L, Symbols)].
1312
1313compute_state(Seed, Tables) ->
1314    RpInfo = Tables#tabs.rp_info,
1315    foreach(fun({LA, RulePointer}) -> put(RulePointer, LA) end, Seed),
1316    foreach(fun({LA, RP}) -> compute_closure(LA, RP, RpInfo) end, Seed),
1317    Closure = keysort(1, erase()),
1318    state_items(Closure, [], [], Tables#tabs.rp_rhs).
1319
1320%% Collects a uniqe id for the state (all rule pointers).
1321state_items([{RP, LA} | L], Is, Id, RpRhs) ->
1322    I = #item{rule_pointer = RP, look_ahead = LA, rhs = element(RP, RpRhs)},
1323    state_items(L, [I | Is], [RP | Id], RpRhs);
1324state_items(_, Is, Id, _RpRhs) ->
1325    {Id, Is}.
1326
1327-compile({inline,[compute_closure/3]}).
1328compute_closure(Lookahead, RulePointer, RpInfo) ->
1329    case element(RulePointer, RpInfo) of
1330        []=Void -> % no followers, or terminal
1331            Void;
1332        {no_union, ExpandingRules, NewLookahead} ->
1333            compute_closure1(ExpandingRules, NewLookahead, RpInfo);
1334        {union, ExpandingRules, Lookahead0} ->
1335            NewLookahead = set_union(Lookahead0, Lookahead),
1336            compute_closure1(ExpandingRules, NewLookahead, RpInfo);
1337        ExpandingRules ->
1338            compute_closure1(ExpandingRules, Lookahead, RpInfo)
1339    end.
1340
1341compute_closure1([RulePointer | Tail], NewLookahead, RpInfo) ->
1342    compute_closure1(Tail, NewLookahead, RpInfo),
1343    case get(RulePointer) of
1344        undefined -> % New
1345            put(RulePointer, NewLookahead),
1346            compute_closure(NewLookahead, RulePointer, RpInfo);
1347        Lookahead2 ->
1348            Lookahead = set_union(Lookahead2, NewLookahead),
1349            if
1350                Lookahead =:= Lookahead2 -> % Old
1351                    Lookahead2; % void()
1352                true -> % Merge
1353                    put(RulePointer, Lookahead),
1354                    compute_closure(NewLookahead, RulePointer, RpInfo)
1355            end
1356    end;
1357compute_closure1(Nil, _, _RpInfo) ->
1358    Nil.
1359
1360%% Check if some old state is a superset of our NewState
1361check_states(NewState, StateId, StateTab, #tabs{state_id = StateIdTab}) ->
1362    try ets:lookup_element(StateIdTab, StateId, 2) of
1363        N ->
1364            {_N, OldState} = lookup_state(StateTab, N),
1365            check_state1(NewState, OldState, [], N)
1366    catch error:_ -> add
1367    end.
1368
1369check_state1([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1],
1370             [#item{look_ahead = Lookahead2} | Items2], Symbols, N) ->
1371    case set_is_subset(Lookahead1, Lookahead2) of
1372        true ->
1373            check_state1(Items1, Items2, Symbols, N);
1374        false ->
1375            case Rhs of
1376                [] ->
1377                    check_state2(Items1, Items2, Symbols, N);
1378                [Symbol | _] ->
1379                    check_state2(Items1, Items2, [Symbol | Symbols], N)
1380            end
1381    end;
1382check_state1([], [], _Symbols, N) ->
1383    {old, N}.
1384
1385check_state2([#item{look_ahead = Lookahead1, rhs = Rhs} | Items1],
1386             [#item{look_ahead = Lookahead2} | Items2], Symbols, N) ->
1387    case set_is_subset(Lookahead1, Lookahead2) of
1388        true ->
1389            check_state2(Items1, Items2, Symbols, N);
1390        false ->
1391            case Rhs of
1392                [] ->
1393                    check_state2(Items1, Items2, Symbols, N);
1394                [Symbol | _] ->
1395                    check_state2(Items1, Items2, [Symbol | Symbols], N)
1396            end
1397    end;
1398check_state2([], [], Symbols, N) ->
1399    {merge, N, usort(Symbols)}.
1400
1401merge_states(NewState, StateTab, Tables, M, StateId) ->
1402    {_M, Old_state} = lookup_state(StateTab, M),
1403    MergedState = merge_states1(NewState, Old_state),
1404    insert_state(Tables, StateTab, {M, MergedState}, StateId).
1405
1406merge_states1([Item1 | Items1], [Item2 | Items2]) ->
1407    LA1 = Item1#item.look_ahead,
1408    LA2 = Item2#item.look_ahead,
1409    if
1410        LA1 =:= LA2 ->
1411            [Item1 | merge_states1(Items1, Items2)];
1412        true ->
1413            [Item1#item{look_ahead = set_union(LA1, LA2)}
1414             | merge_states1(Items1, Items2)]
1415    end;
1416merge_states1(_, _) ->
1417    [].
1418
1419%% RulePointer -> Rhs. Every position Rhs in has its unique "rule pointer".
1420make_rhs_index(RulesList) ->
1421    Index = flatmap(fun(#rule{symbols = [_Non | Daughters]}) ->
1422                            suffixes0(Daughters)
1423                    end, RulesList),
1424    list_to_tuple(Index).
1425
1426suffixes0([?EMPTY]) ->
1427    [[], []];
1428suffixes0(L) ->
1429    suffixes(L).
1430
1431suffixes([]=L) ->
1432    [L];
1433suffixes([_ | T]=L) ->
1434    [L | suffixes(T)].
1435
1436%% Setup info about lookahead and expanding rules for each point
1437%% ("rule pointer") in the grammar.
1438make_rule_pointer_info(StC, RpRhs, RuleIndex) ->
1439    SymbolTab = StC#yecc.symbol_tab,
1440    LcTab = make_left_corner_table(StC),
1441    LA_index = map(fun(Syms) ->
1442                           rp_info(Syms, SymbolTab, LcTab, RuleIndex)
1443                   end, tuple_to_list(RpRhs)),
1444    list_to_tuple(LA_index).
1445
1446rp_info([], _SymbolTab, _LcTab, _RuleIndex) ->
1447    [];
1448rp_info([Category | Followers], SymbolTab, LcTab, RuleIndex) ->
1449    case maps:find(Category, RuleIndex) of
1450        error -> % terminal
1451            [];
1452        {ok, ExpandingRules} when Followers =:= [] ->
1453            ExpandingRules;
1454        {ok, ExpandingRules} ->
1455            case make_lookahead(Followers, SymbolTab, LcTab, set_empty()) of
1456                {empty, LA} ->
1457                    {union, ExpandingRules, LA};
1458                LA ->
1459                    {no_union, ExpandingRules, LA}
1460            end
1461    end.
1462
1463%% Lookahead computation is complicated by the possible existence
1464%% of null string rewriting rules, such as A -> '$empty'.
1465make_lookahead([], _, _, LA) ->
1466    {empty, LA};
1467make_lookahead([Symbol | Symbols], SymbolTab, LcTab, LA) ->
1468    case maps:find(Symbol, LcTab) of
1469        {ok, LeftCorner} -> % nonterminal
1470            case empty_member(LeftCorner) of
1471                true ->
1472                    make_lookahead(Symbols, SymbolTab, LcTab,
1473                                   set_union(empty_delete(LeftCorner), LA));
1474                false ->
1475                    set_union(LeftCorner, LA)
1476            end;
1477        error -> % terminal
1478            set_add(Symbol, LA)
1479    end.
1480
1481%% -> map-of({Nonterminal, [Terminal]}).
1482%% The algorithm FIRST/1 from the Dragon Book.
1483%% Left corner table, all terminals (including '$empty') that can
1484%% begin strings generated by Nonterminal.
1485make_left_corner_table(#yecc{rules_list = RulesList} = St) ->
1486    SymbolTab = left_corner_symbol_table(St),
1487    Rules = map(fun(#rule{symbols = [Lhs | Rhs]}) ->
1488                        {Lhs,{Lhs, Rhs}}
1489                end, RulesList),
1490    LeftHandTab = maps:from_list(family(Rules)),
1491    X0 = [{S,H} || {H,{H,Rhs}} <- Rules,
1492                   S <- Rhs,
1493                   not is_terminal(SymbolTab, S)],
1494    XL = family_with_domain(X0, St#yecc.nonterminals),
1495    X = maps:from_list(XL),
1496    Xref = fun(NT) -> maps:get(NT, X) end,
1497    E = set_empty(),
1498    LC0 = maps:from_list([{H, E} || {H,_} <- XL]),
1499    %% Handle H -> a S, where a is a terminal ('$empty' inclusive).
1500    {Q, LC1} =
1501        foldl(fun({H,{H,[S | _]}}, {Q0, LC}) ->
1502                      case ets:lookup(SymbolTab, S) of
1503                          [{_,Num}=SymbolAndNum] when Num >= 0 ->
1504                              F = set_add_terminal(SymbolAndNum, E),
1505                              {[Xref(H) | Q0], upd_first(H, F, LC)};
1506                          _ ->
1507                              {Q0, LC}
1508                      end
1509              end, {[], LC0}, Rules),
1510    left_corners(Q, LC1, LeftHandTab, SymbolTab, Xref).
1511
1512left_corners(Q0, LC0, LeftHandTab, SymbolTab, Xref) ->
1513    case usort(append(Q0)) of
1514        [] ->
1515            LC0;
1516        Q1 ->
1517            Rs = flatmap(fun(NT) -> maps:get(NT, LeftHandTab) end, Q1),
1518            {LC, Q} = left_corners2(Rs, LC0, [], SymbolTab, Xref),
1519            left_corners(Q, LC, LeftHandTab, SymbolTab, Xref)
1520    end.
1521
1522left_corners2([], LC, Q, _SymbolTab, _Xref) ->
1523    {LC, Q};
1524left_corners2([{Head,Rhs} | Rs], LC, Q0, SymbolTab, Xref) ->
1525    Ts = left_corner_rhs(Rhs, Head, LC, set_empty(), SymbolTab),
1526    First0 = maps:get(Head, LC),
1527    case set_is_subset(Ts, First0) of
1528        true ->
1529            left_corners2(Rs, LC, Q0, SymbolTab, Xref);
1530        false ->
1531            LC1 = upd_first(Head, Ts, LC),
1532            left_corners2(Rs, LC1, [Xref(Head) | Q0], SymbolTab, Xref)
1533    end.
1534
1535upd_first(NT, Ts, LC) ->
1536    maps:update_with(NT, fun(First) -> set_union(First, Ts) end, LC).
1537
1538left_corner_rhs([S | Ss], Head, LC, Ts, SymbolTab) ->
1539    case ets:lookup(SymbolTab, S) of
1540        [{_,Num}=SymbolAndNum] when Num >= 0 ->
1541            set_add_terminal(SymbolAndNum, Ts);
1542        [_NonTerminalSymbol] ->
1543            First = maps:get(S, LC),
1544            case empty_member(First) of
1545                true ->
1546                    NTs = set_union(empty_delete(First), Ts),
1547                    left_corner_rhs(Ss, Head, LC, NTs, SymbolTab);
1548                false ->
1549                    set_union(First, Ts)
1550            end
1551    end;
1552left_corner_rhs([], _Head, _LC, Ts, _SymbolTab) ->
1553    set_add(?EMPTY, Ts).
1554
1555%% For every non-terminal return a list of "rule pointers" for rules
1556%% expanding the non-terminal.
1557%% Also assigns a unique number to each point in the grammar, "rule pointer".
1558make_rule_index(#yecc{nonterminals = Nonterminals,
1559                      rules_list = RulesList}, RulesListNoCodes) ->
1560    {RulesL, _N} =
1561        lists:mapfoldl(fun(#rule{symbols = [Nonterminal | Daughters]}, I) ->
1562                               I1 = I + length(Daughters)+1,
1563                               {{Nonterminal, I}, I1}
1564                       end, 1, RulesList),
1565    IndexedTab = family_with_domain(RulesL, Nonterminals),
1566
1567    Symbol2Rule = [{Foo,R} || #rule{symbols = Symbols}=R <- RulesListNoCodes,
1568                              Foo <- Symbols],
1569    Pointer2Rule = [{I, R} || {{_Foo,R},I} <- count(1, Symbol2Rule)],
1570    {maps:from_list(IndexedTab), maps:from_list(Pointer2Rule)}.
1571
1572%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1573% Computing parse action table from list of states and goto table:
1574
1575compute_parse_actions(N, St, StateActions) ->
1576    case N < first_state() of
1577        true ->
1578            StateActions;
1579        false ->
1580            {N, StateN} = lookup_state(St#yecc.state_tab, N),
1581            %% There can be duplicates in Actions.
1582            Actions = compute_parse_actions1(StateN, N, St),
1583            compute_parse_actions(N - 1, St, [{N, Actions} | StateActions])
1584    end.
1585
1586compute_parse_actions1([], _, _) ->
1587    [];
1588compute_parse_actions1([#item{rule_pointer = RulePointer,
1589                              look_ahead = Lookahead0,
1590                              rhs = Rhs} | Items], N, St) ->
1591    case Rhs of
1592        [] ->
1593            Lookahead = decode_terminals(Lookahead0, St#yecc.inv_symbol_tab),
1594            case rule(RulePointer, St) of
1595                {[?ACCEPT | _], _RuleLocation, _} ->
1596                    [{Lookahead, accept}
1597                     | compute_parse_actions1(Items, N, St)];
1598                %% Head is placed after the daughters when finding the
1599                %% precedence. This is how giving precedence to
1600                %% non-terminals takes effect.
1601                {[Head | Daughters0], _RuleLocation, _} ->
1602                    Daughters = delete('$empty', Daughters0),
1603                    [{Lookahead,
1604                      #reduce{rule_nmbr = RulePointer, head = Head,
1605                              nmbr_of_daughters = length(Daughters),
1606                              prec = get_prec(Daughters ++ [Head], St)}}
1607                     | compute_parse_actions1(Items, N, St)]
1608            end;
1609        [Symbol | Daughters] ->
1610            case is_terminal(St#yecc.symbol_tab, Symbol) of
1611                true ->
1612                    DecSymbol = decode_symbol(Symbol, St#yecc.inv_symbol_tab),
1613                    {[Head | _], _RuleLocation, _} = rule(RulePointer, St),
1614                    %% A bogus shift-shift conflict can be introduced
1615                    %% here if some terminal occurs in different rules
1616                    %% which have been given precedence "one level up".
1617                    Prec1 = case Daughters of
1618                                [] -> get_prec([DecSymbol, Head], St);
1619                                _ -> get_prec([DecSymbol], St)
1620                            end,
1621                    Pos = case Daughters of
1622                              [] -> z;
1623                              _ -> a
1624                          end,
1625                    [{[DecSymbol],
1626                      #shift{state = goto(N, DecSymbol, St),
1627                             pos = Pos,
1628                             prec = Prec1,
1629                             rule_nmbr = RulePointer}}
1630                     | compute_parse_actions1(Items, N, St)];
1631                false ->
1632                    compute_parse_actions1(Items, N, St)
1633            end
1634    end.
1635
1636get_prec(Symbols, St) ->
1637    get_prec1(Symbols, St#yecc.prec_tab, {0, none}).
1638
1639get_prec1([], _, P) ->
1640    P;
1641get_prec1([Symbol | T], PrecTab, P) ->
1642    case ets:lookup(PrecTab, Symbol) of
1643        [] ->
1644            get_prec1(T, PrecTab, P);
1645        [{_, N, Ass}] ->
1646            get_prec1(T, PrecTab, {N, Ass})
1647    end.
1648
1649create_precedence_table(St) ->
1650    PrecTab = ets:new(yecc_precedences, []),
1651    true = ets:insert(PrecTab, St#yecc.prec),
1652    St#yecc{prec_tab = PrecTab}.
1653
1654-record(cxt, {terminal, state_n, yecc, res}).
1655
1656%% Detects shift-reduce and reduce-reduce conflicts.
1657%% Also removes all but one conflicting action. As a consequence the
1658%% lookahead sets for a state are always disjoint.
1659%% Reduce/reduce conflicts are considered errors.
1660find_action_conflicts(St0) ->
1661    Cxt0 = #cxt{yecc = St0, res = []},
1662    {#cxt{yecc = St, res = Res}, NewParseActions0} =
1663        foldl(fun({N, Actions0}, {Cxt1, StateActions}) ->
1664                      L = [{Terminal, Act} || {Lookahead, Act} <- Actions0,
1665                                              Terminal <- Lookahead],
1666                      {Cxt, Actions} =
1667                          foldl(fun({Terminal, As}, {Cxt2,Acts0}) ->
1668                                        Cxt3 = Cxt2#cxt{terminal = Terminal,
1669                                                        state_n = N},
1670                                        {Action, Cxt} =
1671                                            find_action_conflicts2(As, Cxt3),
1672                                        {Cxt,[{Action,Terminal} | Acts0]}
1673                                end, {Cxt1,[]}, family(L)),
1674                      {Cxt,[{N,inverse(family(Actions))} | StateActions]}
1675              end, {Cxt0, []}, St0#yecc.parse_actions),
1676    if
1677        length(Res) > 0, St#yecc.verbose ->
1678            io:fwrite(<<"\n*** Conflicts resolved by operator "
1679                        "precedences:\n\n">>),
1680            foreach(fun({Confl, Name}) ->
1681                            report_conflict(Confl, St, Name, prec)
1682                    end, reverse(Res)),
1683            io:fwrite(<<"*** End of resolved conflicts\n\n">>);
1684        true ->
1685            ok
1686    end,
1687    NewParseActions = reverse(NewParseActions0),
1688    St#yecc{parse_actions = NewParseActions}.
1689
1690find_action_conflicts2([Action], Cxt) ->
1691    {Action, Cxt};
1692find_action_conflicts2([#shift{state = St, pos = Pos, prec = Prec},
1693                        #shift{state = St}=S | As],
1694                       Cxt) when Pos =:= a; Prec =:= {0,none} ->
1695    %% This is a kludge to remove the bogus shift-shift conflict
1696    %% introduced in compute_parse_actions1().
1697    find_action_conflicts2([S | As], Cxt);
1698find_action_conflicts2([#shift{state = NewState, pos = z}=S1,
1699                        #shift{state = NewState}=S2 | _], Cxt) ->
1700    %% This is even worse than last clause. Give up.
1701    Confl = conflict(S1, S2, Cxt),
1702    #cxt{yecc = St0} = Cxt,
1703    St = conflict_error(Confl, St0),
1704    {S1, Cxt#cxt{yecc = St}}; % return any action
1705find_action_conflicts2([#shift{prec = {P1, Ass1}}=S | Rs], Cxt0) ->
1706    {R, Cxt1} = find_reduce_reduce(Rs, Cxt0),
1707    #cxt{res = Res0, yecc = St0} = Cxt1,
1708    #reduce{prec = {P2, Ass2}} = R,
1709    Confl = conflict(R, S, Cxt1),
1710    if
1711        P1 > P2 ->
1712            {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}};
1713        P2 > P1 ->
1714            {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}};
1715        Ass1 =:= left, Ass2 =:= left ->
1716            {R, Cxt1#cxt{res = [{Confl, reduce} | Res0]}};
1717        Ass1 =:= right, Ass2 =:= right ->
1718            {S, Cxt1#cxt{res = [{Confl, shift} | Res0]}};
1719        Ass1 =:= nonassoc, Ass2 =:= nonassoc ->
1720            {nonassoc, Cxt1};
1721        P1 =:= 0, P2 =:= 0 ->
1722            report_conflict(Confl, St0, shift, default),
1723            St = add_conflict(Confl, St0),
1724            {S, Cxt1#cxt{yecc = St}};
1725        true ->
1726            St = conflict_error(Confl, St0),
1727            {S, Cxt1#cxt{yecc = St}} % return any action
1728    end;
1729find_action_conflicts2(Rs, Cxt0) ->
1730    find_reduce_reduce(Rs, Cxt0).
1731
1732find_reduce_reduce([R], Cxt) ->
1733    {R, Cxt};
1734find_reduce_reduce([accept=A, #reduce{}=R | Rs], Cxt0) ->
1735    Confl = conflict(R, A, Cxt0),
1736    St = conflict_error(Confl, Cxt0#cxt.yecc),
1737    Cxt = Cxt0#cxt{yecc = St},
1738    find_reduce_reduce([R | Rs], Cxt);
1739find_reduce_reduce([#reduce{head = Categ1, prec = {P1, _}}=R1,
1740                    #reduce{head = Categ2, prec = {P2, _}}=R2 | Rs], Cxt0) ->
1741    #cxt{res = Res0, yecc = St0} = Cxt0,
1742    Confl = conflict(R1, R2, Cxt0),
1743    {R, Res, St} =
1744        if
1745            P1 > P2 ->
1746                {R1, [{Confl, Categ1} | Res0], St0};
1747            P2 > P1 ->
1748                {R2, [{Confl, Categ2} | Res0], St0};
1749            true ->
1750                St1 = conflict_error(Confl, St0),
1751                {R1, Res0, St1}
1752        end,
1753    Cxt = Cxt0#cxt{res = Res, yecc = St},
1754    find_reduce_reduce([R | Rs], Cxt).
1755
1756%% Since the lookahead sets are disjoint (assured by
1757%% find_action_conflicts), the order between actions can be chosen
1758%% almost arbitrarily. nonassoc has to come last, though (but is later
1759%% discarded!). And shift has to come before reduce.
1760sort_parse_actions([]) ->
1761    [];
1762sort_parse_actions([{N, La_actions} | Tail]) ->
1763    [{N, sort_parse_actions1(La_actions)} | sort_parse_actions(Tail)].
1764
1765sort_parse_actions1(LaActions) ->
1766    As = filter(fun({_LA, A}) -> A =:= accept end, LaActions),
1767    Ss = filter(fun({_LA, A}) -> is_record(A, shift) end, LaActions),
1768    Rs = filter(fun({_LA, A}) -> is_record(A, reduce) end, LaActions),
1769    Ns = filter(fun({_LA, A}) -> A =:= nonassoc end, LaActions),
1770    As ++ Ss ++ Rs ++ Ns.
1771
1772%% -> {State, StateRepr}. StateRepr has the same set of shift actions
1773%% as State. No code will be output for State if State =/= StateRepr.
1774find_identical_shift_states(StateActions) ->
1775    L1 = [{Actions, State} || {State,Actions} <- StateActions],
1776    {SO, NotSO} = lists:partition(fun({Actions,_States}) ->
1777                                          shift_actions_only(Actions)
1778                                  end, family(L1)),
1779    R = [{State, hd(States)} || {_Actions, States} <- SO, State <- States]
1780        ++
1781        [{State, State} || {_Actions, States} <- NotSO, State <- States],
1782    lists:keysort(1, R).
1783
1784-record(part_data, {name, eq_state, actions, n_actions, states}).
1785
1786%% Replace {SStates,Actions} with {SStates,{Actions,Jump}} where
1787%% Jump describes which clauses that have been extracted from shift
1788%% states so that they can be used from other states. Some space is
1789%% saved.
1790find_partial_shift_states(StateActionsL, StateReprs) ->
1791    L = [{State, Actions} || {{State,Actions}, {State,State}} <-
1792                                 lists:zip(StateActionsL, StateReprs),
1793                             shift_actions_only(Actions)],
1794    StateActions = sofs:family(L, [{state,[action]}]),
1795    StateAction = sofs:family_to_relation(StateActions),
1796
1797    %% Two actions are equal if they occur in the same states:
1798    Parts = sofs:partition(sofs:range(StateActions)),
1799    PartsL = sofs:to_external(Parts),
1800    %% Assign temporary names to the parts of the partition (of actions):
1801    PartNameL = lists:zip(seq1(length(PartsL)), PartsL),
1802    ActPartL = [{Action,PartName} ||
1803                   {PartName,Actions} <- PartNameL,
1804                   Action <- Actions],
1805    ActionPartName = sofs:relation(ActPartL, [{action,partname}]),
1806    StatePartName = sofs:relative_product(StateAction, ActionPartName),
1807    PartInStates = sofs:relation_to_family(sofs:converse(StatePartName)),
1808
1809    %% Parts that equal all actions of a state:
1810    PartActions = sofs:family(PartNameL, [{partname,[action]}]),
1811    PartState =
1812        sofs:relative_product(PartActions, sofs:converse(StateActions)),
1813    PartStates = sofs_family_with_domain(PartState, sofs:domain(PartActions)),
1814
1815    PartDataL = [#part_data{name = Nm, eq_state = EqS, actions = P,
1816                            n_actions = length(P),
1817                            states = ordsets:from_list(S)} ||
1818                    {{Nm,P}, {Nm,S}, {Nm,EqS}} <-
1819                        lists:zip3(PartNameL,
1820                                   sofs:to_external(PartInStates),
1821                                   sofs:to_external(PartStates))],
1822    true = length(PartDataL) =:= length(PartNameL),
1823    Ps = select_parts(PartDataL),
1824
1825    J1 = [{State, Actions, {jump_some,hd(States)}} ||
1826             {_W, #part_data{actions = Actions, eq_state = [],
1827                             states = States}} <- Ps,
1828             State <- States],
1829    J2 = [{State, Actions, {jump_all,To}} ||
1830             {_W, #part_data{actions = Actions, eq_state = EqS,
1831                             states = States}} <- Ps,
1832             To <- EqS,
1833             State <- States,
1834             State =/= To],
1835    J = lists:keysort(1, J1 ++ J2),
1836
1837    JumpStates = ordsets:from_list([S || {S,_,_} <- J]),
1838    {JS, NJS} =
1839        sofs:partition(1, sofs:relation(StateActionsL, [{state, actions}]),
1840                       sofs:set(JumpStates, [state])),
1841    R =
1842        [{S, {Actions,jump_none}} || {S,Actions} <- sofs:to_external(NJS)]
1843        ++
1844        [{S, {Actions--Part, {Tag,ToS,Part}}} ||
1845            {{S,Actions}, {S,Part,{Tag,ToS}}} <-
1846                lists:zip(sofs:to_external(JS), J)],
1847    true = length(StateActionsL) =:= length(R),
1848    lists:keysort(1, R).
1849
1850%% Very greedy. By no means optimal.
1851select_parts([]) ->
1852    [];
1853select_parts(PartDataL) ->
1854    T1 = [{score(PD), PD} || PD <- PartDataL],
1855    [{W, PD} | Ws] = lists:reverse(lists:keysort(1, T1)),
1856    #part_data{n_actions = NActions, states = S} = PD,
1857    if
1858        W < 8 -> % don't bother
1859            [];
1860        true ->
1861            %% Cannot extract more clauses from the chosen part's states:
1862            NL = [D#part_data{states = NewS} ||
1863                     {W1, #part_data{states = S0}=D} <- Ws,
1864                     W1 > 0,
1865                     (NewS = ordsets:subtract(S0, S)) =/= []],
1866            if
1867                length(S) =:= 1; NActions =:= 1 ->
1868                    select_parts(NL);
1869                true ->
1870                    [{W,PD} | select_parts(NL)]
1871            end
1872    end.
1873
1874%% Does it pay off to extract clauses into a new function?
1875%% Assumptions:
1876%% - a call costs 8 (C = 8);
1877%% - a clause (per action) costs 20 plus 8 (select) (Cl = 28);
1878%% - a new function costs 20 (funinfo) plus 16 (select) (F = 36).
1879%% A is number of actions, S is number of states.
1880%% Special case (the part equals all actions of some state):
1881%% C * (S - 1) < (S - 1) * A * Cl
1882%% Normal case (introduce new function):
1883%% F + A * Cl + C * S < S * A * Cl
1884score(#part_data{n_actions = NActions, eq_state = [], states = S}) ->
1885    (length(S) * NActions * 28) - (36 + NActions * 28 + length(S) * 8);
1886score(#part_data{n_actions = NActions, states = S}) ->
1887    ((length(S) - 1) * NActions * 28) - (8 * (length(S) - 1)).
1888
1889shift_actions_only(Actions) ->
1890    length([foo || {_Ts,{shift,_,_,_,_}} <- Actions]) =:= length(Actions).
1891
1892collect_some_state_info(StateActions, StateReprs) ->
1893    RF = fun({_LA, A}) -> is_record(A, reduce) end,
1894    L = [{State,
1895          begin
1896              RO = lists:all(RF, LaActions),
1897              %% C is currently always ""; identical states are all shift.
1898              C = [io_lib:fwrite(<<" %% ~w\n">>, [State]) ||
1899                      true <- [RO], Repr =/= State],
1900              #state_info{reduce_only = RO, state_repr = Repr, comment = C}
1901          end} ||
1902            {{State, LaActions}, {State, Repr}} <-
1903                lists:zip(StateActions, StateReprs)],
1904    list_to_tuple(L).
1905
1906conflict_error(Conflict, St0) ->
1907    St1 = add_conflict(Conflict, St0),
1908    add_error({conflict, Conflict}, St1).
1909
1910report_conflict(Conflict, St, ActionName, How) ->
1911    if
1912        St#yecc.verbose ->
1913            io:fwrite(<<"~s\n">>, [format_conflict(Conflict)]),
1914            Formated = format_symbol(ActionName),
1915            case How of
1916                prec ->
1917                    io:fwrite(<<"Resolved in favor of ~ts.\n\n">>, [Formated]);
1918                default ->
1919                    io:fwrite(<<"Conflict resolved in favor of ~ts.\n\n">>,
1920                              [Formated])
1921            end;
1922        true ->
1923            ok
1924    end.
1925
1926add_conflict(Conflict, St) ->
1927    case Conflict of
1928        {Symbol, StateN, _, {reduce, _, _, _}} ->
1929            St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]};
1930        {Symbol, StateN, _, {accept, _}} ->
1931            St#yecc{reduce_reduce = [{StateN,Symbol} |St#yecc.reduce_reduce]};
1932        {Symbol, StateN, _, {shift, _, _}} ->
1933            St#yecc{shift_reduce = [{StateN,Symbol} | St#yecc.shift_reduce]};
1934        {_Symbol, _StateN, {one_level_up, _, _}, _Confl} ->
1935            St
1936    end.
1937
1938conflict(#shift{prec = Prec1, rule_nmbr = RuleNmbr1},
1939         #shift{prec = Prec2, rule_nmbr = RuleNmbr2}, Cxt) ->
1940    %% Conflict due to precedences "one level up". Kludge.
1941    #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt,
1942    {_, L1, RuleN1} = rule(RuleNmbr1, St),
1943    {_, L2, RuleN2} = rule(RuleNmbr2, St),
1944    Confl = {one_level_up, {L1, RuleN1, Prec1}, {L2, RuleN2, Prec2}},
1945    {Symbol, N, Confl, Confl};
1946conflict(#reduce{rule_nmbr = RuleNmbr1}, NewAction, Cxt) ->
1947    #cxt{terminal = Symbol, state_n = N, yecc = St} = Cxt,
1948    {R1, RuleLocation1, RuleN1} = rule(RuleNmbr1, St),
1949    Confl = case NewAction of
1950                accept ->
1951                    {accept, St#yecc.rootsymbol};
1952                #reduce{rule_nmbr = RuleNmbr2} ->
1953                    {R2, RuleLocation2, RuleN2} = rule(RuleNmbr2, St),
1954                    {reduce, R2, RuleN2, RuleLocation2};
1955                #shift{state = NewState} ->
1956                    {shift, NewState, last(R1)}
1957            end,
1958    {Symbol, N, {R1, RuleN1, RuleLocation1}, Confl}.
1959
1960format_conflict({Symbol, N, _, {one_level_up,
1961                                {L1, RuleN1, {P1, Ass1}},
1962                                {L2, RuleN2, {P2, Ass2}}}}) ->
1963    S1 = io_lib:fwrite(<<"Conflicting precedences of symbols when "
1964                         "scanning ~ts in state ~w:\n">>,
1965                       [format_symbol(Symbol), N]),
1966    S2 = io_lib:fwrite(<<"   ~s ~w (rule ~w ~s)\n"
1967                          "      vs.\n">>,
1968                       [format_assoc(Ass1), P1, RuleN1, rule_pos(L1)]),
1969    S3 = io_lib:fwrite(<<"   ~s ~w (rule ~w ~s)\n">>,
1970                       [format_assoc(Ass2), P2, RuleN2, rule_pos(L2)]),
1971    [S1, S2, S3];
1972format_conflict({Symbol, N, Reduce, Confl}) ->
1973    S1 = io_lib:fwrite(<<"Parse action conflict scanning symbol "
1974                         "~ts in state ~w:\n">>, [format_symbol(Symbol), N]),
1975    S2 = case Reduce of
1976             {[HR | TR], RuleNmbr, RuleLocation} ->
1977                 io_lib:fwrite(<<"   Reduce to ~ts from ~ts (rule ~w "
1978                                 "~s)\n      vs.\n">>,
1979                               [format_symbol(HR), format_symbols(TR),
1980                                RuleNmbr, rule_pos(RuleLocation)])
1981         end,
1982    S3 = case Confl of
1983             {reduce, [HR2|TR2], RuleNmbr2, RuleLocation2} ->
1984                 io_lib:fwrite(<<"   reduce to ~ts from ~ts "
1985                                 "(rule ~w ~s).">>,
1986                               [format_symbol(HR2), format_symbols(TR2),
1987                                RuleNmbr2, rule_pos(RuleLocation2)]);
1988             {shift, NewState, Sym} ->
1989                 io_lib:fwrite(<<"   shift to state ~w, adding right "
1990                                 "sisters to ~ts.">>,
1991                               [NewState, format_symbol(Sym)]);
1992             {accept, Rootsymbol} ->
1993                 io_lib:fwrite(<<"   reduce to rootsymbol ~ts.">>,
1994                               [format_symbol(Rootsymbol)])
1995         end,
1996    [S1, S2, S3].
1997
1998rule_pos(Line) when is_integer(Line) ->
1999    io_lib:format("at line ~w", [Line]);
2000rule_pos({Line, Column}) when is_integer(Line), is_integer(Column) ->
2001    io_lib:format("at location ~w:~w", [Line, Column]).
2002
2003%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2004% Code generation:
2005
2006%% The version up to and including parsetools-1.3 is called "1.0".
2007%%
2008%% "1.1", parsetools-1.4:
2009%% - the prologue file has been updated;
2010%% - nonassoc is new;
2011%% - different order of clauses;
2012%% - never more than one clause matching a given symbol in a given state;
2013%% - file attributes relate messages to .yrl file;
2014%% - actions put in inlined functions;
2015%% - a few other minor fixes.
2016%%
2017%% "1.2", parsetools-1.4.2:
2018%% - the generated code has been changed as follows:
2019%%   - yeccpars2() calls the functions yeccpars2_State();
2020%%   - several states can share yeccpars2_State(), which reduces code size;
2021%%   - yeccgoto() has been split on one function per nonterminal;
2022%%   - several minor changes have made the loaded code smaller.
2023%% - the include file yeccpre.hrl has been changed incompatibly.
2024%%
2025%% "1.3", parsetools-1.4.4:
2026%% - the generated code has been changed as follows:
2027%%   - yeccgoto_T() no longer returns the next state, but calls yeccpars_S();
2028%%   - yeccpars2() is not called when it is known which yeccpars2_S() to call;
2029%%   - "__Stack" has been substituted for "Stack";
2030%%   - several states can share yeccpars2_S_cont(), which reduces code size;
2031%%   - instead if calling lists:nthtail() matching code is emitted.
2032%%
2033%% "1.4", parsetools-2.0.4:
2034%% - yeccerror() is called when a syntax error is found (as in version 1.1).
2035%% - the include file yeccpre.hrl has been changed.
2036
2037-define(CODE_VERSION, "1.4").
2038-define(YECC_BUG(M, A),
2039        unicode:characters_to_binary(
2040          [" erlang:error({yecc_bug,\"",?CODE_VERSION,"\",",
2041           io_lib:fwrite(M, A), "}).\n\n"])).
2042
2043%% Returns number of newlines in included files.
2044output_prelude(Outport, Inport, St0) when St0#yecc.includefile =:= [] ->
2045    St5 = output_header(St0),
2046    #yecc{infile = Infile, module = Module} = St5,
2047    St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]),
2048    St20 =
2049        fwrite(St10,
2050               <<"-export([parse/1, parse_and_scan/1, format_error/1]).\n">>,
2051               []),
2052    {St25, N_lines_1, LastErlangCodeLine} =
2053        case St20#yecc.erlang_code of
2054            none ->
2055                {St20, 0, no_erlang_code};
2056            Next_line ->
2057                St_10 = output_file_directive(St20, Infile, Next_line-1),
2058                Last_line = include1([], Inport, Outport, Infile,
2059                                     Next_line, St_10),
2060                Nmbr_of_lines = Last_line - Next_line,
2061                {St_10, Nmbr_of_lines, {last_erlang_code_line, Last_line}}
2062    end,
2063    St30 = nl(St25),
2064    IncludeFile =
2065        filename:join([code:lib_dir(parsetools), "include","yeccpre.hrl"]),
2066    %% Maybe one could assume there are no warnings in this file.
2067    St = output_file_directive(St30, IncludeFile, 0),
2068    N_lines_2 = include(St, IncludeFile, Outport),
2069    {St, N_lines_1 + N_lines_2, LastErlangCodeLine};
2070output_prelude(Outport, Inport, St0) ->
2071    St5 = output_header(St0),
2072    #yecc{infile = Infile, module = Module, includefile = Includefile} = St5,
2073    St10 = fwrite(St5, <<"-module(~w).\n">>, [Module]),
2074    St20 = output_file_directive(St10, Includefile, 0),
2075    N_lines_1 = include(St20, Includefile, Outport),
2076    St30 = nl(St20),
2077    case St30#yecc.erlang_code of
2078        none ->
2079            {St30, N_lines_1, no_erlang_code};
2080        Next_line ->
2081            St = output_file_directive(St30, Infile, Next_line-1),
2082            Last_line = include1([], Inport, Outport, Infile, Next_line, St),
2083            Nmbr_of_lines = Last_line - Next_line,
2084            {St, Nmbr_of_lines + N_lines_1, {last_erlang_code_line, Last_line}}
2085    end.
2086
2087output_header(St0) ->
2088    lists:foldl(fun(Str, St) -> fwrite(St, <<"~ts\n">>, [Str])
2089                end, St0, St0#yecc.header).
2090
2091output_goto(St, [{_Nonterminal, []} | Go], StateInfo) ->
2092    output_goto(St, Go, StateInfo);
2093output_goto(St0, [{Nonterminal, List} | Go], StateInfo) ->
2094    F = function_name(St0, yeccgoto, Nonterminal),
2095    St05 = output_nowarn(St0, F, '', 7),
2096    St10 = output_goto1(St05, List, F, StateInfo, true),
2097    St = output_goto_fini(F, Nonterminal, St10),
2098    output_goto(St, Go, StateInfo);
2099output_goto(St, [], _StateInfo) ->
2100    St.
2101
2102output_goto1(St0, [{From, To} | Tail], F, StateInfo, IsFirst) ->
2103    St10 = delim(St0, IsFirst),
2104    {To, ToInfo} = lookup_state(StateInfo, To),
2105    #state_info{reduce_only = RO, state_repr = Repr, comment = C} = ToInfo,
2106    if
2107        RO ->
2108            %% Reduce actions do not use the state, so we just pass
2109            %% the old (now bogus) on:
2110            FromS = io_lib:fwrite("~w=_S", [From]),
2111            ToS = "_S";
2112        true ->
2113            FromS = io_lib:fwrite("~w", [From]),
2114            ToS = io_lib:fwrite("~w", [To])
2115    end,
2116    St20 = fwrite(St10, <<"~w(~s, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>,
2117                  [F,FromS]),
2118    St30 = fwrite(St20, <<"~s">>, [C]),
2119    %% Short-circuit call to yeccpars2:
2120    St = fwrite(St30, <<" yeccpars2_~w(~s, Cat, Ss, Stack, T, Ts, Tzr)">>,
2121                [Repr, ToS]),
2122    output_goto1(St, Tail, F, StateInfo, false);
2123output_goto1(St, [], _F, _StateInfo, _IsFirst) ->
2124    St.
2125
2126output_goto_fini(F, NT, #yecc{includefile_version = {1,1}}=St0) ->
2127    %% Backward compatibility.
2128    St10 = delim(St0, false),
2129    St = fwrite(St10, <<"~w(State, _Cat, _Ss, _Stack, _T, _Ts, _Tzr) ->\n">>,
2130                [F]),
2131    fwrite(St,
2132           ?YECC_BUG(<<"{~ts, State, missing_in_goto_table}">>,
2133                     [quoted_atom(St0, NT)]),
2134           []);
2135output_goto_fini(_F, _NT, St) ->
2136    fwrite(St, <<".\n\n">>, []).
2137
2138%% Find actions having user code.
2139find_user_code(ParseActions, St) ->
2140    [#user_code{state = State,
2141                terminal = Terminal,
2142                funname = inlined_function_name(St, State, Terminal),
2143                action = Action} ||
2144        {State, La_actions} <- ParseActions,
2145        {Action, Terminals, RuleNmbr, NmbrOfDaughters}
2146            <- find_user_code2(La_actions),
2147        case tokens(RuleNmbr, St) of
2148            [{var, _, ?PSEUDO_VAR_1}] -> NmbrOfDaughters =/= 1;
2149            _ -> true
2150        end,
2151        Terminal <- Terminals].
2152
2153find_user_code2([]) ->
2154    [];
2155find_user_code2([{_, #reduce{rule_nmbr = RuleNmbr,
2156                             nmbr_of_daughters = NmbrOfDaughters}
2157                  =Action}]) ->
2158    %% Same optimization as in output_state_actions.
2159    [{Action, ["Cat"], RuleNmbr, NmbrOfDaughters}];
2160find_user_code2([{La, #reduce{rule_nmbr = RuleNmbr,
2161                              nmbr_of_daughters = NmbrOfDaughters}
2162                  =Action} | T]) ->
2163    [{Action,La, RuleNmbr, NmbrOfDaughters} | find_user_code2(T)];
2164find_user_code2([_ | T]) ->
2165    find_user_code2(T).
2166
2167output_actions(St0, StateJumps, StateInfo) ->
2168    %% Not all the clauses of the dispatcher function yeccpars2() can
2169    %% be reached. Only when shifting, that is, calling yeccpars1(),
2170    %% will yeccpars2() be called.
2171    Y2CL = [NewState || {_State,{Actions,J}} <- StateJumps,
2172                        {_LA, #shift{state = NewState}} <-
2173                            (Actions
2174                             ++ [A || {_Tag,_To,Part} <- [J], A <- Part])],
2175    Y2CS = ordsets:from_list([0 | Y2CL]),
2176    Y2S = ordsets:from_list([S || {S,_} <- StateJumps]),
2177    NY2CS = ordsets:subtract(Y2S, Y2CS),
2178    Sel = [{S,true} || S <- ordsets:to_list(Y2CS)] ++
2179          [{S,false} || S <- ordsets:to_list(NY2CS)],
2180
2181    SelS = [{State,Called} ||
2182               {{State,_JActions}, {State,Called}} <-
2183                   lists:zip(StateJumps, lists:keysort(1, Sel))],
2184    St05 = output_nowarn(St0, yeccpars2, '', 7),
2185    St10 = foldl(fun({State, Called}, St_0) ->
2186                         {State, #state_info{state_repr = IState}} =
2187                             lookup_state(StateInfo, State),
2188                         output_state_selection(St_0, State, IState, Called)
2189            end, St05, SelS),
2190    St20 = fwrite(St10, <<"yeccpars2(Other, _, _, _, _, _, _) ->\n">>, []),
2191    St = fwrite(St20,
2192                ?YECC_BUG(<<"{missing_state_in_action_table, Other}">>, []),
2193                []),
2194    foldl(fun({State, JActions}, St_0) ->
2195                  {State, #state_info{state_repr = IState}} =
2196                      lookup_state(StateInfo, State),
2197                  output_state_actions(St_0, State, IState,
2198                                       JActions, StateInfo)
2199          end, St, StateJumps).
2200
2201output_state_selection(St0, State, IState, Called) ->
2202    Comment = [<<"%% ">> || false <- [Called]],
2203    St = fwrite(St0, <<"~syeccpars2(~w=S, Cat, Ss, Stack, T, Ts, Tzr) ->\n">>,
2204                [Comment, State]),
2205    fwrite(St,
2206           <<"~s yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr);\n">>,
2207           [Comment, IState]).
2208
2209output_state_actions(St, State, State, {Actions,jump_none}, SI) ->
2210    St1 = output_state_actions_begin(St, State, Actions),
2211    output_state_actions1(St1, State, Actions, true, normal, SI);
2212output_state_actions(St0, State, State, {Actions, Jump}, SI) ->
2213    {Tag, To, Common} = Jump,
2214    CS = case Tag of
2215             jump_some -> list_to_atom(lists:concat([cont_, To]));
2216             jump_all -> To
2217         end,
2218    St = output_state_actions1(St0, State, Actions, true, {to, CS}, SI),
2219    if
2220        To =:= State ->
2221            St1 = output_state_actions_begin(St, State, Actions),
2222            output_state_actions1(St1, CS, Common, true, normal, SI);
2223        true ->
2224            St
2225    end;
2226output_state_actions(St, State, JState, _XActions, _SI) ->
2227    fwrite(St, <<"%% yeccpars2_~w: see yeccpars2_~w\n\n">>, [State, JState]).
2228
2229output_state_actions_begin(St, State, _Actions) ->
2230    output_nowarn(St, yeccpars2_, State, 7).
2231
2232output_state_actions1(St, State, [], IsFirst, normal, _SI) ->
2233    output_state_actions_fini(State, IsFirst, St);
2234output_state_actions1(St0, State, [], IsFirst, {to, ToS}, _SI) ->
2235    St = delim(St0, IsFirst),
2236    fwrite(St,
2237           <<"yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr) ->\n"
2238            " yeccpars2_~w(S, Cat, Ss, Stack, T, Ts, Tzr).\n\n">>,
2239           [State, ToS]);
2240output_state_actions1(St0, State, [{_, #reduce{}=Action}],
2241                      IsFirst, _End, SI) ->
2242    St = output_reduce(St0, State, "Cat", Action, IsFirst, SI),
2243    fwrite(St, <<".\n\n">>, []);
2244output_state_actions1(St0, State, [{Lookahead,Action} | Tail],
2245                      IsFirst, End, SI) ->
2246    {_, St} =
2247        foldl(fun(Terminal, {IsFst,St_0}) ->
2248                      {false,
2249                       output_action(St_0, State, Terminal, Action, IsFst,SI)}
2250              end, {IsFirst,St0}, Lookahead),
2251    output_state_actions1(St, State, Tail, false, End, SI).
2252
2253output_action(St, State, Terminal, #reduce{}=Action, IsFirst, SI) ->
2254    output_reduce(St, State, Terminal, Action, IsFirst, SI);
2255output_action(St0, State, Terminal, #shift{state = NewState}, IsFirst, _SI) ->
2256    St10 = delim(St0, IsFirst),
2257    St = fwrite(St10, <<"yeccpars2_~w(S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>,
2258                [State, quoted_atom(St10, Terminal)]),
2259    output_call_to_includefile(NewState, St);
2260output_action(St0, State, Terminal, accept, IsFirst, _SI) ->
2261    St10 = delim(St0, IsFirst),
2262    St = fwrite(St10,
2263                <<"yeccpars2_~w(_S, ~ts, _Ss, Stack, _T, _Ts, _Tzr) ->\n">>,
2264                [State, quoted_atom(St10, Terminal)]),
2265    fwrite(St, <<" {ok, hd(Stack)}">>, []);
2266output_action(St, _State, _Terminal, nonassoc, _IsFirst, _SI) ->
2267    St.
2268
2269output_call_to_includefile(NewState, #yecc{includefile_version = {1,1}}=St) ->
2270    %% Backward compatibility.
2271    fwrite(St, <<" yeccpars1(Ts, Tzr, ~w, [S | Ss], [T | Stack])">>,
2272           [NewState]);
2273output_call_to_includefile(NewState, St) ->
2274    fwrite(St, <<" yeccpars1(S, ~w, Ss, Stack, T, Ts, Tzr)">>,
2275           [NewState]).
2276
2277output_state_actions_fini(State, IsFirst, St0) ->
2278    %% Backward compatible.
2279    St10 = delim(St0, IsFirst),
2280    St = fwrite(St10, <<"yeccpars2_~w(_, _, _, _, T, _, _) ->\n">>, [State]),
2281    fwrite(St, <<" yeccerror(T).\n\n">>, []).
2282
2283output_reduce(St0, State, Terminal,
2284              #reduce{rule_nmbr = RuleNmbr,
2285                      head = Head,
2286                      nmbr_of_daughters = NmbrOfDaughters},
2287              IsFirst, StateInfo) ->
2288    St10 = delim(St0, IsFirst),
2289    QuotedTerminal = if
2290                         is_atom(Terminal) -> quoted_atom(St10, Terminal);
2291                         true -> Terminal
2292                     end,
2293    St20 = fwrite(St10,
2294                  <<"yeccpars2_~w(_S, ~ts, Ss, Stack, T, Ts, Tzr) ->\n">>,
2295                  [State, QuotedTerminal]),
2296    St30 =
2297        if
2298            NmbrOfDaughters < 2 ->
2299                Ns = "Ss",
2300                St20;
2301            true ->
2302                Ns = "Nss",
2303                Tmp = lists:join(",",
2304                                  lists:duplicate(NmbrOfDaughters - 1, "_")),
2305                fwrite(St20, <<" [~s|Nss] = Ss,\n">>, [Tmp])
2306        end,
2307    St40 = case tokens(RuleNmbr, St30) of
2308               [{var, _, ?PSEUDO_VAR_1}] when NmbrOfDaughters =:= 1 ->
2309                   NewStack = "Stack",
2310                   St30;
2311               _ ->
2312                   NewStack = "NewStack",
2313                   fwrite(St30, <<" NewStack = ~w(Stack),\n">>,
2314                          [inlined_function_name(St30, State, Terminal)])
2315               end,
2316    if
2317        NmbrOfDaughters =:= 0 ->
2318            NextState = goto(State, Head, St40),
2319            {NextState, I} = lookup_state(StateInfo, NextState),
2320            #state_info{reduce_only = RO, state_repr = Repr, comment = C} = I,
2321            %% Reduce actions do not use the state, so we just pass
2322            %% the old (now bogus) on:
2323            if
2324                RO -> NextS = "_S";
2325                true -> NextS = io_lib:fwrite("~w", [NextState])
2326            end,
2327            St = fwrite(St40, <<"~s">>, [C]),
2328            %% Short-circuit call to yeccpars2:
2329            fwrite(St,
2330                   <<" yeccpars2_~w(~s, ~ts, [~w | Ss], ~s, T, Ts, Tzr)">>,
2331                   [Repr, NextS, QuotedTerminal, State, NewStack]);
2332        true ->
2333            fwrite(St40,
2334                   <<" ~w(hd(~s), ~ts, ~s, ~s, T, Ts, Tzr)">>,
2335                   [function_name(St40, yeccgoto, Head), Ns,
2336                    QuotedTerminal, Ns, NewStack])
2337    end.
2338
2339delim(St, true) ->
2340    St;
2341delim(St, false) ->
2342    fwrite(St, <<";\n">>, []).
2343
2344quoted_atom(#yecc{encoding = latin1}, Atom) when is_atom(Atom) ->
2345    io_lib:write_atom_as_latin1(Atom);
2346quoted_atom(_St, Atomic) ->
2347    io_lib:write(Atomic).
2348
2349output_inlined(St, UserCodeActions, Infile) ->
2350    foldl(fun(#user_code{funname = InlinedFunctionName,
2351                         action = Action}, St_0) ->
2352                  output_inlined(St_0, InlinedFunctionName,
2353                                 Action, Infile)
2354          end, St, UserCodeActions).
2355
2356%% Each action with user code is placed in a separate inlined function.
2357%% The purpose is to be able to pinpoint errors and warnings correctly.
2358output_inlined(St0, FunctionName, Reduce, Infile) ->
2359    #reduce{rule_nmbr = RuleNmbr, nmbr_of_daughters = N_daughters} = Reduce,
2360    #rule{tokens = Tokens, is_well_formed = WF} = get_rule(RuleNmbr, St0),
2361    Line0 = first_line(Tokens),
2362    NLines = last_line(Tokens) - Line0,
2363
2364    St5 = if
2365              WF ->
2366                  St0;
2367              not WF ->
2368                  %% The compiler will generate an error message for
2369                  %% the inlined function (unless the reason that yecc
2370                  %% failed to parse the action was some macro). The
2371                  %% line number of the message will be correct since
2372                  %% we are keeping track of the current line of the
2373                  %% output file...
2374                  #yecc{outfile = Outfile, line = CurLine} = St0,
2375                  output_file_directive(St0, Outfile, CurLine)
2376          end,
2377
2378    CodeStartLine = lists:max([0, Line0 - 4]),
2379    St10 = fwrite(St5, <<"-compile({inline,~w/1}).\n">>, [FunctionName]),
2380    St15 = output_nowarn(St10, FunctionName, '', 1),
2381    St20 = output_file_directive(St15, Infile, CodeStartLine),
2382    St30 = fwrite(St20, <<"~w(__Stack0) ->\n">>, [FunctionName]),
2383    %% Currently the (old) inliner emits less code if matching the
2384    %% stack inside the body rather than in the head...
2385    St40 = case N_daughters of
2386               0 ->
2387                   Stack = "__Stack0",
2388                   St30;
2389               _ ->
2390                   Stack = "__Stack",
2391                   A = concat(flatmap(fun(I) -> [",",?PSEUDO_PREFIX,I] end,
2392                                      lists:seq(N_daughters, 1, -1))),
2393                   fwrite(St30, <<" ~s = __Stack0,\n">>,
2394                          [append(["[", tl(A), " | __Stack]"])])
2395           end,
2396    St = St40#yecc{line = St40#yecc.line + NLines},
2397    fwrite(St, <<" [begin\n~ts\n  end | ~s].\n\n">>,
2398           [pp_tokens(Tokens), Stack]).
2399
2400output_nowarn(St, Function, Suffix, Arity) ->
2401    S = case Suffix of '' -> ""; _ -> integer_to_list(Suffix) end,
2402    St1 = fwrite(St, <<"-dialyzer({nowarn_function, ~w~s/~w}).\n">>,
2403                 [Function, S, Arity]),
2404    fwrite(St1, <<"-compile({nowarn_unused_function,  ~w~s/~w}).\n">>,
2405                 [Function, S, Arity]).
2406
2407inlined_function_name(St, State, Terminal) ->
2408    End = case Terminal of
2409              "Cat" -> [];
2410              _ -> [quoted_atom(St, Terminal)]
2411          end,
2412    list_to_atom(concat([yeccpars2_, State, '_'] ++ End)).
2413
2414-compile({nowarn_unused_function,function_name/3}).
2415function_name(St, Name, Suf) ->
2416    list_to_atom(concat([Name, '_'] ++ [quoted_atom(St, Suf)])).
2417
2418rule(RulePointer, St) ->
2419    #rule{n = N, location = Location, symbols = Symbols} =
2420        maps:get(RulePointer, St#yecc.rule_pointer2rule),
2421    {Symbols, Location, N}.
2422
2423get_rule(RuleNmbr, St) ->
2424    maps:get(RuleNmbr, St#yecc.rule_pointer2rule).
2425
2426tokens(RuleNmbr, St) ->
2427    Rule = maps:get(RuleNmbr, St#yecc.rule_pointer2rule),
2428    Rule#rule.tokens.
2429
2430goto(From, Symbol, St) ->
2431    case ets:lookup(St#yecc.goto_tab, {From, Symbol}) of
2432        [{_, To}] ->
2433            To;
2434        [] ->
2435            erlang:error({error_in_goto_table, From, Symbol})
2436    end.
2437
2438%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2439% Auxiliaries:
2440
2441-ifdef(SYMBOLS_AS_CODES).
2442
2443%%% Bit mask operations.
2444
2445-compile({inline,[set_empty/0]}).
2446set_empty() ->
2447    0.
2448
2449set_add(I, BM) ->
2450    (1 bsl I) bor BM.
2451
2452-compile({inline,[set_member/2]}).
2453set_member(I, BM) ->
2454    ((1 bsl I) band BM) =/= 0.
2455
2456%% Assumes I is a member...
2457-compile({inline,[set_delete/2]}).
2458set_delete(I, BM) ->
2459    (1 bsl I) bxor BM.
2460
2461-compile({inline,[set_union/2]}).
2462set_union(BM1, BM2) ->
2463    BM1 bor BM2.
2464
2465-compile({inline,[set_is_subset/2]}).
2466set_is_subset(BM1, BM2) ->
2467    (BM1 band BM2) =:= BM1.
2468
2469empty_member(BM) ->
2470    set_member(0, BM).
2471
2472empty_delete(BM) ->
2473    set_delete(0, BM).
2474
2475code_symbols(Ss, SymbolTable) ->
2476    map(fun(S) -> ets:lookup_element(SymbolTable, S, 2) end, Ss).
2477
2478decode_symbol(C, InvSymbolTable) ->
2479    ets:lookup_element(InvSymbolTable, C, 1).
2480
2481code_terminal(T, SymbolTab) ->
2482    set_add(ets:lookup_element(SymbolTab, T, 2), 0).
2483
2484decode_terminals(BM, InvSymbolTab) ->
2485    case get(BM) of
2486        undefined ->
2487            Symbols = decode_terminals(BM, 0, InvSymbolTab),
2488            put(BM, Symbols),
2489            Symbols;
2490        Symbols ->
2491            Symbols
2492    end.
2493
2494decode_terminals(0, _I, _InvSymbolTab) ->
2495    [];
2496decode_terminals(BM, I, InvSymbolTab) ->
2497    case set_member(I, BM) of
2498        true ->
2499            [ets:lookup_element(InvSymbolTab, I, 1)
2500             | decode_terminals(set_delete(I, BM), I+1, InvSymbolTab)];
2501        false ->
2502            decode_terminals(BM, I+1, InvSymbolTab)
2503    end.
2504
2505set_add_terminal({_Symbol, TerminalNum}, BM) ->
2506    set_add(TerminalNum, BM).
2507
2508-compile({inline,[is_terminal/2]}).
2509is_terminal(_Tab, SymbolCode) ->
2510    SymbolCode >= 0.
2511
2512left_corner_symbol_table(St) ->
2513    St#yecc.inv_symbol_tab.
2514
2515-else.
2516
2517set_empty() ->
2518    [].
2519
2520set_add(Symbol, L) ->
2521    ordsets:union([Symbol], L).
2522
2523set_union(Es1, Es2) ->
2524    ordsets:union(Es1, Es2).
2525
2526set_is_subset(Es1, Es2) ->
2527    ordsets:is_subset(Es1, Es2).
2528
2529code_symbols(Ss, _SymbolTab) ->
2530    Ss.
2531
2532decode_symbol(S, _InvSymbolTab) ->
2533    S.
2534
2535code_terminal(T, _SymbolTab) ->
2536    [T].
2537
2538decode_terminals(Ts, _InvSymbolTab) ->
2539    Ts.
2540
2541empty_member(['$empty' | _]) ->
2542    true;
2543empty_member(_) ->
2544    false.
2545
2546empty_delete(['$empty' | Terminals]) ->
2547    Terminals.
2548
2549set_add_terminal({Symbol, _TerminalNum}, L) ->
2550    set_add(Symbol, L).
2551
2552is_terminal(Tab, SymbolName) ->
2553   ets:lookup_element(Tab, SymbolName, 2) >= 0.
2554
2555left_corner_symbol_table(St) ->
2556    St#yecc.symbol_tab.
2557
2558-endif. % SYMBOLS_AS_CODES
2559
2560intersect(L1, L2) ->
2561    ordsets:to_list(ordsets:intersection(ordsets:from_list(L1),
2562                                         ordsets:from_list(L2))).
2563
2564format_symbols([Sym | Syms]) ->
2565    concat([format_symbol(Sym) | format_symbols1(Syms)]).
2566
2567format_symbols1([]) ->
2568    [];
2569format_symbols1([H | T]) ->
2570    [" ", format_symbol(H) | format_symbols1(T)].
2571
2572include(St, File, Outport) ->
2573    case file:open(File, [read]) of
2574        {error, Reason} ->
2575            throw(add_error(File, none, {file_error, Reason}, St));
2576        {ok, Inport} ->
2577            _ = epp:set_encoding(Inport),
2578            Line = io:get_line(Inport, ''),
2579            try include1(Line, Inport, Outport, File, 1, St) - 1
2580            after ok = file:close(Inport)
2581            end
2582    end.
2583
2584include1(eof, _, _, _File, L, _St) ->
2585    L;
2586include1({error, _}=_Error, _Inport, _Outport, File, L, St) ->
2587    throw(add_error(File, erl_anno:new(L), cannot_parse, St));
2588include1(Line, Inport, Outport, File, L, St) ->
2589    Incr = case member($\n, Line) of
2590               true -> 1;
2591               false -> 0
2592           end,
2593    io:put_chars(Outport, Line),
2594    include1(io:get_line(Inport, ''), Inport, Outport, File, L + Incr, St).
2595
2596includefile_version([]) ->
2597    {1,4};
2598includefile_version(Includefile) ->
2599    case epp:open(Includefile, []) of
2600        {ok, Epp} ->
2601            try
2602                parse_file(Epp)
2603            after
2604                epp:close(Epp)
2605            end;
2606        {error, _Error} ->
2607            {1,1}
2608    end.
2609
2610parse_file(Epp) ->
2611    case epp:parse_erl_form(Epp) of
2612        {ok, {function,_Anno,yeccpars1,7,_Clauses}} ->
2613            {1,4};
2614        {eof,_Location} ->
2615            {1,1};
2616        _Form ->
2617            parse_file(Epp)
2618    end.
2619
2620%% Keeps the line breaks and the column numbers of the original code.
2621pp_tokens(Tokens) ->
2622    C = first_column(Tokens),
2623    Indent = lists:duplicate(C - 1, $\s),
2624    Text = lists:append([erl_anno:text(anno(T)) || T <- Tokens]),
2625    [Indent, Text].
2626
2627set_encoding(#yecc{encoding = none}, Port) ->
2628    ok = io:setopts(Port, [{encoding, epp:default_encoding()}]);
2629set_encoding(#yecc{encoding = E}, Port) ->
2630    ok = io:setopts(Port, [{encoding, E}]).
2631
2632output_encoding_comment(#yecc{encoding = none}=St) ->
2633    St;
2634output_encoding_comment(#yecc{encoding = Encoding}=St) ->
2635    fwrite(St, <<"%% ~s\n">>, [epp:encoding_to_string(Encoding)]).
2636
2637output_file_directive(St, Filename, Line) when St#yecc.file_attrs ->
2638    fwrite(St, <<"-file(~ts, ~w).\n">>,
2639           [format_filename(Filename, St), Line]);
2640output_file_directive(St, _Filename, _Line) ->
2641    St.
2642
2643first_column(Tokens) ->
2644    case erl_anno:column(anno(hd(Tokens))) of
2645        undefined ->
2646            1;
2647        Column ->
2648            Column
2649    end.
2650
2651first_line(Tokens) ->
2652    line(anno(hd(Tokens))).
2653
2654last_line(Tokens) ->
2655    line(anno(lists:last(Tokens))).
2656
2657line(Anno) ->
2658    erl_anno:line(Anno).
2659
2660location(none) -> none;
2661location(Anno) ->
2662    erl_anno:location(Anno).
2663
2664anno(Token) ->
2665    element(2, Token).
2666
2667%% Keep track of the current line in the generated file.
2668fwrite(#yecc{outport = Outport, line = Line}=St, Format, Args) ->
2669    NLines = count_nl(Format),
2670    io:fwrite(Outport, Format, Args),
2671    St#yecc{line = Line + NLines}.
2672
2673%% Assumes \n is used, and never ~n.
2674count_nl(<<$\n,Rest/binary>>) ->
2675    1 + count_nl(Rest);
2676count_nl(<<_,Rest/binary>>) ->
2677    count_nl(Rest);
2678count_nl(<<>>) ->
2679    0.
2680
2681nl(#yecc{outport = Outport, line = Line}=St) ->
2682    io:nl(Outport),
2683    St#yecc{line = Line + 1}.
2684
2685format_filename(Filename0, St) ->
2686    Filename = filename:flatten(Filename0),
2687    case lists:keyfind(encoding, 1, io:getopts(St#yecc.outport)) of
2688        {encoding, unicode} -> io_lib:write_string(Filename);
2689        _ ->                   io_lib:write_string_as_latin1(Filename)
2690    end.
2691
2692format_assoc(left) ->
2693    "Left";
2694format_assoc(right) ->
2695    "Right";
2696format_assoc(unary) ->
2697    "Unary";
2698format_assoc(nonassoc) ->
2699    "Nonassoc".
2700
2701format_symbol(Symbol) ->
2702    String = concat([Symbol]),
2703    case erl_scan:string(String) of
2704        {ok, [{atom, _, _}], _} ->
2705            io_lib:fwrite(<<"~tw">>, [Symbol]);
2706        {ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' ->
2707            case erl_scan:reserved_word(Word) of
2708                true ->
2709                    String;
2710                false ->
2711                    io_lib:fwrite(<<"~tw">>, [Symbol])
2712            end;
2713        {ok, [{var, _, _}], _} ->
2714            String;
2715        _ ->
2716            io_lib:fwrite(<<"~tw">>, [Symbol])
2717    end.
2718
2719line_of_location(Line) when is_integer(Line) ->
2720    Line;
2721line_of_location({Line, Column}) when is_integer(Line), is_integer(Column) ->
2722    Line.
2723
2724inverse(L) ->
2725    sort([{A,B} || {B,A} <- L]).
2726
2727family(L) ->
2728    sofs:to_external(sofs:relation_to_family(sofs:relation(L))).
2729
2730seq1(To) when To < 1 ->
2731    [];
2732seq1(To) ->
2733    lists:seq(1, To).
2734
2735count(From, L) ->
2736    lists:zip(L, lists:seq(From, length(L)-1+From)).
2737
2738family_with_domain(L, DL) ->
2739    sofs:to_external(sofs_family_with_domain(sofs:relation(L), sofs:set(DL))).
2740
2741sofs_family_with_domain(R0, D) ->
2742    R = sofs:restriction(R0, D),
2743    F = sofs:relation_to_family(R),
2744    FD = sofs:constant_function(D, sofs:from_term([])),
2745    sofs:family_union(F, FD).
2746