1%% =====================================================================
2%% Licensed under the Apache License, Version 2.0 (the "License"); you may
3%% not use this file except in compliance with the License. You may obtain
4%% a copy of the License at <http://www.apache.org/licenses/LICENSE-2.0>
5%%
6%% Unless required by applicable law or agreed to in writing, software
7%% distributed under the License is distributed on an "AS IS" BASIS,
8%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9%% See the License for the specific language governing permissions and
10%% limitations under the License.
11%%
12%% Alternatively, you may use this file under the terms of the GNU Lesser
13%% General Public License (the "LGPL") as published by the Free Software
14%% Foundation; either version 2.1, or (at your option) any later version.
15%% If you wish to allow use of your version of this file only under the
16%% terms of the LGPL, you should delete the provisions above and replace
17%% them with the notice and other provisions required by the LGPL; see
18%% <http://www.gnu.org/licenses/>. If you do not delete the provisions
19%% above, a recipient may use your version of this file under the terms of
20%% either the Apache License or the LGPL.
21%%
22%% @copyright 2001-2006 Richard Carlsson
23%% @author Richard Carlsson <carlsson.richard@gmail.com>
24%% @end
25%% =====================================================================
26
27%% @doc `epp_dodger' - bypasses the Erlang preprocessor.
28%%
29%% <p>This module tokenises and parses most Erlang source code without
30%% expanding preprocessor directives and macro applications, as long as
31%% these are syntactically "well-behaved". Because the normal parse
32%% trees of the `erl_parse' module cannot represent these things
33%% (normally, they are expanded by the Erlang preprocessor {@link
34%% //stdlib/epp} before the parser sees them), an extended syntax tree
35%% is created, using the {@link erl_syntax} module.</p>
36
37
38%% NOTES:
39%%
40%% * It's OK if the result does not parse - then at least nothing
41%% strange happens, and the user can resort to full preprocessing.
42%% However, we must avoid generating a token stream that is accepted by
43%% the parser, but has a different meaning than the intended. A typical
44%% example is when someone uses token-level string concatenation with
45%% macros, as in `"foo" ?bar' (where `?bar' expands to a string). If we
46%% replace the tokens `? bar' with `( ... )', to preserve precedence,
47%% the result will be parsed as an application `"foo" ( ... )' and cause
48%% trouble later on. We must detect such cases and report an error.
49%%
50%% * It is pointless to add a mechanism for tracking which macros are
51%% known to take arguments, and which are known to take no arguments,
52%% since a lot of the time we will not have seen the macro definition
53%% anyway (it's usually in a header file). Hence, we try to use
54%% heuristics instead. In most cases, the token sequence `? foo ('
55%% indicates that it is a call of a macro that is supposed to take
56%% arguments, but e.g., in the context `: ? foo (', the argument list
57%% typically belongs to a remote function call, as in `m:?f(...)' and
58%% should be parsed as `m:(?f)(...)' unless it is actually a try-clause
59%% pattern such as `throw:?f(...) ->'.
60%%
61%% * We do our best to make macros without arguments pass the parsing
62%% stage transparently. Atoms are accepted in most contexts, but
63%% variables are not, so we use only atoms to encode these macros.
64%% Sadly, the parsing sometimes discards even the location info from
65%% atom tokens, so we can only use the actual characters for this.
66%%
67%% * We recognize `?m(...' at the start of a form and prevent this from
68%% being interpreted as a macro with arguments, since it is probably a
69%% function definition. Likewise with attributes `-?m(...'.
70
71-module(epp_dodger).
72
73-export([parse_file/1, quick_parse_file/1, parse_file/2,
74	 quick_parse_file/2, parse/1, quick_parse/1, parse/2,
75	 quick_parse/2, parse/3, quick_parse/3, parse_form/2,
76	 parse_form/3, quick_parse_form/2, quick_parse_form/3,
77	 format_error/1, tokens_to_string/1]).
78
79
80%% The following should be: 1) pseudo-uniquely identifiable, and 2)
81%% cause nice looking error messages when the parser has to give up.
82
83-define(macro_call, '? <macro> (').
84-define(atom_prefix, "? ").
85-define(var_prefix, "?,").
86-define(pp_form, '?preprocessor declaration?').
87
88
89%% @type errorinfo() = //stdlib/erl_scan:error_info().
90%%
91%% This is a so-called Erlang I/O ErrorInfo structure; see the {@link
92%% //stdlib/io} module for details.
93
94-type errorinfo() :: erl_scan:error_info().
95
96-type option() :: atom() | {atom(), term()}.
97
98%% =====================================================================
99%% @spec parse_file(File) -> {ok, Forms} | {error, errorinfo()}
100%%       File = file:filename()
101%%       Forms = [erl_syntax:syntaxTree()]
102%%
103%% @equiv parse_file(File, [])
104
-spec parse_file(file:filename()) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

parse_file(File) ->
    %% No options supplied: delegate to parse_file/2 with an empty list.
    DefaultOptions = [],
    parse_file(File, DefaultOptions).
110
111%% @spec parse_file(File, Options) -> {ok, Forms} | {error, errorinfo()}
112%%       File = file:filename()
113%%       Options = [term()]
114%%       Forms = [erl_syntax:syntaxTree()]
115%%
116%% @doc Reads and parses a file. If successful, `{ok, Forms}'
117%% is returned, where `Forms' is a list of abstract syntax
118%% trees representing the "program forms" of the file (cf.
119%% `erl_syntax:is_form/1'). Otherwise, `{error, errorinfo()}' is
120%% returned, typically if the file could not be opened. Note that
121%% parse errors show up as error markers in the returned list of
122%% forms; they do not cause this function to fail or return
123%% `{error, errorinfo()}'.
124%%
125%% Options:
126%% <dl>
127%%   <dt>{@type {no_fail, boolean()@}}</dt>
128%%   <dd>If `true', this makes `epp_dodger' replace any program forms
129%%   that could not be parsed with nodes of type `text' (see {@link
130%%   erl_syntax:text/1}), representing the raw token sequence of the
131%%   form, instead of reporting a parse error. The default value is
132%%   `false'.</dd>
133%%   <dt>{@type {clever, boolean()@}}</dt>
134%%   <dd>If set to `true', this makes `epp_dodger' try to repair the
135%%   source code as it seems fit, in certain cases where parsing would
136%%   otherwise fail. Currently, it inserts `++'-operators between string
137%%   literals and macros where it looks like concatenation was intended.
138%%   The default value is `false'.</dd>
139%% </dl>
140%%
141%% @see parse/2
142%% @see quick_parse_file/1
143%% @see erl_syntax:is_form/1
144
-spec parse_file(file:filename(), [option()]) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

parse_file(File, Options) ->
    %% The exact (non-quick) stream parser handles the opened device.
    StreamParser = fun parse/3,
    parse_file(File, StreamParser, Options).
150
151%% @spec quick_parse_file(File) -> {ok, Forms} | {error, errorinfo()}
152%%       File = file:filename()
153%%       Forms = [erl_syntax:syntaxTree()]
154%%
155%% @equiv quick_parse_file(File, [])
156
-spec quick_parse_file(file:filename()) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

quick_parse_file(File) ->
    %% No options supplied: delegate to quick_parse_file/2.
    DefaultOptions = [],
    quick_parse_file(File, DefaultOptions).
162
163%% @spec quick_parse_file(File, Options) ->
164%%           {ok, Forms} | {error, errorinfo()}
165%%       File = file:filename()
166%%       Options = [term()]
167%%       Forms = [erl_syntax:syntaxTree()]
168%%
169%% @doc Similar to {@link parse_file/2}, but does a more quick-and-dirty
170%% processing of the code. Macro definitions and other preprocessor
171%% directives are discarded, and all macro calls are replaced with
172%% atoms. This is useful when only the main structure of the code is of
173%% interest, and not the details. Furthermore, the quick-parse method
174%% can usually handle more strange cases than the normal, more exact
175%% parsing.
176%%
177%% Options: see {@link parse_file/2}. Note however that for
178%% `quick_parse_file/2', the option `no_fail' is `true' by default.
179%%
180%% @see quick_parse/2
181%% @see parse_file/2
182
-spec quick_parse_file(file:filename(), [option()]) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

quick_parse_file(File, Options) ->
    %% `no_fail' is appended after the caller's options so that it only
    %% acts as a default; an explicit no_fail entry given by the caller
    %% comes earlier in the list.
    WithDefault = Options ++ [no_fail],
    parse_file(File, fun quick_parse/3, WithDefault).
188
%% Parses File with Parser, first assuming UTF-8. If the resulting forms
%% contain an invalid_unicode error marker and the file does not itself
%% declare UTF-8 encoding, the file is parsed again as Latin-1.
parse_file(File, Parser, Options) ->
    FirstTry = do_parse_file(utf8, File, Parser, Options),
    NeedsLatin1 =
        case FirstTry of
            {ok, Forms} ->
                %% read_encoding is only consulted when the marker is found
                find_invalid_unicode(Forms) =:= invalid_unicode
                    andalso epp:read_encoding(File) =/= utf8;
            _ ->
                false
        end,
    case NeedsLatin1 of
        true ->
            do_parse_file(latin1, File, Parser, Options);
        false ->
            FirstTry
    end.
206
%% Opens File for reading, sets the default encoding on the device and
%% runs Parser(Dev, 1, Options). The device is always closed afterwards.
%% Returns {error, {0, file, Reason}} if the file cannot be opened.
do_parse_file(DefEncoding, File, Parser, Options) ->
    case file:open(File, [read]) of
        {error, Reason} ->
            %% module `file' in the descriptor defers the formatting
            %% to file:format_error/1
            {error, {0, file, Reason}};
        {ok, Dev} ->
            _ = epp:set_encoding(Dev, DefEncoding),
            try
                Parser(Dev, 1, Options)
            after
                ok = file:close(Dev)
            end
    end.
217
%% Returns `invalid_unicode' if any element of the list is an error
%% marker reported by file_io_server for invalid unicode input,
%% otherwise `none'.
find_invalid_unicode(Forms) ->
    IsInvalid =
        fun({error, {_Location, file_io_server, invalid_unicode}}) -> true;
           (_Other) -> false
        end,
    case lists:any(IsInvalid, Forms) of
        true -> invalid_unicode;
        false -> none
    end.
226
227%% =====================================================================
228%% @spec parse(IODevice) -> {ok, Forms} | {error, errorinfo()}
229%% @equiv parse(IODevice, 1)
230
-spec parse(file:io_device()) -> {'ok', erl_syntax:forms()}.

parse(Dev) ->
    %% Locations start at line 1 when no start location is given.
    FirstLine = 1,
    parse(Dev, FirstLine).
235
236%% @spec parse(IODevice, StartLocation) -> {ok, Forms} | {error, errorinfo()}
237%%       IODevice = pid()
238%%       StartLocation = //stdlib/erl_anno:location()
239%%       Forms = [erl_syntax:syntaxTree()]
240%%
241%% @equiv parse(IODevice, StartLocation, [])
242%% @see parse/1
243
-spec parse(file:io_device(), erl_anno:location()) -> {'ok', erl_syntax:forms()}.

parse(Dev, L) ->
    %% No options supplied: delegate to parse/3 with an empty list.
    DefaultOptions = [],
    parse(Dev, L, DefaultOptions).
248
249%% @spec parse(IODevice, StartLocation, Options) ->
250%%           {ok, Forms} | {error, errorinfo()}
251%%       IODevice = pid()
252%%       StartLocation = //stdlib/erl_anno:location()
253%%       Options = [term()]
254%%       Forms = [erl_syntax:syntaxTree()]
255%%
256%% @doc Reads and parses program text from an I/O stream. Characters are
257%% read from `IODevice' until end-of-file; apart from this, the
258%% behaviour is the same as for {@link parse_file/2}. `StartLocation' is the
259%% initial location.
260%%
261%% @see parse/2
262%% @see parse_file/2
263%% @see parse_form/2
264%% @see quick_parse/3
265
-spec parse(file:io_device(), erl_anno:location(), [option()]) ->
        {'ok', erl_syntax:forms()}.

parse(Dev, L0, Options) ->
    %% Each form is read with the exact form parser.
    FormParser = fun parse_form/3,
    parse(Dev, L0, FormParser, Options).
271
272%% @spec quick_parse(IODevice) -> {ok, Forms} | {error, errorinfo()}
273%% @equiv quick_parse(IODevice, 1)
274
-spec quick_parse(file:io_device()) ->
        {'ok', erl_syntax:forms()}.

quick_parse(Dev) ->
    %% Locations start at line 1 when no start location is given.
    FirstLine = 1,
    quick_parse(Dev, FirstLine).
280
281%% @spec quick_parse(IODevice, StartLocation) ->
282%%           {ok, Forms} | {error, errorinfo()}
283%%       IODevice = pid()
284%%       StartLocation = //stdlib/erl_anno:location()
285%%       Forms = [erl_syntax:syntaxTree()]
286%%
287%% @equiv quick_parse(IODevice, StartLocation, [])
288%% @see quick_parse/1
289
-spec quick_parse(file:io_device(), erl_anno:location()) ->
        {'ok', erl_syntax:forms()}.

quick_parse(Dev, L) ->
    %% No options supplied: delegate to quick_parse/3 with an empty list.
    DefaultOptions = [],
    quick_parse(Dev, L, DefaultOptions).
295
296%% @spec (IODevice, StartLocation, Options) ->
297%%           {ok, Forms} | {error, errorinfo()}
298%%       IODevice = pid()
299%%       StartLocation = //stdlib/erl_anno:location()
300%%       Options = [term()]
301%%       Forms = [erl_syntax:syntaxTree()]
302%%
303%% @doc Similar to {@link parse/3}, but does a more quick-and-dirty
304%% processing of the code. See {@link quick_parse_file/2} for details.
305%%
306%% @see quick_parse/2
307%% @see quick_parse_file/2
308%% @see quick_parse_form/2
309%% @see parse/3
310
-spec quick_parse(file:io_device(), erl_anno:location(), [option()]) ->
        {'ok', erl_syntax:forms()}.

quick_parse(Dev, L0, Options) ->
    %% Each form is read with the quick-and-dirty form parser.
    FormParser = fun quick_parse_form/3,
    parse(Dev, L0, FormParser, Options).
316
%% Starts the form-collecting loop with an empty accumulator.
parse(Dev, L0, Parser, Options) ->
    NoFormsYet = [],
    parse(Dev, L0, NoFormsYet, Parser, Options).
319
%% Form-collecting loop: calls Parser(Dev, Location, Options) repeatedly
%% until it reports eof. Forms are accumulated in reverse order in Fs;
%% `none' results are dropped, and errors are kept as {error, IoErr}
%% entries in the returned list.
parse(Dev, L0, Fs, Parser, Options) ->
    case Parser(Dev, L0, Options) of
        {eof, _End} ->
            {ok, lists:reverse(Fs)};
        {ok, none, Next} ->
            %% the form was discarded by the parser; nothing to keep
            parse(Dev, Next, Fs, Parser, Options);
        {ok, Form, Next} ->
            parse(Dev, Next, [Form | Fs], Parser, Options);
        {error, IoErr, Next} ->
            ErrorEntry = {error, IoErr},
            parse(Dev, Next, [ErrorEntry | Fs], Parser, Options)
    end.
331
332
333%% =====================================================================
334%% @spec parse_form(IODevice, StartLocation) -> {ok, Form, Location}
335%%                                            | {eof, Location}
336%%                                            | {error, errorinfo(), Location}
337%%       IODevice = pid()
338%%       StartLocation = //stdlib/erl_anno:location()
339%%       Form = erl_syntax:syntaxTree()
340%%       Location = //stdlib/erl_anno:location()
341%%
342%% @equiv parse_form(IODevice, StartLocation, [])
343%%
344%% @see quick_parse_form/2
345
-spec parse_form(file:io_device(), erl_anno:location()) ->
        {'ok', erl_syntax:forms(), erl_anno:location()}
      | {'eof', erl_anno:location()} | {'error', errorinfo(), erl_anno:location()}.

parse_form(Dev, L0) ->
    %% No options supplied: delegate to parse_form/3 with an empty list.
    DefaultOptions = [],
    parse_form(Dev, L0, DefaultOptions).
352
353%% @spec parse_form(IODevice, StartLocation, Options) ->
354%%           {ok, Form, Location}
355%%         | {eof, Location}
356%%         | {error, errorinfo(), Location}
357%%
358%%       IODevice = pid()
359%%       StartLocation = //stdlib/erl_anno:location()
360%%       Options = [term()]
361%%       Form = erl_syntax:syntaxTree()
362%%       Location = //stdlib/erl_anno:location()
363%%
364%% @doc Reads and parses a single program form from an I/O stream.
365%% Characters are read from `IODevice' until an end-of-form
366%% marker is found (a period character followed by whitespace), or until
367%% end-of-file; apart from this, the behaviour is similar to that of
368%% `parse/3', except that the return values also contain the
369%% final location given that `StartLocation' is the initial
370%% location, and that `{eof, Location}' may be returned.
371%%
372%% @see parse/3
373%% @see parse_form/2
374%% @see quick_parse_form/3
375
-spec parse_form(file:io_device(), erl_anno:location(), [option()]) ->
        {'ok', erl_syntax:forms(), erl_anno:location()}
      | {'eof', erl_anno:location()} | {'error', errorinfo(), erl_anno:location()}.

parse_form(Dev, L0, Options) ->
    %% The normal token-level parser tries to preserve all information.
    TokenParser = fun normal_parser/2,
    parse_form(Dev, L0, TokenParser, Options).
382
383%% @spec quick_parse_form(IODevice, StartLocation) ->
384%%           {ok, Form, Location}
385%%         | {eof, Location}
386%%         | {error, errorinfo(), Location}
387%%       IODevice = pid()
388%%       StartLocation = //stdlib/erl_anno:location()
389%%       Form = erl_syntax:syntaxTree() | none
390%%       Location = //stdlib/erl_anno:location()
391%%
392%% @equiv quick_parse_form(IODevice, StartLocation, [])
393%%
394%% @see parse_form/2
395
-spec quick_parse_form(file:io_device(), erl_anno:location()) ->
        {'ok', erl_syntax:forms() | 'none', erl_anno:location()}
      | {'eof', erl_anno:location()} | {'error', errorinfo(), erl_anno:location()}.

quick_parse_form(Dev, L0) ->
    %% No options supplied: delegate to quick_parse_form/3. The form in
    %% the result may be the atom `none' when the quick parser discards
    %% a preprocessor directive, hence `none' in the spec.
    quick_parse_form(Dev, L0, []).
402
403%% @spec quick_parse_form(IODevice, StartLocation, Options) ->
404%%           {ok, Form, Location}
405%%         | {eof, Location}
406%%         | {error, errorinfo(), Location}
407%%
408%%       IODevice = pid()
409%%       StartLocation = //stdlib/erl_anno:location()
410%%       Options = [term()]
%%       Form = erl_syntax:syntaxTree() | none
412%%       Location = //stdlib/erl_anno:location()
413%%
414%% @doc Similar to {@link parse_form/3}, but does a more quick-and-dirty
415%% processing of the code. See {@link quick_parse_file/2} for details.
416%%
417%% @see parse/3
418%% @see quick_parse_form/2
419%% @see parse_form/3
420
-spec quick_parse_form(file:io_device(), erl_anno:location(), [option()]) ->
        {'ok', erl_syntax:forms() | 'none', erl_anno:location()}
      | {'eof', erl_anno:location()} | {'error', errorinfo(), erl_anno:location()}.

quick_parse_form(Dev, L0, Options) ->
    %% The quick parser drops preprocessor directives: such a form is
    %% returned as {ok, none, Location}, hence `none' in the spec.
    parse_form(Dev, L0, fun quick_parser/2, Options).
427
428-record(opt, {clever = false :: boolean()}).
429
%% Scans the tokens of one form from Dev and hands them to
%% Parser(Tokens, Opt). Exceptions from the parser are converted to
%% results:
%%   - a thrown {error, Term} or a runtime failure becomes an
%%     {error, ErrorInfo, Location} result;
%%   - a thrown {parse_error, IoErr} becomes an error result, unless the
%%     `no_fail' option is set, in which case the raw token text is
%%     returned as a `text' node positioned at the first token.
parse_form(Dev, L0, Parser, Options) ->
    NoFail = proplists:get_bool(no_fail, Options),
    Opt = #opt{clever = proplists:get_bool(clever, Options)},
    case io:scan_erl_form(Dev, "", L0) of
        {eof, _L1} = Eof ->
            Eof;
        {error, _Reason} ->
            %% This is probably an encoding problem
            {eof, L0};
        {error, _IoErr, _L1} = ScanError ->
            ScanError;
        {ok, Ts, L1} ->
            Outcome = (catch {ok, Parser(Ts, Opt)}),
            case Outcome of
                {ok, F} ->
                    {ok, F, L1};
                {parse_error, _IoErr} when NoFail ->
                    TextNode = erl_syntax:text(tokens_to_string(Ts)),
                    Pos = erl_anno:new(start_pos(Ts, L1)),
                    {ok, erl_syntax:set_pos(TextNode, Pos), L1};
                {parse_error, IoErr} ->
                    {error, IoErr, L1};
                {error, Term} ->
                    {error, io_error(L1, Term), L1};
                {'EXIT', Term} ->
                    {error, io_error(L1, {unknown, Term}), L1}
            end
    end.
455
%% Builds an ErrorInfo triple that names this module as the formatter
%% of the descriptor.
io_error(L, Desc) ->
    Formatter = ?MODULE,
    {L, Formatter, Desc}.
458
%% Location of the first token in the list, or the given fallback
%% location when the list is empty.
start_pos([], Fallback) ->
    Fallback;
start_pos([First | _], _Fallback) ->
    %% the annotation is the second element of every token tuple
    Anno = element(2, First),
    erl_anno:location(Anno).
463
464%% Exception-throwing wrapper for the standard Erlang parser stage
465
parse_tokens(Ts) ->
    %% fix_form/1 is the first repair attempt if the parser rejects Ts
    FirstFix = fun fix_form/1,
    parse_tokens(Ts, FirstFix).
468
%% Parses the tokens with erl_parse:parse_form/1. On a parse error,
%% Fix(Ts) decides what to do: supply a replacement form, retry with
%% new tokens and a new fix function, or give up, in which case
%% {parse_error, IoErr} is thrown.
parse_tokens(Ts, Fix) ->
    Parsed = erl_parse:parse_form(Ts),
    case Parsed of
        {error, ParseErr} ->
            Repair = Fix(Ts),
            case Repair of
                error ->
                    throw({parse_error, ParseErr});
                {form, Replacement} ->
                    Replacement;
                {retry, NewTs, NextFix} ->
                    parse_tokens(NewTs, NextFix)
            end;
        {ok, Form} ->
            Form
    end.
483
484%% ---------------------------------------------------------------------
485%% Quick scanning/parsing - deletes macro definitions and other
486%% preprocessor directives, and replaces all macro calls with atoms.
487
quick_parser(Ts, _Opt) ->
    %% scan away directives and macros, parse, then drop killed forms
    Scanned = quickscan_form(Ts),
    Form = parse_tokens(Scanned),
    filter_form(Form).
490
%% Pre-processes the tokens of a single form for quick parsing.
%% Preprocessor directives are replaced by a dummy "kill" form (see
%% kill_form/1); a macro at the very start of the form is assumed to
%% take no arguments; everything else goes through quickscan_macros/1.
%%
%% Note: the atom 'else' is quoted because `else' is a reserved word
%% when the maybe_expr feature is enabled, and an unquoted `else' does
%% not compile there. The {'else', Anno} keyword token is also accepted,
%% mirroring the handling of 'if', for scanners that treat `else' as
%% reserved.
quickscan_form([{'-', _Anno}, {atom, AnnoA, Directive} | _Ts])
  when Directive =:= define; Directive =:= undef;
       Directive =:= include; Directive =:= include_lib;
       Directive =:= ifdef; Directive =:= ifndef;
       Directive =:= elif; Directive =:= 'else';
       Directive =:= endif ->
    kill_form(AnnoA);
quickscan_form([{'-', _Anno}, {Keyword, AnnoA} | _Ts])
  when Keyword =:= 'if'; Keyword =:= 'else' ->
    %% directives whose names are scanned as reserved words
    kill_form(AnnoA);
quickscan_form([{'-', Anno}, {'?', _}, {Type, _, _}=N | [{'(', _} | _]=Ts])
  when Type =:= atom; Type =:= var ->
    %% minus, macro and open parenthesis at start of form - assume that
    %% the macro takes no arguments; e.g. `-?foo(...).'
    quickscan_macros_1(N, Ts, [{'-', Anno}]);
quickscan_form([{'?', _Anno}, {Type, _, _}=N | [{'(', _} | _]=Ts])
  when Type =:= atom; Type =:= var ->
    %% macro and open parenthesis at start of form - assume that the
    %% macro takes no arguments (see scan_macros for details)
    quickscan_macros_1(N, Ts, []);
quickscan_form(Ts) ->
    quickscan_macros(Ts).
523
%% Token sequence for the dummy form `?pp_form() -> kill.', which
%% filter_form/1 recognizes and removes after parsing. All tokens carry
%% the annotation A.
kill_form(A) ->
    Head = [{atom, A, ?pp_form}, {'(', A}, {')', A}, {'->', A}],
    Body = [{atom, A, kill}, {dot, A}],
    Head ++ Body.
527
quickscan_macros(Ts) ->
    %% start with an empty (reversed) list of already scanned tokens
    Scanned = [],
    quickscan_macros(Ts, Scanned).
530
%% Walks the token list and replaces every macro use with an atom (or
%% merges it into an adjacent string literal). The second argument holds
%% the tokens scanned so far, in reverse order.
quickscan_macros([], Done) ->
    lists:reverse(Done);
quickscan_macros([{'?',_}, {Kind, _, Name} | More],
                 [{string, AnnoS, Str} | Done])
  when Kind =:= atom; Kind =:= var ->
    %% macro after a string literal: change to a single string
    {_Skipped, After} = skip_macro_args(More),
    Joined = Str ++ quick_macro_string(Name),
    quickscan_macros(After, [{string, AnnoS, Joined} | Done]);
quickscan_macros([{'?',_}, {Kind, _, _}=N | [{'(',_}|_]=More],
                 [{':',_}|_]=Done)
  when Kind =:= atom; Kind =:= var ->
    %% macro and open parenthesis after colon - check the token
    %% following the arguments (see scan_macros for details)
    After =
        case skip_macro_args(More) of
            {_, [{Next, _} | _] = Tail}
              when Next =:= '->'; Next =:= 'when'; Next =:= ':' ->
                Tail;
            _ ->
                More    %% assume macro without arguments
        end,
    quickscan_macros_1(N, After, Done);
quickscan_macros([{'?',_}, {Kind, _, _}=N | More], Done)
  when Kind =:= atom; Kind =:= var ->
    %% macro with or without arguments
    {_Skipped, After} = skip_macro_args(More),
    quickscan_macros_1(N, After, Done);
quickscan_macros([Token | More], Done) ->
    quickscan_macros(More, [Token | Done]).
558
559%% (after a macro has been found and the arglist skipped, if any)
quickscan_macros_1({_Type, AnnoA, A}, Ts, As) ->
    case Ts of
        [{string, AnnoS, S} | Rest] ->
            %% string literal following macro: change to single string
            Joined = quick_macro_string(A) ++ S,
            quickscan_macros(Rest, [{string, AnnoS, Joined} | As]);
        _ ->
            %% normal case - just replace the macro with an atom
            Replacement = {atom, AnnoA, quick_macro_atom(A)},
            quickscan_macros(Ts, [Replacement | As])
    end.
567
%% The atom used in place of a macro in quick mode: the macro name
%% prefixed with a question mark.
quick_macro_atom(A) ->
    Name = atom_to_list(A),
    list_to_atom([$? | Name]).
570
%% The text merged into an adjacent string literal in place of a macro
%% in quick mode: the macro name as `(?Name)'.
quick_macro_string(A) ->
    Name = atom_to_list(A),
    lists:append(["(?", Name, ")"]).
573
574%% Skipping to the end of a macro call, tracking open/close constructs.
575%% @spec (Tokens) -> {Skipped, Rest}
576
skip_macro_args(Ts) ->
    case Ts of
        [{'(',_}=Open | Rest] ->
            %% argument list present: skip up to the matching ')'
            skip_macro_args(Rest, [')'], [Open]);
        _ ->
            %% no argument list: nothing is skipped
            {[], Ts}
    end.
581
%% Es is a stack of the closing tokens currently expected and As holds
%% the skipped tokens in reverse order. An opening token pushes its
%% closer; a token equal to the top of the stack pops it; all other
%% tokens are just skipped. Returns {Skipped, Rest} when the last
%% expected closer has been consumed; throws {error, macro_args} if the
%% tokens run out first.
skip_macro_args([], _Es, _As) ->
    throw({error, macro_args});
skip_macro_args([T | Ts], Es, As) ->
    Seen = [T | As],
    case {T, Es} of
        {{'(', _}, _} ->
            skip_macro_args(Ts, [')' | Es], Seen);
        {{'{', _}, _} ->
            skip_macro_args(Ts, ['}' | Es], Seen);
        {{'[', _}, _} ->
            skip_macro_args(Ts, [']' | Es], Seen);
        {{'<<', _}, _} ->
            skip_macro_args(Ts, ['>>' | Es], Seen);
        {{Kw, _}, _} when Kw =:= 'begin'; Kw =:= 'if'; Kw =:= 'case';
                          Kw =:= 'receive'; Kw =:= 'try' ->
            skip_macro_args(Ts, ['end' | Es], Seen);
        {{E, _}, [E]} ->                        %final close
            {lists:reverse(Seen), Ts};
        {{E, _}, [E | Outer]} ->                %matching close
            skip_macro_args(Ts, Outer, Seen);
        _ ->
            skip_macro_args(Ts, Es, Seen)
    end.
608
%% Maps the parsed dummy "kill" form (see kill_form/1) to `none'; any
%% other form is returned unchanged.
filter_form(Form) ->
    case Form of
        {function, _, ?pp_form, _,
         [{clause, _, [], [], [{atom, _, kill}]}]} ->
            none;
        _ ->
            Form
    end.
614
615
616%% ---------------------------------------------------------------------
617%% Normal parsing - try to preserve all information
618
normal_parser(Ts0, Opt) ->
    Scanned = scan_form(Ts0, Opt),
    if
        is_list(Scanned) ->
            %% a token list: parse it and restore the macro nodes
            rewrite_form(parse_tokens(Scanned));
        true ->
            %% scan_form already produced a finished syntax tree node
            Scanned
    end.
626
%% Pre-processes the tokens of a single form for normal parsing.
%%
%% A preprocessor directive `-Name Rest' is rewritten to the tokens of
%% `?pp_form() -> Name Rest', with macros in Rest handled by
%% scan_macros/2, so that it passes the standard parser (rewrite_form/1
%% turns it back into an attribute). `-error' and `-warning' forms are
%% returned directly as error marker nodes instead of tokens. A macro
%% at the very start of the form is assumed to take no arguments.
%%
%% Note: the atom 'else' is quoted because `else' is a reserved word
%% when the maybe_expr feature is enabled, and an unquoted `else' does
%% not compile there. The {'else', Anno} keyword token is also accepted,
%% mirroring the handling of 'if', for scanners that treat `else' as
%% reserved.
scan_form([{'-', _Anno}, {atom, AnnoA, Directive} | Ts], Opt)
  when Directive =:= define; Directive =:= undef;
       Directive =:= include; Directive =:= include_lib;
       Directive =:= ifdef; Directive =:= ifndef;
       Directive =:= elif; Directive =:= 'else';
       Directive =:= endif ->
    [{atom, AnnoA, ?pp_form}, {'(', AnnoA}, {')', AnnoA}, {'->', AnnoA},
     {atom, AnnoA, Directive} | scan_macros(Ts, Opt)];
scan_form([{'-', _Anno}, {Keyword, AnnoA} | Ts], Opt)
  when Keyword =:= 'if'; Keyword =:= 'else' ->
    %% directive names scanned as reserved words are turned into atoms
    [{atom, AnnoA, ?pp_form}, {'(', AnnoA}, {')', AnnoA}, {'->', AnnoA},
     {atom, AnnoA, Keyword} | scan_macros(Ts, Opt)];
scan_form([{'-', _Anno}, {atom, AnnoA, error} | Ts], _Opt) ->
    Desc = build_info_string("-error", Ts),
    ErrorInfo = {erl_anno:location(AnnoA), ?MODULE, {error, Desc}},
    erl_syntax:error_marker(ErrorInfo);
scan_form([{'-', _Anno}, {atom, AnnoA, warning} | Ts], _Opt) ->
    Desc = build_info_string("-warning", Ts),
    ErrorInfo = {erl_anno:location(AnnoA), ?MODULE, {warning, Desc}},
    erl_syntax:error_marker(ErrorInfo);
scan_form([{'-', A}, {'?', A1}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt)
  when Type =:= atom; Type =:= var ->
    %% minus, macro and open parenthesis at start of form - assume that
    %% the macro takes no arguments; e.g. `-?foo(...).'
    macro(A1, N, Ts, [{'-', A}], Opt);
scan_form([{'?', A}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt)
  when Type =:= atom; Type =:= var ->
    %% macro and open parenthesis at start of form - assume that the
    %% macro takes no arguments; probably a function declaration on the
    %% form `?m(...) -> ...', which will not parse if it is rewritten as
    %% `(?m(...)) -> ...', so it must be handled as `(?m)(...) -> ...'
    macro(A, N, Ts, [], Opt);
scan_form(Ts, Opt) ->
    scan_macros(Ts, Opt).
679
%% Builds the descriptive text for an `-error'/`-warning' form: Prefix,
%% a space, the text of the tokens Ts0 without the last token and
%% without the last character of the generated text, and a final ".".
%%
%% lists:droplast/1 fails on an empty list, so an empty token list or
%% empty token text (e.g. from a bare `-error.' form) is passed through
%% unchanged instead of crashing the scan.
build_info_string(Prefix, Ts0) ->
    DropLast = fun([]) -> [];
                  (List) -> lists:droplast(List)
               end,
    Ts = DropLast(Ts0),
    String = DropLast(tokens_to_string(Ts)),
    Prefix ++ " " ++ String ++ ".".
684
scan_macros(Ts, Opt) ->
    %% start with an empty (reversed) list of already scanned tokens
    Scanned = [],
    scan_macros(Ts, Scanned, Opt).
687
%% Walks the token list and encodes every macro use so that the result
%% passes the standard parser. The second argument holds the tokens
%% scanned so far, in reverse order.
scan_macros([], Done, _Opt) ->
    lists:reverse(Done);
scan_macros([{'?', _}=Q, {Kind, _, _}=Name | More],
            [{string, AnnoS, _}=Str | Done],
            #opt{clever = true}=Opt)
  when Kind =:= atom; Kind =:= var ->
    %% macro after a string literal: be clever and insert ++
    scan_macros([Q, Name | More], [{'++', AnnoS}, Str | Done], Opt);
scan_macros([{'?', Anno}, {Kind, _, _}=Name | [{'(',_}|_]=More],
            [{':',_}|_]=Done, Opt)
  when Kind =:= atom; Kind =:= var ->
    %% macro and open parentheses after colon - probably a call
    %% `m:?F(...)' so the argument list might belong to the call, not
    %% the macro - but it could also be a try-clause pattern
    %% `...:?T(...) ->' - we need to check the token following the
    %% arguments to decide
    {Args, Rest} = skip_macro_args(More),
    case Rest of
        [{Next, _} | _] when Next =:= '->'; Next =:= 'when'; Next =:= ':' ->
            macro_call(Args, Anno, Name, Rest, Done, Opt);
        _ ->
            macro(Anno, Name, More, Done, Opt)
    end;
scan_macros([{'?', Anno}, {Kind, _, _}=Name | [{'(',_}|_]=More], Done, Opt)
  when Kind =:= atom; Kind =:= var ->
    %% macro with arguments
    {Args, Rest} = skip_macro_args(More),
    macro_call(Args, Anno, Name, Rest, Done, Opt);
scan_macros([{'?', Anno}, {Kind, _, _}=Name | More], Done, Opt)
  when Kind =:= atom; Kind =:= var ->
    %% macro without arguments
    macro(Anno, Name, More, Done, Opt);
scan_macros([Token | More], Done, Opt) ->
    scan_macros(More, [Token | Done], Opt).
725
726%% Rewriting to a tuple which will be recognized by the post-parse pass
727%% (we insert parentheses to preserve the precedences when parsing).
728
macro(Anno, {Type, _, A}, Rest, As, Opt) ->
    %% a macro without arguments is encoded as a single atom token
    Encoded = {atom, Anno, macro_atom(Type, A)},
    scan_macros_1([], Rest, [Encoded | As], Opt).
731
macro_call([{'(',_}, {')',_}], Anno, {_, AnnoN, _}=N, Rest, As, Opt) ->
    %% empty argument list: emit the tokens of {'?macro_call', N}
    {Open, Close} = parentheses(As),
    TupleTokens = [{'{', Anno},
                   {atom, Anno, ?macro_call},
                   {',', Anno},
                   N,
                   {'}', AnnoN}],
    Scanned = lists:reverse(Open ++ TupleTokens ++ Close, As),
    scan_macros_1([], Rest, Scanned, Opt);
macro_call([{'(',_} | Args], Anno, {_, AnnoN, _}=N, Rest, As, Opt) ->
    %% emit the tokens of {'?macro_call', N, Arg1, ... }
    {Open, Close} = parentheses(As),
    %% drop closing parenthesis
    {')', _} = lists:last(Args), %% assert
    ArgTokens = lists:droplast(Args),
    %% note that we must scan the argument list; it may not be skipped
    Pending = ArgTokens ++ [{'}', AnnoN} | Close],
    Header = [{'{', Anno},
              {atom, Anno, ?macro_call},
              {',', Anno},
              N,
              {',', AnnoN}],
    Scanned = lists:reverse(Open ++ Header, As),
    scan_macros_1(Pending, Rest, Scanned, Opt).
757
%% Encodes a macro name as an atom; the prefix records whether the name
%% was written as an atom or as a variable (decoded again in rewrite/1).
macro_atom(atom, A) ->
    Name = atom_to_list(A),
    list_to_atom(lists:append(?atom_prefix, Name));
macro_atom(var, A) ->
    Name = atom_to_list(A),
    list_to_atom(lists:append(?var_prefix, Name)).
762
763%% don't insert parentheses after a string token, to avoid turning
764%% `"string" ?macro' into a "function application" `"string"(...)'
765%% (see note at top of file)
parentheses(As) ->
    case As of
        [{string, _, _} | _] ->
            %% previous token is a string literal: no parentheses
            {[], []};
        _ ->
            Open = [{'(', 0}],
            Close = [{')', 0}],
            {Open, Close}
    end.
770
771%% (after a macro has been found and the arglist skipped, if any)
scan_macros_1(Args, Rest, As, Opt) ->
    Tokens =
        case {Rest, Opt} of
            {[{string, AnnoS, _} | _], #opt{clever = true}} ->
                %% string literal following macro: be clever and insert ++
                Args ++ [{'++', AnnoS} | Rest];
            _ ->
                %% normal case - continue scanning
                Args ++ Rest
        end,
    scan_macros(Tokens, As, Opt).
779
%% Converts a parsed form back into its intended shape. A function
%% named ?pp_form with a single clause (no arguments, no guard, one body
%% expression) becomes an attribute node positioned at the function's
%% annotation: a call `Name(Args...)' in the body yields an attribute
%% with rewritten arguments, any other single expression yields an
%% attribute without arguments. All other forms go through rewrite/1.
rewrite_form({function, Anno, ?pp_form, _,
              [{clause, _, [], [], [{call, _, Name, Args}]}]}) ->
    Attribute = erl_syntax:attribute(Name, rewrite_list(Args)),
    erl_syntax:set_pos(Attribute, Anno);
rewrite_form({function, Anno, ?pp_form, _, [{clause, _, [], [], [Name]}]}) ->
    Attribute = erl_syntax:attribute(Name),
    erl_syntax:set_pos(Attribute, Anno);
rewrite_form(Form) ->
    rewrite(Form).
787
%% Applies rewrite/1 to every tree in the list, preserving order.
rewrite_list(Trees) ->
    [rewrite(Tree) || Tree <- Trees].
792
%% Note: as soon as we start using erl_syntax:subtrees/1 and similar
%% functions, we cannot assume that we know the exact representation of
%% the syntax tree anymore - we must use erl_syntax functions to analyze
%% and decompose the data.

%% Restores macro nodes in a syntax tree. Atoms whose names carry the
%% atom or variable macro prefix become macro nodes, tuples tagged with
%% the ?macro_call atom become macro applications, and every other node
%% is traversed through rewrite_1/1.
rewrite(Node) ->
    case erl_syntax:type(Node) of
        atom ->
            rewrite_atom(Node);
        tuple ->
            rewrite_tuple(Node);
        _ ->
            rewrite_1(Node)
    end.

%% An atom is a disguised macro if its name starts with one of the macro
%% prefixes; the rest of the name is the macro name (an atom or a
%% variable, depending on the prefix). Other atoms are left unchanged.
rewrite_atom(Node) ->
    case atom_to_list(erl_syntax:atom_value(Node)) of
        ?atom_prefix ++ Name ->
            rewrite_macro_name(Node, erl_syntax:atom(list_to_atom(Name)));
        ?var_prefix ++ Name ->
            rewrite_macro_name(Node, erl_syntax:variable(list_to_atom(Name)));
        _ ->
            Node
    end.

%% Wraps the macro name node in a macro node; both take their position
%% from the original node.
rewrite_macro_name(Node, NameNode) ->
    Name = erl_syntax:copy_pos(Node, NameNode),
    erl_syntax:copy_pos(Node, erl_syntax:macro(Name)).

%% A tuple with at least two elements whose first element is the
%% ?macro_call atom is a disguised macro application: the second element
%% is the macro name (used as it is) and the remaining elements are the
%% arguments, which are rewritten in turn. Other tuples are traversed
%% like any other node.
rewrite_tuple(Node) ->
    case erl_syntax:tuple_elements(Node) of
        [MagicWord, Name | Args] ->
            case is_macro_call_tag(MagicWord) of
                true ->
                    Macro = erl_syntax:macro(Name, rewrite_list(Args)),
                    erl_syntax:copy_pos(Node, Macro);
                false ->
                    rewrite_1(Node)
            end;
        _ ->
            rewrite_1(Node)
    end.

%% True if the tree is an atom node whose value is the ?macro_call tag.
is_macro_call_tag(Tree) ->
    erl_syntax:type(Tree) =:= atom
        andalso erl_syntax:atom_value(Tree) =:= ?macro_call.
834
%% Rewrites the children of a node. A node without subtrees is returned
%% unchanged; otherwise a node of the same type is rebuilt from the
%% rewritten subtree groups and given the position of the original.
rewrite_1(Node) ->
    case erl_syntax:subtrees(Node) of
        [] ->
            Node;
        Groups ->
            RewriteGroup = fun(Group) -> lists:map(fun rewrite/1, Group) end,
            NewGroups = lists:map(RewriteGroup, Groups),
            Rebuilt = erl_syntax:make_tree(erl_syntax:type(Node), NewGroups),
            erl_syntax:copy_pos(Node, Rebuilt)
    end.
845
%% attempting a rescue operation on a token sequence for a single form
%% if it could not be parsed after the normal treatment
%%
%% Only token sequences starting with `?pp_form ( ) -> define (' are
%% handled. If the sequence ends with `) .' it is retried as it is; if
%% it ends with a dot only, a closing parenthesis (with the annotation
%% of the dot) is inserted before the dot first. In both cases the
%% retry is done with fix_define/1. Anything else yields `error'.
fix_form([{atom, _, ?pp_form}, {'(', _}, {')', _}, {'->', _},
          {atom, _, define}, {'(', _} | _] = Tokens) ->
    case lists:reverse(Tokens) of
        [{dot, _}, {')', _} | _] ->
            {retry, Tokens, fun fix_define/1};
        [{dot, Anno} | RevInit] ->
            Fixed = lists:reverse(RevInit, [{')', Anno}, {dot, Anno}]),
            {retry, Fixed, fun fix_define/1};
        _ ->
            error
    end;
fix_form(_Tokens) ->
    error.
862
%% Builds a `define' attribute directly from the tokens
%% `?pp_form ( ) -> define ( N , ... ) .'. The tokens between the comma
%% and the final `) .' are kept as a text node (rendered with
%% tokens_to_string/1) instead of being parsed. The token sequence must
%% end with `) .'. Returns `{form, Attribute}', or `error' if the
%% tokens do not begin as described.
fix_define([{atom, Anno, ?pp_form}, {'(', _}, {')', _}, {'->', _},
            {atom, AnnoA, define}, {'(', _}, N, {',', _} | Body]) ->
    %% strip the closing parenthesis and the dot from the end
    [{dot, _}, {')', _} | RevText] = lists:reverse(Body),
    Text = tokens_to_string(lists:reverse(RevText)),
    DefineAtom = erl_syntax:set_pos(erl_syntax:atom(define), AnnoA),
    TextNode = erl_syntax:set_pos(erl_syntax:text(Text), AnnoA),
    Attribute = erl_syntax:attribute(DefineAtom, [N, TextNode]),
    {form, erl_syntax:set_pos(Attribute, Anno)};
fix_define(_Tokens) ->
    error.
872
%% @spec tokens_to_string(Tokens::[term()]) -> string()
%%
%% @doc Generates a string corresponding to the given token sequence.
%% The string can be re-tokenized to yield the same token list again.

-spec tokens_to_string([term()]) -> string().

tokens_to_string(Tokens) ->
    lists:flatmap(fun token_to_string/1, Tokens).

%% Renders a single token. Every token is followed by a space, except
%% the dot, which is followed by a newline. Two-element tokens other
%% than the dot are rendered as the name of their tag atom.
token_to_string({atom, _, Atom}) ->
    io_lib:write_atom(Atom) ++ " ";
token_to_string({string, _, String}) ->
    io_lib:write_string(String) ++ " ";
token_to_string({char, _, Char}) ->
    io_lib:write_char(Char) ++ " ";
token_to_string({float, _, Float}) ->
    float_to_list(Float) ++ " ";
token_to_string({integer, _, Integer}) ->
    integer_to_list(Integer) ++ " ";
token_to_string({var, _, Name}) ->
    atom_to_list(Name) ++ " ";
token_to_string({dot, _}) ->
    ".\n";
token_to_string({Tag, _}) ->
    atom_to_list(Tag) ++ " ".
898
899
%% @spec format_error(Descriptor::term()) -> string()
%% @hidden
%% @doc Callback function for formatting error descriptors. Not for
%% normal use.

-spec format_error(term()) -> string().

format_error(macro_args) ->
    errormsg("macro call missing end parenthesis");
format_error({Severity, Error}) when Severity =:= error;
                                     Severity =:= warning ->
    %% the descriptor already carries the message; pass it on as it is
    Error;
format_error({unknown, Reason}) ->
    %% print the reason with a depth limit of 15
    Details = io_lib:format("unknown error: ~tP", [Reason, 15]),
    errormsg(Details).
915
%% Prefixes a message with the name of this module, as
%% "Module: Message". `String' may be any character data accepted by
%% the `~ts' format directive (for example the result of
%% io_lib:format/2). The result of io_lib:format/2 may be a nested
%% list, so it is flattened here: format_error/1 is specified to return
%% a string(), and a flat string is still valid character data for
%% callers that print the message.
errormsg(String) ->
    lists:flatten(io_lib:format("~s: ~ts", [?MODULE, String])).
918
919
920%% =====================================================================
921