%% =====================================================================
%% Licensed under the Apache License, Version 2.0 (the "License"); you may
%% not use this file except in compliance with the License. You may obtain
%% a copy of the License at <http://www.apache.org/licenses/LICENSE-2.0>
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% Alternatively, you may use this file under the terms of the GNU Lesser
%% General Public License (the "LGPL") as published by the Free Software
%% Foundation; either version 2.1, or (at your option) any later version.
%% If you wish to allow use of your version of this file only under the
%% terms of the LGPL, you should delete the provisions above and replace
%% them with the notice and other provisions required by the LGPL; see
%% <http://www.gnu.org/licenses/>. If you do not delete the provisions
%% above, a recipient may use your version of this file under the terms of
%% either the Apache License or the LGPL.
%%
%% @copyright 2001-2006 Richard Carlsson
%% @author Richard Carlsson <carlsson.richard@gmail.com>
%% @end
%% =====================================================================

%% @doc `epp_dodger' - bypasses the Erlang preprocessor.
%%
%% <p>This module tokenises and parses most Erlang source code without
%% expanding preprocessor directives and macro applications, as long as
%% these are syntactically "well-behaved". Because the normal parse
%% trees of the `erl_parse' module cannot represent these things
%% (normally, they are expanded by the Erlang preprocessor {@link
%% //stdlib/epp} before the parser sees them), an extended syntax tree
%% is created, using the {@link erl_syntax} module.</p>


%% NOTES:
%%
%% * It's OK if the result does not parse - then at least nothing
%% strange happens, and the user can resort to full preprocessing.
%% However, we must avoid generating a token stream that is accepted by
%% the parser, but has a different meaning than the intended one. A
%% typical example is when someone uses token-level string concatenation
%% with macros, as in `"foo" ?bar' (where `?bar' expands to a string). If
%% we replace the tokens `? bar' with `( ... )', to preserve precedence,
%% the result will be parsed as an application `"foo" ( ... )' and cause
%% trouble later on. We must detect such cases and report an error.
%%
%% * It is pointless to add a mechanism for tracking which macros are
%% known to take arguments, and which are known to take no arguments,
%% since a lot of the time we will not have seen the macro definition
%% anyway (it's usually in a header file). Hence, we try to use
%% heuristics instead. In most cases, the token sequence `? foo ('
%% indicates that it is a call of a macro that is supposed to take
%% arguments, but e.g., in the context `: ? foo (', the argument list
%% typically belongs to a remote function call, as in `m:?f(...)' and
%% should be parsed as `m:(?f)(...)' unless it is actually a try-clause
%% pattern such as `throw:?f(...) ->'.
%%
%% * We do our best to make macros without arguments pass the parsing
%% stage transparently. Atoms are accepted in most contexts, but
%% variables are not, so we use only atoms to encode these macros.
%% Sadly, the parsing sometimes discards even the location info from
%% atom tokens, so we can only use the actual characters for this.
%%
%% * We recognize `?m(...' at the start of a form and prevent this from
%% being interpreted as a macro with arguments, since it is probably a
%% function definition. Likewise with attributes `-?m(...'.

-module(epp_dodger).

-export([parse_file/1, quick_parse_file/1, parse_file/2,
         quick_parse_file/2, parse/1, quick_parse/1, parse/2,
         quick_parse/2, parse/3, quick_parse/3, parse_form/2,
         parse_form/3, quick_parse_form/2, quick_parse_form/3,
         format_error/1, tokens_to_string/1]).


%% The following should be: 1) pseudo-uniquely identifiable, and 2)
%% cause nice looking error messages when the parser has to give up.
%%
%% `macro_call' is the tag atom of the tuple that stands in for a macro
%% call with arguments; `atom_prefix' and `var_prefix' are prepended to
%% the name of an argument-less macro whose name is an atom or a
%% variable, respectively; `pp_form' is the name of the fake function
%% that stands in for a preprocessor directive.

-define(macro_call, '? <macro> (').
-define(atom_prefix, "? ").
-define(var_prefix, "?,").
-define(pp_form, '?preprocessor declaration?').


%% @type errorinfo() = //stdlib/erl_scan:error_info().
%%
%% This is a so-called Erlang I/O ErrorInfo structure; see the {@link
%% //stdlib/io} module for details.

-type errorinfo() :: erl_scan:error_info().

%% An option is either a bare atom (boolean flag) or a tagged value;
%% this module reads `no_fail' and `clever' via proplists:get_bool/2.
-type option() :: atom() | {atom(), term()}.

%% =====================================================================
%% @spec parse_file(File) -> {ok, Forms} | {error, errorinfo()}
%%       File = file:filename()
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @equiv parse_file(File, [])

-spec parse_file(file:filename()) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

parse_file(File) ->
    parse_file(File, []).

%% @spec parse_file(File, Options) -> {ok, Forms} | {error, errorinfo()}
%%       File = file:filename()
%%       Options = [term()]
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @doc Reads and parses a file. If successful, `{ok, Forms}'
%% is returned, where `Forms' is a list of abstract syntax
%% trees representing the "program forms" of the file (cf.
%% `erl_syntax:is_form/1'). Otherwise, `{error, errorinfo()}' is
%% returned, typically if the file could not be opened.
%% Note that parse errors show up as error markers in the returned list
%% of forms; they do not cause this function to fail or return
%% `{error, errorinfo()}'.
%%
%% Options:
%% <dl>
%%   <dt>{@type {no_fail, boolean()@}}</dt>
%%   <dd>If `true', this makes `epp_dodger' replace any program forms
%%   that could not be parsed with nodes of type `text' (see {@link
%%   erl_syntax:text/1}), representing the raw token sequence of the
%%   form, instead of reporting a parse error. The default value is
%%   `false'.</dd>
%%   <dt>{@type {clever, boolean()@}}</dt>
%%   <dd>If set to `true', this makes `epp_dodger' try to repair the
%%   source code as it seems fit, in certain cases where parsing would
%%   otherwise fail. Currently, it inserts `++'-operators between string
%%   literals and macros where it looks like concatenation was intended.
%%   The default value is `false'.</dd>
%% </dl>
%%
%% @see parse/2
%% @see quick_parse_file/1
%% @see erl_syntax:is_form/1

-spec parse_file(file:filename(), [option()]) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

%% Uses parse/3 as the per-device parser.
parse_file(File, Options) ->
    parse_file(File, fun parse/3, Options).

%% @spec quick_parse_file(File) -> {ok, Forms} | {error, errorinfo()}
%%       File = file:filename()
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @equiv quick_parse_file(File, [])

-spec quick_parse_file(file:filename()) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

quick_parse_file(File) ->
    quick_parse_file(File, []).

%% @spec quick_parse_file(File, Options) ->
%%           {ok, Forms} | {error, errorinfo()}
%%       File = file:filename()
%%       Options = [term()]
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @doc Similar to {@link parse_file/2}, but does a more quick-and-dirty
%% processing of the code. Macro definitions and other preprocessor
%% directives are discarded, and all macro calls are replaced with
%% atoms.
%% This is useful when only the main structure of the code is of
%% interest, and not the details. Furthermore, the quick-parse method
%% can usually handle more strange cases than the normal, more exact
%% parsing.
%%
%% Options: see {@link parse_file/2}. Note however that for
%% `quick_parse_file/2', the option `no_fail' is `true' by default.
%%
%% @see quick_parse/2
%% @see parse_file/2

-spec quick_parse_file(file:filename(), [option()]) ->
        {'ok', erl_syntax:forms()} | {'error', errorinfo()}.

%% `no_fail' is appended last, so an explicit `{no_fail, false}' given by
%% the caller is found first by the option lookup and still wins.
quick_parse_file(File, Options) ->
    DefaultedOptions = Options ++ [no_fail],
    parse_file(File, fun quick_parse/3, DefaultedOptions).

%% Parses File assuming UTF-8 first. If the resulting forms contain an
%% invalid_unicode error and the file does not declare itself as UTF-8,
%% the file is parsed once more as Latin-1.
parse_file(File, Parser, Options) ->
    FirstAttempt = do_parse_file(utf8, File, Parser, Options),
    retry_as_latin1(FirstAttempt, File, Parser, Options).

%% Decides whether the UTF-8 result can be kept as it is.
retry_as_latin1({ok, Forms} = Utf8Result, File, Parser, Options) ->
    case find_invalid_unicode(Forms) of
        none ->
            Utf8Result;
        invalid_unicode ->
            case epp:read_encoding(File) of
                utf8 ->
                    %% the file claims to be UTF-8: keep the errors
                    Utf8Result;
                _ ->
                    do_parse_file(latin1, File, Parser, Options)
            end
    end;
retry_as_latin1(Failure, _File, _Parser, _Options) ->
    Failure.

%% Opens File, sets the default encoding on the device and runs Parser
%% on it from location 1. The device is closed even if Parser raises.
do_parse_file(DefEncoding, File, Parser, Options) ->
    case file:open(File, [read]) of
        {error, Reason} ->
            %% defer to file:format_error/1
            {error, {0, file, Reason}};
        {ok, Dev} ->
            _ = epp:set_encoding(Dev, DefEncoding),
            try
                Parser(Dev, 1, Options)
            after
                ok = file:close(Dev)
            end
    end.

%% Returns `invalid_unicode' if any element of the list is an
%% invalid_unicode error reported by file_io_server, otherwise `none'.
find_invalid_unicode([{error, {_Location, file_io_server, invalid_unicode}} | _]) ->
    invalid_unicode;
find_invalid_unicode([_Other | Rest]) ->
    find_invalid_unicode(Rest);
find_invalid_unicode([]) ->
    none.

%% =====================================================================
%% @spec parse(IODevice) -> {ok, Forms} | {error, errorinfo()}
%% @equiv parse(IODevice, 1)

-spec parse(file:io_device()) -> {'ok', erl_syntax:forms()}.

parse(Dev) ->
    parse(Dev, 1).

%% @spec parse(IODevice, StartLocation) -> {ok, Forms} | {error, errorinfo()}
%%       IODevice = pid()
%%       StartLocation = //stdlib/erl_anno:location()
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @equiv parse(IODevice, StartLocation, [])
%% @see parse/1

-spec parse(file:io_device(), erl_anno:location()) -> {'ok', erl_syntax:forms()}.

parse(Dev, L) ->
    parse(Dev, L, []).

%% @spec parse(IODevice, StartLocation, Options) ->
%%           {ok, Forms} | {error, errorinfo()}
%%       IODevice = pid()
%%       StartLocation = //stdlib/erl_anno:location()
%%       Options = [term()]
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @doc Reads and parses program text from an I/O stream. Characters are
%% read from `IODevice' until end-of-file; apart from this, the
%% behaviour is the same as for {@link parse_file/2}. `StartLocation' is the
%% initial location.
%%
%% @see parse/2
%% @see parse_file/2
%% @see parse_form/2
%% @see quick_parse/3

-spec parse(file:io_device(), erl_anno:location(), [option()]) ->
        {'ok', erl_syntax:forms()}.

%% Runs the form loop with parse_form/3 as the per-form parser.
parse(Dev, L0, Options) ->
    parse(Dev, L0, fun parse_form/3, Options).

%% @spec quick_parse(IODevice) -> {ok, Forms} | {error, errorinfo()}
%% @equiv quick_parse(IODevice, 1)

-spec quick_parse(file:io_device()) ->
        {'ok', erl_syntax:forms()}.

quick_parse(Dev) ->
    quick_parse(Dev, 1).

%% @spec quick_parse(IODevice, StartLocation) ->
%%           {ok, Forms} | {error, errorinfo()}
%%       IODevice = pid()
%%       StartLocation = //stdlib/erl_anno:location()
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @equiv quick_parse(IODevice, StartLocation, [])
%% @see quick_parse/1

-spec quick_parse(file:io_device(), erl_anno:location()) ->
        {'ok', erl_syntax:forms()}.

quick_parse(Dev, L) ->
    quick_parse(Dev, L, []).

%% @spec quick_parse(IODevice, StartLocation, Options) ->
%%           {ok, Forms} | {error, errorinfo()}
%%       IODevice = pid()
%%       StartLocation = //stdlib/erl_anno:location()
%%       Options = [term()]
%%       Forms = [erl_syntax:syntaxTree()]
%%
%% @doc Similar to {@link parse/3}, but does a more quick-and-dirty
%% processing of the code. See {@link quick_parse_file/2} for details.
%%
%% @see quick_parse/2
%% @see quick_parse_file/2
%% @see quick_parse_form/2
%% @see parse/3

-spec quick_parse(file:io_device(), erl_anno:location(), [option()]) ->
        {'ok', erl_syntax:forms()}.

quick_parse(Dev, L0, Options) ->
    FormParser = fun quick_parse_form/3,
    parse(Dev, L0, FormParser, Options).

%% Form loop entry point: starts with an empty accumulator.
parse(Dev, L0, Parser, Options) ->
    parse(Dev, L0, [], Parser, Options).

%% Calls Parser for one form at a time until it reports end of file.
%% Forms are collected in reverse; a `none' form is dropped and an I/O
%% error is kept in the list as `{error, IoErr}'.
parse(Dev, Loc, Acc, Parser, Options) ->
    case Parser(Dev, Loc, Options) of
        {eof, _End} ->
            {ok, lists:reverse(Acc)};
        {ok, none, Next} ->
            parse(Dev, Next, Acc, Parser, Options);
        {ok, Form, Next} ->
            parse(Dev, Next, [Form | Acc], Parser, Options);
        {error, IoErr, Next} ->
            parse(Dev, Next, [{error, IoErr} | Acc], Parser, Options)
    end.


%% =====================================================================
%% @spec parse_form(IODevice, StartLocation) -> {ok, Form, Location}
%%                                            | {eof, Location}
%%                                            | {error, errorinfo(), Location}
%%       IODevice = pid()
%%       StartLocation = //stdlib/erl_anno:location()
%%       Form = erl_syntax:syntaxTree()
%%       Location = //stdlib/erl_anno:location()
%%
%% @equiv parse_form(IODevice, StartLocation, [])
%%
%% @see quick_parse_form/2

-spec parse_form(file:io_device(), erl_anno:location()) ->
        {'ok', erl_syntax:forms(), erl_anno:location()}
      | {'eof', erl_anno:location()} | {'error', errorinfo(), erl_anno:location()}.

parse_form(Dev, L0) ->
    parse_form(Dev, L0, []).

%% @spec parse_form(IODevice, StartLocation, Options) ->
%%           {ok, Form, Location}
%%         | {eof, Location}
%%         | {error, errorinfo(), Location}
%%
%%       IODevice = pid()
%%       StartLocation = //stdlib/erl_anno:location()
%%       Options = [term()]
%%       Form = erl_syntax:syntaxTree()
%%       Location = //stdlib/erl_anno:location()
%%
%% @doc Reads and parses a single program form from an I/O stream.
%% Characters are read from `IODevice' until an end-of-form
%% marker is found (a period character followed by whitespace), or until
%% end-of-file; apart from this, the behaviour is similar to that of
%% `parse/3', except that the return values also contain the
%% final location given that `StartLocation' is the initial
%% location, and that `{eof, Location}' may be returned.
%%
%% @see parse/3
%% @see parse_form/2
%% @see quick_parse_form/3

-spec parse_form(file:io_device(), erl_anno:location(), [option()]) ->
        {'ok', erl_syntax:forms(), erl_anno:location()}
      | {'eof', erl_anno:location()} | {'error', errorinfo(), erl_anno:location()}.

%% Uses normal_parser/2 as the token-level parser.
parse_form(Dev, L0, Options) ->
    parse_form(Dev, L0, fun normal_parser/2, Options).

%% @spec quick_parse_form(IODevice, StartLocation) ->
%%           {ok, Form, Location}
%%         | {eof, Location}
%%         | {error, errorinfo(), Location}
%%       IODevice = pid()
%%       StartLocation = //stdlib/erl_anno:location()
%%       Form = erl_syntax:syntaxTree() | none
%%       Location = //stdlib/erl_anno:location()
%%
%% @equiv quick_parse_form(IODevice, StartLocation, [])
%%
%% @see parse_form/2

-spec quick_parse_form(file:io_device(), erl_anno:location()) ->
        {'ok', erl_syntax:forms(), erl_anno:location()}
      | {'eof', erl_anno:location()} | {'error', errorinfo(), erl_anno:location()}.

quick_parse_form(Dev, L0) ->
    quick_parse_form(Dev, L0, []).

%% @spec quick_parse_form(IODevice, StartLocation, Options) ->
%%           {ok, Form, Location}
%%         | {eof, Location}
%%         | {error, errorinfo(), Location}
%%
%%       IODevice = pid()
%%       StartLocation = //stdlib/erl_anno:location()
%%       Options = [term()]
%%       Form = erl_syntax:syntaxTree()
%%       Location = //stdlib/erl_anno:location()
%%
%% @doc Similar to {@link parse_form/3}, but does a more quick-and-dirty
%% processing of the code. See {@link quick_parse_file/2} for details.
%%
%% @see parse/3
%% @see quick_parse_form/2
%% @see parse_form/3

-spec quick_parse_form(file:io_device(), erl_anno:location(), [option()]) ->
        {'ok', erl_syntax:forms(), erl_anno:location()}
      | {'eof', erl_anno:location()} | {'error', errorinfo(), erl_anno:location()}.

quick_parse_form(Dev, L0, Options) ->
    parse_form(Dev, L0, fun quick_parser/2, Options).

%% Options handed down to the token-level parsers.
-record(opt, {clever = false :: boolean()}).

%% Scans one form from Dev starting at L0 and runs Parser on its tokens.
%% Parser is called as Parser(Tokens, #opt{}); whatever it returns, throws
%% or crashes with is translated by form_result/4.
parse_form(Dev, L0, Parser, Options) ->
    NoFail = proplists:get_bool(no_fail, Options),
    Opt = #opt{clever = proplists:get_bool(clever, Options)},
    case io:scan_erl_form(Dev, "", L0) of
        {ok, Tokens, L1} ->
            %% old-style catch kept on purpose: the exact shape of the
            %% caught term is what ends up in the `unknown' error
            Outcome = (catch {ok, Parser(Tokens, Opt)}),
            form_result(Outcome, Tokens, L1, NoFail);
        {error, _IoErr, _L1} = ScanError ->
            ScanError;
        {error, _Reason} ->
            %% This is probably encoding problem
            {eof, L0};
        {eof, _L1} = Eof ->
            Eof
    end.

%% Maps the caught outcome of the parser to the return value of
%% parse_form/4. With `no_fail' set, a parse error becomes a `text' node
%% holding the raw tokens instead of an error.
form_result({'EXIT', Term}, _Tokens, L1, _NoFail) ->
    {error, io_error(L1, {unknown, Term}), L1};
form_result({error, Term}, _Tokens, L1, _NoFail) ->
    {error, io_error(L1, Term), L1};
form_result({parse_error, _IoErr}, Tokens, L1, true) ->
    Text = erl_syntax:text(tokens_to_string(Tokens)),
    Pos = erl_anno:new(start_pos(Tokens, L1)),
    {ok, erl_syntax:set_pos(Text, Pos), L1};
form_result({parse_error, IoErr}, _Tokens, L1, _NoFail) ->
    {error, IoErr, L1};
form_result({ok, Form}, _Tokens, L1, _NoFail) ->
    {ok, Form, L1}.

%% Builds an ErrorInfo tuple that is formatted by this module.
io_error(L, Desc) ->
    {L, ?MODULE, Desc}.

%% Location of the first token, or the fallback location if there are
%% no tokens.
start_pos([], L) ->
    L;
start_pos([First | _], _L) ->
    erl_anno:location(element(2, First)).

%% Exception-throwing wrapper for the standard Erlang parser stage

%% Parses one form, with fix_form/1 as the rescue strategy.
parse_tokens(Ts) ->
    parse_tokens(Ts, fun fix_form/1).

%% Parses the tokens with erl_parse:parse_form/1. If that fails, Fix is
%% called with the tokens and must return `{form, Form}' (use this form),
%% `{retry, Tokens, NextFix}' (parse again) or `error', in which case
%% `{parse_error, IoErr}' is thrown with the parser's own error info.
parse_tokens(Ts, Fix) ->
    case erl_parse:parse_form(Ts) of
        {ok, Form} ->
            Form;
        {error, IoErr} ->
            case Fix(Ts) of
                {form, Form} ->
                    Form;
                {retry, Ts1, Fix1} ->
                    parse_tokens(Ts1, Fix1);
                error ->
                    throw({parse_error, IoErr})
            end
    end.

%% ---------------------------------------------------------------------
%% Quick scanning/parsing - deletes macro definitions and other
%% preprocessor directives, and replaces all macro calls with atoms.

quick_parser(Ts, _Opt) ->
    filter_form(parse_tokens(quickscan_form(Ts))).

%% Rewrites the tokens of one form for the quick parser. Preprocessor
%% directives are replaced by a recognisable dummy function (see
%% kill_form/1); everything else has its macro calls turned into atoms.
%%
%% Note: 'else' is quoted because it is a reserved word when the
%% maybe_expr feature is enabled; the quoted form is the same atom.
quickscan_form([{'-', _Anno}, {atom, AnnoA, Directive} | _Ts])
  when Directive =:= define; Directive =:= undef;
       Directive =:= include; Directive =:= include_lib;
       Directive =:= ifdef; Directive =:= ifndef;
       Directive =:= elif; Directive =:= 'else';
       Directive =:= endif ->
    kill_form(AnnoA);
quickscan_form([{'-', _Anno}, {'if', AnnoA} | _Ts]) ->
    %% `if' is a keyword token, not an atom token
    kill_form(AnnoA);
quickscan_form([{'-', Anno}, {'?', _}, {Type, _, _}=N | [{'(', _} | _]=Ts])
  when Type =:= atom; Type =:= var ->
    %% minus, macro and open parenthesis at start of form - assume that
    %% the macro takes no arguments; e.g. `-?foo(...).'
    quickscan_macros_1(N, Ts, [{'-', Anno}]);
quickscan_form([{'?', _Anno}, {Type, _, _}=N | [{'(', _} | _]=Ts])
  when Type =:= atom; Type =:= var ->
    %% macro and open parenthesis at start of form - assume that the
    %% macro takes no arguments (see scan_macros for details)
    quickscan_macros_1(N, Ts, []);
quickscan_form(Ts) ->
    quickscan_macros(Ts).

%% Tokens of the dummy function `?pp_form() -> kill.', which
%% filter_form/1 recognises after parsing and turns into `none'.
kill_form(A) ->
    [{atom, A, ?pp_form}, {'(', A}, {')', A}, {'->', A}, {atom, A, kill},
     {dot, A}].

quickscan_macros(Ts) ->
    quickscan_macros(Ts, []).

%% Walks the tokens, collecting the output in reverse in As.
quickscan_macros([{'?',_}, {Type, _, A} | Ts], [{string, AnnoS, S} | As])
  when Type =:= atom; Type =:= var ->
    %% macro after a string literal: change to a single string
    {_, Ts1} = skip_macro_args(Ts),
    S1 = S ++ quick_macro_string(A),
    quickscan_macros(Ts1, [{string, AnnoS, S1} | As]);
quickscan_macros([{'?',_}, {Type, _, _}=N | [{'(',_}|_]=Ts],
                 [{':',_}|_]=As)
  when Type =:= atom; Type =:= var ->
    %% macro and open parenthesis after colon - check the token
    %% following the arguments (see scan_macros for details)
    {_, AfterArgs} = skip_macro_args(Ts),
    Ts1 = case AfterArgs of
              [{Next, _} | _] when Next =:= '->';
                                   Next =:= 'when';
                                   Next =:= ':' ->
                  AfterArgs;
              _ ->
                  Ts    %% assume macro without arguments
          end,
    quickscan_macros_1(N, Ts1, As);
quickscan_macros([{'?',_}, {Type, _, _}=N | Ts], As)
  when Type =:= atom; Type =:= var ->
    %% macro with or without arguments
    {_, Ts1} = skip_macro_args(Ts),
    quickscan_macros_1(N, Ts1, As);
quickscan_macros([T | Ts], As) ->
    quickscan_macros(Ts, [T | As]);
quickscan_macros([], As) ->
    lists:reverse(As).

%% (after a macro has been found and the arglist skipped, if any)
quickscan_macros_1({_Type, _, A}, [{string, AnnoS, S} | Ts], As) ->
    %% string literal following macro: change to single string
    S1 = quick_macro_string(A) ++ S,
    quickscan_macros(Ts, [{string, AnnoS, S1} | As]);
quickscan_macros_1({_Type, AnnoA, A}, Ts, As) ->
    %% normal case - just replace the macro with an atom
    quickscan_macros(Ts, [{atom, AnnoA, quick_macro_atom(A)} | As]).

%% The atom that replaces a macro in quick mode: `?' followed by the name.
quick_macro_atom(A) ->
    list_to_atom("?" ++ atom_to_list(A)).

%% The text spliced into an adjacent string literal in quick mode.
quick_macro_string(A) ->
    "(?" ++ atom_to_list(A) ++ ")".

%% Skipping to the end of a macro call, tracking open/close constructs.
%% @spec (Tokens) -> {Skipped, Rest}
%%
%% If the tokens do not start with an open parenthesis, nothing is
%% skipped. Throws `{error, macro_args}' if the tokens run out before
%% the argument list is closed.

skip_macro_args([{'(',_}=T | Ts]) ->
    skip_macro_args(Ts, [')'], [T]);
skip_macro_args(Ts) ->
    {[], Ts}.

%% Es is the stack of expected closing tokens; As holds the skipped
%% tokens in reverse.
skip_macro_args([{'(',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, [')' | Es], [T | As]);
skip_macro_args([{'{',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['}' | Es], [T | As]);
skip_macro_args([{'[',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, [']' | Es], [T | As]);
skip_macro_args([{'<<',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['>>' | Es], [T | As]);
skip_macro_args([{'begin',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['end' | Es], [T | As]);
skip_macro_args([{'if',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['end' | Es], [T | As]);
skip_macro_args([{'case',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['end' | Es], [T | As]);
skip_macro_args([{'receive',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['end' | Es], [T | As]);
skip_macro_args([{'try',_}=T | Ts], Es, As) ->
    skip_macro_args(Ts, ['end' | Es], [T | As]);
skip_macro_args([{E,_}=T | Ts], [E], As) ->             %final close
    {lists:reverse([T | As]), Ts};
skip_macro_args([{E,_}=T | Ts], [E | Es], As) ->        %matching close
    skip_macro_args(Ts, Es, [T | As]);
skip_macro_args([T | Ts], Es, As) ->
    skip_macro_args(Ts, Es, [T | As]);
skip_macro_args([], _Es, _As) ->
    throw({error, macro_args}).

%% Drops the dummy function produced by kill_form/1 (result `none');
%% any other form is passed through.
filter_form({function, _, ?pp_form, _,
             [{clause, _, [], [], [{atom, _, kill}]}]}) ->
    none;
filter_form(T) ->
    T.


%% ---------------------------------------------------------------------
%% Normal parsing - try to preserve all information

%% scan_form/2 returns either a token list, which is then parsed and
%% post-processed, or a finished syntax tree node (for `-error' and
%% `-warning'), which is returned as it is.
normal_parser(Ts0, Opt) ->
    case scan_form(Ts0, Opt) of
        Ts when is_list(Ts) ->
            rewrite_form(parse_tokens(Ts));
        Node ->
            Node
    end.

%% Rewrites the tokens of one form for the normal parser. A preprocessor
%% directive `-Name ...' is encoded as the body of a dummy function
%% `?pp_form() -> Name ...' so that the standard parser accepts it.
%%
%% Note: 'else' is quoted because it is a reserved word when the
%% maybe_expr feature is enabled; the quoted form is the same atom.
scan_form([{'-', _Anno}, {atom, AnnoA, Directive} | Ts], Opt)
  when Directive =:= define; Directive =:= undef;
       Directive =:= include; Directive =:= include_lib;
       Directive =:= ifdef; Directive =:= ifndef;
       Directive =:= elif; Directive =:= 'else';
       Directive =:= endif ->
    pp_form_tokens(AnnoA, Directive, Ts, Opt);
scan_form([{'-', _Anno}, {'if', AnnoA} | Ts], Opt) ->
    %% `if' is a keyword token; it is re-encoded as an atom token
    pp_form_tokens(AnnoA, 'if', Ts, Opt);
scan_form([{'-', _Anno}, {atom, AnnoA, error} | Ts], _Opt) ->
    Desc = build_info_string("-error", Ts),
    ErrorInfo = {erl_anno:location(AnnoA), ?MODULE, {error, Desc}},
    erl_syntax:error_marker(ErrorInfo);
scan_form([{'-', _Anno}, {atom, AnnoA, warning} | Ts], _Opt) ->
    Desc = build_info_string("-warning", Ts),
    ErrorInfo = {erl_anno:location(AnnoA), ?MODULE, {warning, Desc}},
    erl_syntax:error_marker(ErrorInfo);
scan_form([{'-', A}, {'?', A1}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt)
  when Type =:= atom; Type =:= var ->
    %% minus, macro and open parenthesis at start of form - assume that
    %% the macro takes no arguments; e.g. `-?foo(...).'
    macro(A1, N, Ts, [{'-', A}], Opt);
scan_form([{'?', A}, {Type, _, _}=N | [{'(', _} | _]=Ts], Opt)
  when Type =:= atom; Type =:= var ->
    %% macro and open parenthesis at start of form - assume that the
    %% macro takes no arguments; probably a function declaration on the
    %% form `?m(...) -> ...', which will not parse if it is rewritten as
    %% `(?m(...)) -> ...', so it must be handled as `(?m)(...) -> ...'
    macro(A, N, Ts, [], Opt);
scan_form(Ts, Opt) ->
    scan_macros(Ts, Opt).

%% Tokens for `?pp_form() -> Directive', followed by the macro-scanned
%% remainder of the directive.
pp_form_tokens(AnnoA, Directive, Ts, Opt) ->
    [{atom, AnnoA, ?pp_form}, {'(', AnnoA}, {')', AnnoA}, {'->', AnnoA},
     {atom, AnnoA, Directive} | scan_macros(Ts, Opt)].

%% Builds the message text for an `-error' or `-warning' directive from
%% the tokens that follow the directive name.
%%
%% The trailing dot token is removed only if it is actually there, and a
%% directive without any argument tokens (e.g. `-error.') yields just
%% Prefix followed by a period instead of crashing on an empty string.
build_info_string(Prefix, Ts0) ->
    case drop_trailing_dot(Ts0) of
        [] ->
            Prefix ++ ".";
        Ts ->
            %% every token is rendered with a trailing separator, so the
            %% text is non-empty here and its last character is dropped
            String = lists:droplast(tokens_to_string(Ts)),
            Prefix ++ " " ++ String ++ "."
    end.

%% Removes a final `{dot, _}' token, if any.
drop_trailing_dot(Ts) ->
    case lists:reverse(Ts) of
        [{dot, _} | RevTs] ->
            lists:reverse(RevTs);
        _ ->
            Ts
    end.

scan_macros(Ts, Opt) ->
    scan_macros(Ts, [], Opt).

%% Walks the tokens of a form and re-encodes every macro use so that the
%% standard parser accepts it. Acc holds the output tokens in reverse.
scan_macros([{'?', _}=Q, {Type, _, _}=Name | Ts],
            [{string, AnnoS, _}=Str | Acc],
            #opt{clever = true}=Opt)
  when Type =:= atom; Type =:= var ->
    %% macro after a string literal: be clever and insert ++
    scan_macros([Q, Name | Ts], [{'++', AnnoS}, Str | Acc], Opt);
scan_macros([{'?', Anno}, {Type, _, _}=Name | [{'(',_}|_]=Ts],
            [{':',_}|_]=Acc, Opt)
  when Type =:= atom; Type =:= var ->
    %% macro and open parentheses after colon - probably a call
    %% `m:?F(...)' so the argument list might belong to the call, not
    %% the macro - but it could also be a try-clause pattern
    %% `...:?T(...) ->' - we need to check the token following the
    %% arguments to decide
    {Args, Rest} = skip_macro_args(Ts),
    case Rest of
        [{Next, _} | _] when Next =:= '->';
                             Next =:= 'when';
                             Next =:= ':' ->
            macro_call(Args, Anno, Name, Rest, Acc, Opt);
        _ ->
            macro(Anno, Name, Ts, Acc, Opt)
    end;
scan_macros([{'?', Anno}, {Type, _, _}=Name | [{'(',_}|_]=Ts], Acc, Opt)
  when Type =:= atom; Type =:= var ->
    %% macro with arguments
    {Args, Rest} = skip_macro_args(Ts),
    macro_call(Args, Anno, Name, Rest, Acc, Opt);
scan_macros([{'?', Anno}, {Type, _, _}=Name | Ts], Acc, Opt)
  when Type =:= atom; Type =:= var ->
    %% macro without arguments
    macro(Anno, Name, Ts, Acc, Opt);
scan_macros([Token | Ts], Acc, Opt) ->
    scan_macros(Ts, [Token | Acc], Opt);
scan_macros([], Acc, _Opt) ->
    lists:reverse(Acc).

%% Rewriting to a tuple which will be recognized by the post-parse pass
%% (we insert parentheses to preserve the precedences when parsing).

%% A macro without arguments becomes a single atom token whose name
%% carries the prefix chosen by macro_atom/2.
macro(Anno, {Type, _, Name}, Rest, Acc, Opt) ->
    Encoded = {atom, Anno, macro_atom(Type, Name)},
    scan_macros_1([], Rest, [Encoded | Acc], Opt).

%% Encodes a macro call with an argument list as the tokens of a tuple
%% `{?macro_call, Name, Arg1, ...}', wrapped in parentheses unless the
%% previous output token is a string (see parentheses/1).
macro_call([{'(',_}, {')',_}], Anno, {_, AnnoN, _}=N, Rest, As, Opt) ->
    %% empty argument list: {'?macro_call', N }
    {Open, Close} = parentheses(As),
    Encoded = Open ++ macro_call_head(Anno, N) ++ [{'}', AnnoN}] ++ Close,
    scan_macros_1([], Rest, lists:reverse(Encoded, As), Opt);
macro_call([{'(',_} | Args], Anno, {_, AnnoN, _}=N, Rest, As, Opt) ->
    {Open, Close} = parentheses(As),
    %% drop closing parenthesis
    {')', _} = lists:last(Args),                %% assert
    Args1 = lists:droplast(Args),
    %% {'?macro_call', N, Arg1, ... }
    Encoded = Open ++ macro_call_head(Anno, N) ++ [{',', AnnoN}],
    %% note that we must scan the argument list; it may not be skipped
    scan_macros_1(Args1 ++ [{'}', AnnoN} | Close],
                  Rest,
                  lists:reverse(Encoded, As), Opt).

%% The tokens `{ ?macro_call , N' that open the encoding tuple.
macro_call_head(Anno, N) ->
    [{'{', Anno}, {atom, Anno, ?macro_call}, {',', Anno}, N].

%% Name of the atom that stands in for an argument-less macro; the
%% prefix records whether the macro name was an atom or a variable.
macro_atom(atom, A) ->
    list_to_atom(?atom_prefix ++ atom_to_list(A));
macro_atom(var, A) ->
    list_to_atom(?var_prefix ++ atom_to_list(A)).

%% don't insert parentheses after a string token, to avoid turning
%% `"string" ?macro' into a "function application" `"string"(...)'
%% (see note at top of file)
parentheses(As) ->
    case As of
        [{string, _, _} | _] ->
            {[], []};
        _ ->
            {[{'(',0}], [{')',0}]}
    end.

%% (after a macro has been found and the arglist skipped, if any)
scan_macros_1(Args, [{string, AnnoS, _} | _]=Rest, As,
              #opt{clever = true}=Opt) ->
    %% string literal following macro: be clever and insert ++
    Pending = Args ++ [{'++', AnnoS} | Rest],
    scan_macros(Pending, As, Opt);
scan_macros_1(Args, Rest, As, Opt) ->
    %% normal case - continue scanning
    Pending = Args ++ Rest,
    scan_macros(Pending, As, Opt).

%% Post-parse pass on a whole form. The dummy function that encodes a
%% preprocessor directive is turned back into an attribute node; any
%% other form just has its encoded macros restored by rewrite/1.
rewrite_form({function, Anno, ?pp_form, _,
              [{clause, _, [], [], [{call, _, Name, Args}]}]}) ->
    Attr = erl_syntax:attribute(Name, rewrite_list(Args)),
    erl_syntax:set_pos(Attr, Anno);
rewrite_form({function, Anno, ?pp_form, _, [{clause, _, [], [], [Name]}]}) ->
    erl_syntax:set_pos(erl_syntax:attribute(Name), Anno);
rewrite_form(Form) ->
    rewrite(Form).

rewrite_list(Nodes) ->
    lists:map(fun rewrite/1, Nodes).

%% Note: as soon as we start using erl_syntax:subtrees/1 and similar
%% functions, we cannot assume that we know the exact representation of
%% the syntax tree anymore - we must use erl_syntax functions to analyze
%% and decompose the data.

%% Restores macros in a syntax tree: prefixed atoms become macro nodes
%% without arguments, tuples tagged with ?macro_call become macro nodes
%% with arguments, and everything else is traversed recursively.
rewrite(Node) ->
    case erl_syntax:type(Node) of
        atom ->
            rewrite_atom(Node);
        tuple ->
            rewrite_tuple(Node);
        _ ->
            rewrite_1(Node)
    end.

%% An atom is an encoded macro if its name starts with one of the
%% prefixes; otherwise it is left alone.
rewrite_atom(Node) ->
    case atom_to_list(erl_syntax:atom_value(Node)) of
        ?atom_prefix ++ Name ->
            macro_node(Node, erl_syntax:atom(list_to_atom(Name)));
        ?var_prefix ++ Name ->
            macro_node(Node, erl_syntax:variable(list_to_atom(Name)));
        _ ->
            Node
    end.

%% Builds a macro node named by NameNode; both nodes get the position
%% of the node they replace.
macro_node(Node, NameNode) ->
    Name = erl_syntax:copy_pos(Node, NameNode),
    erl_syntax:copy_pos(Node, erl_syntax:macro(Name)).

%% A tuple with at least two elements whose first element is the atom
%% ?macro_call is an encoded macro call; its remaining elements are the
%% macro name and the arguments.
rewrite_tuple(Node) ->
    case erl_syntax:tuple_elements(Node) of
        [MagicWord, Name | Args] ->
            case is_macro_call_tag(MagicWord) of
                true ->
                    Macro = erl_syntax:macro(Name, rewrite_list(Args)),
                    erl_syntax:copy_pos(Node, Macro);
                false ->
                    rewrite_1(Node)
            end;
        _ ->
            rewrite_1(Node)
    end.

is_macro_call_tag(Node) ->
    erl_syntax:type(Node) =:= atom
        andalso erl_syntax:atom_value(Node) =:= ?macro_call.

%% Rebuilds a node from its rewritten subtrees; leaves are returned
%% unchanged.
rewrite_1(Node) ->
    case erl_syntax:subtrees(Node) of
        [] ->
            Node;
        Groups ->
            Rebuilt = erl_syntax:make_tree(erl_syntax:type(Node),
                                           [[rewrite(Sub) || Sub <- Group]
                                            || Group <- Groups]),
            erl_syntax:copy_pos(Node, Rebuilt)
    end.

%% attempting a rescue operation on a token sequence for a single form
%% if it could not be parsed after the normal treatment

%% Only an encoded `-define(' form is rescued. If the form does not end
%% with `).', a closing parenthesis is inserted before the dot. The
%% tokens are then handed to fix_define/1 for a second parse attempt.
fix_form([{atom, _, ?pp_form}, {'(', _}, {')', _}, {'->', _},
          {atom, _, define}, {'(', _} | _]=Ts) ->
    retry_define(lists:reverse(Ts), Ts);
fix_form(_Ts) ->
    error.

%% First argument: the tokens in reverse; second: the original tokens.
retry_define([{dot, _}, {')', _} | _], Ts) ->
    {retry, Ts, fun fix_define/1};
retry_define([{dot, Anno} | RevTs], _Ts) ->
    Closed = lists:reverse([{dot, Anno}, {')', Anno} | RevTs]),
    {retry, Closed, fun fix_define/1};
retry_define(_RevTs, _Ts) ->
    error.

%% Last resort for a `-define(Name, ...)' that still does not parse: the
%% replacement tokens are kept as a `text' node holding their string form.
fix_define([{atom, Anno, ?pp_form}, {'(', _}, {')', _}, {'->', _},
            {atom, AnnoA, define}, {'(', _}, N, {',', _} | Ts]) ->
    [{dot, _}, {')', _} | RevBody] = lists:reverse(Ts),
    BodyText = tokens_to_string(lists:reverse(RevBody)),
    DefineAtom = erl_syntax:set_pos(erl_syntax:atom(define), AnnoA),
    Txt = erl_syntax:set_pos(erl_syntax:text(BodyText), AnnoA),
    Attr = erl_syntax:attribute(DefineAtom, [N, Txt]),
    {form, erl_syntax:set_pos(Attr, Anno)};
fix_define(_Ts) ->
    error.

%% @spec tokens_to_string(Tokens::[term()]) -> string()
%%
%% @doc Generates a string corresponding to the given token sequence.
%% The string can be re-tokenized to yield the same token list again.

-spec tokens_to_string([term()]) -> string().

%% Every token is rendered followed by a single space, except the dot
%% token, which is rendered as a period and a newline.
tokens_to_string(Tokens) ->
    lists:append([token_to_string(T) || T <- Tokens]).

%% Text of a single token, including its trailing separator.
token_to_string({atom, _, A}) ->
    io_lib:write_atom(A) ++ " ";
token_to_string({string, _, S}) ->
    io_lib:write_string(S) ++ " ";
token_to_string({char, _, C}) ->
    io_lib:write_char(C) ++ " ";
token_to_string({float, _, F}) ->
    float_to_list(F) ++ " ";
token_to_string({integer, _, N}) ->
    integer_to_list(N) ++ " ";
token_to_string({var, _, A}) ->
    atom_to_list(A) ++ " ";
token_to_string({dot, _}) ->
    ".\n";
token_to_string({A, _}) ->
    %% keywords, operators and separators
    atom_to_list(A) ++ " ".


%% @spec format_error(Descriptor::term()) -> string()
%% @hidden
%% @doc Callback function for formatting error descriptors. Not for
%% normal use.

-spec format_error(term()) -> string().

format_error(macro_args) ->
    errormsg("macro call missing end parenthesis");
format_error({Kind, Error}) when Kind =:= error; Kind =:= warning ->
    %% text of an `-error' or `-warning' directive, returned as it is
    Error;
format_error({unknown, Reason}) ->
    errormsg(io_lib:format("unknown error: ~tP", [Reason, 15])).

%% Prefixes a message with the name of this module.
errormsg(String) ->
    io_lib:format("~s: ~ts", [?MODULE, String]).


%% =====================================================================