1%% =====================================================================
2%% Licensed under the Apache License, Version 2.0 (the "License"); you may
3%% not use this file except in compliance with the License. You may obtain
4%% a copy of the License at <http://www.apache.org/licenses/LICENSE-2.0>
5%%
6%% Unless required by applicable law or agreed to in writing, software
7%% distributed under the License is distributed on an "AS IS" BASIS,
8%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9%% See the License for the specific language governing permissions and
10%% limitations under the License.
11%%
12%% Alternatively, you may use this file under the terms of the GNU Lesser
13%% General Public License (the "LGPL") as published by the Free Software
14%% Foundation; either version 2.1, or (at your option) any later version.
15%% If you wish to allow use of your version of this file only under the
16%% terms of the LGPL, you should delete the provisions above and replace
17%% them with the notice and other provisions required by the LGPL; see
18%% <http://www.gnu.org/licenses/>. If you do not delete the provisions
19%% above, a recipient may use your version of this file under the terms of
20%% either the Apache License or the LGPL.
21%%
22%% @private
23%% @copyright 2001-2003 Richard Carlsson
24%% @author Richard Carlsson <carlsson.richard@gmail.com>
25%% @see edoc
26%% @end
27%% =====================================================================
28
29%% @doc EDoc tag scanning.
30
31%% TODO: tag/macro for including the code of a function as `<pre>'-text.
32%% TODO: consider new tag: @license text
33
34-module(edoc_tags).
35
36-export([tags/0, tags/1, tag_names/0, tag_parsers/0, scan_lines/2,
37	 filter_tags/2, filter_tags/3, check_tags/4, parse_tags/4,
38         check_types/3]).
39
40-import(edoc_report, [report/4, warning/4, error/3]).
41
42-include("edoc.hrl").
43-include("edoc_types.hrl").
44
45
46%% Tags are described by {Name, Parser, Flags}.
47%%   Name = atom()
48%%   Parser = text | xml | (Text,Line,Where) -> term()
49%%   Flags = [Flag]
50%%   Flag = module | function | overview | single
51%%
52%% Note that the pseudo-tag '@clear' is not listed here.
53%% (Cf. the function 'filter_tags'.)
54%%
55%% Rejected tag suggestions:
56%% - @keywords (never up to date; free text search is better)
57%% - @uses [modules] (never up to date; false dependencies)
58%% - @maintainer (never up to date; duplicates author info)
59%% - @contributor (unnecessary; mention in normal documentation)
60%% - @creator (unnecessary; already have copyright/author)
61%% - @history (never properly updated; use version control etc.)
62%% - @category (useless; superseded by keywords or free text search)
63
64tags() ->
65    All = [module,footer,function,overview],
66    [{author, fun parse_contact/4, [module,overview]},
67     {copyright, text, [module,overview,single]},
68     {deprecated, xml, [module,function,single]},
69     {doc, xml,	[module,function,overview,single]},
70     {docfile, fun parse_file/4, All},
71     {'end', text, All},
72     {equiv, fun parse_expr/4, [function,single]},
73     {headerfile, fun parse_header/4, All},
74     {hidden, text, [module,function,single]},
75     {param, fun parse_param/4, [function]},
76     {private, text, [module,function,single]},
77     {reference, xml, [module,footer,overview]},
78     {returns, xml, [function,single]},
79     {see, fun parse_see/4, [module,function,overview]},
80     {since, text, [module,function,overview,single]},
81     {spec, fun parse_spec/4, [function,single]},
82     {throws, fun parse_throws/4, [function,single]},
83     {title, text, [overview,single]},
84     {'TODO', xml, All},
85     {todo, xml, All},
86     {type, fun parse_typedef/4, [module,footer,function]},
87     {version, text, [module,overview,single]}].
88
89aliases('TODO') -> todo;
90aliases(return) -> returns;
91aliases(T) -> T.
92
93%% Selecting tags based on flags.
94tags(Flag) ->
95    [T || {T,_,Fs} <- tags(), lists:member(Flag, Fs)].
96
97%% The set of known tags.
98tag_names() ->
99    [T || {T,_,_} <- tags()].
100
101%% The pairs of tags and their parsers.
102tag_parsers() ->
103    [{T,F} || {T,F,_} <- tags()].
104
105
106%% Scanning lines of comment text.
107
108scan_lines(Ss, L) ->
109    lists:reverse(scan_lines(Ss, L, [])).
110
111scan_lines([S | Ss], L, As) ->
112    scan_lines(S, Ss, L, As);
113scan_lines([], _L, As) ->
114    As.
115
116%% Looking for a leading '@', skipping whitespace.
117%% Also accept "TODO:" at start of line as equivalent to "@TODO".
118
119scan_lines([$\s | Cs], Ss, L, As) -> scan_lines(Cs, Ss, L, As);
120scan_lines([$\t | Cs], Ss, L, As) -> scan_lines(Cs, Ss, L, As);
121scan_lines([$@ | Cs], Ss, L, As) -> scan_tag(Cs, Ss, L, As, []);
122scan_lines(("TODO:"++_)=Cs, Ss, L, As) -> scan_tag(Cs, Ss, L, As, []);
123scan_lines(_, Ss, L, As) -> scan_lines(Ss, L + 1, As).
124
125%% Scanning chars following '@', accepting only nonempty valid names.
126%% See edoc_lib:is_name/1 for details on what is a valid name. In tags
127%% we also allow the initial letter to be uppercase or underscore.
128
129scan_tag([C | Cs], Ss, L, As, Ts) when C >= $a, C =< $z ->
130    scan_tag_1(Cs, Ss, L, As, [C | Ts]);
131scan_tag([C | Cs], Ss, L, As, Ts) when C >= $A, C =< $Z ->
132    scan_tag_1(Cs, Ss, L, As, [C | Ts]);
133scan_tag([C | Cs], Ss, L, As, Ts) when C >= $\300, C =< $\377,
134					 C =/= $\327, C =/= $\367 ->
135    scan_tag_1(Cs, Ss, L, As, [C | Ts]);
136scan_tag([$_ | Cs], Ss, L, As, Ts) ->
137    scan_tag_1(Cs, Ss, L, As, [$_ | Ts]);
138scan_tag(_Cs, Ss, L, As, _Ts) ->
139    scan_lines(Ss, L + 1, As).    % not a valid name
140
141scan_tag_1([C | Cs], Ss, L, As, Ts) when C >= $a, C =< $z ->
142    scan_tag_1(Cs, Ss, L, As, [C | Ts]);
143scan_tag_1([C | Cs], Ss, L, As, Ts) when C >= $A, C =< $Z ->
144    scan_tag_1(Cs, Ss, L, As, [C | Ts]);
145scan_tag_1([C | Cs], Ss, L, As, Ts) when C >= $0, C =< $9 ->
146    scan_tag_1(Cs, Ss, L, As, [C | Ts]);
147scan_tag_1([C | Cs], Ss, L, As, Ts) when C >= $\300, C =< $\377,
148					 C =/= $\327, C =/= $\367 ->
149    scan_tag_1(Cs, Ss, L, As, [C | Ts]);
150scan_tag_1([$_ | Cs], Ss, L, As, Ts) ->
151    scan_tag_1(Cs, Ss, L, As, [$_ | Ts]);
152scan_tag_1(Cs, Ss, L, As, Ts) ->
153    scan_tag_2(Cs, Ss, L, As, {Ts, L}).
154
155%% Check that the tag is followed by whitespace, linebreak, or colon.
156
157scan_tag_2([$\s | Cs], Ss, L, As, T) ->
158    scan_tag_lines(Ss, T, [Cs], L + 1, As);
159scan_tag_2([$\t | Cs], Ss, L, As, T) ->
160    scan_tag_lines(Ss, T, [Cs], L + 1, As);
161scan_tag_2([$: | Cs], Ss, L, As, T) ->
162    scan_tag_lines(Ss, T, [Cs], L + 1, As);
163scan_tag_2([], Ss, L, As, T) ->
164    scan_tag_lines(Ss, T, [[]], L + 1, As);
165scan_tag_2(_, Ss, L, As, _T) ->
166    scan_lines(Ss, L + 1, As).
167
168%% Scanning lines after a tag is found.
169
170scan_tag_lines([S | Ss], T, Ss1, L, As) ->
171    scan_tag_lines(S, S, Ss, T, Ss1, L, As);
172scan_tag_lines([], {Ts, L1}, Ss1, _L, As) ->
173    [make_tag(Ts, L1, Ss1) | As].
174
175%% Collecting tag text lines until end of comment or next tagged line.
176
177scan_tag_lines([$\s | Cs], S, Ss, T, Ss1, L, As) ->
178    scan_tag_lines(Cs, S, Ss, T, Ss1, L, As);
179scan_tag_lines([$\t | Cs], S, Ss, T, Ss1, L, As) ->
180    scan_tag_lines(Cs, S, Ss, T, Ss1, L, As);
181scan_tag_lines([$@, C | _Cs], S, Ss, {Ts, L1}, Ss1, L, As)
182  when C >= $a, C =< $z ->
183    scan_lines(S, Ss, L, [make_tag(Ts, L1, Ss1) | As]);
184scan_tag_lines([$@, C | _Cs], S, Ss, {Ts, L1}, Ss1, L, As)
185  when C >= $A, C =< $Z ->
186    scan_lines(S, Ss, L, [make_tag(Ts, L1, Ss1) | As]);
187scan_tag_lines([$@, C | _Cs], S, Ss, {Ts, L1}, Ss1, L, As)
188  when C >= $\300, C =< $\377, C =/= $\327, C =/= $\367 ->
189    scan_lines(S, Ss, L, [make_tag(Ts, L1, Ss1) | As]);
190scan_tag_lines("TODO:"++_, S, Ss, {Ts, L1}, Ss1, L, As) ->
191    scan_lines(S, Ss, L, [make_tag(Ts, L1, Ss1) | As]);
192scan_tag_lines(_Cs, S, Ss, T, Ss1, L, As) ->
193    scan_tag_lines(Ss, T, [S | Ss1], L + 1, As).
194
195make_tag(Cs, L, Ss) ->
196    #tag{name = aliases(list_to_atom(lists:reverse(Cs))),
197	 line = L,
198	 data = append_lines(lists:reverse(Ss))}.
199
200%% Flattening lines of text and inserting line breaks.
201
202append_lines([L]) -> L;
203append_lines([L | Ls]) -> L ++ [$\n | append_lines(Ls)];
204append_lines([]) -> [].
205
206%% Filtering out unknown tags.
207
208filter_tags(Ts, Tags) ->
209    filter_tags(Ts, Tags, no).
210
211filter_tags(Ts, Tags, Where) ->
212    filter_tags(Ts, Tags, Where, []).
213
214filter_tags([#tag{name = clear} | Ts], Tags, Where, _Ts1) ->
215    filter_tags(Ts, Tags, Where);
216filter_tags([#tag{name = N, line = L} = T | Ts], Tags, Where, Ts1) ->
217    case sets:is_element(N, Tags) of
218	true ->
219	    filter_tags(Ts, Tags, Where, [T | Ts1]);
220	false ->
221	    case Where of
222		no -> ok;
223		_ -> warning(L, Where, "tag @~s not recognized.", [N])
224	    end,
225	    filter_tags(Ts, Tags, Where, Ts1)
226    end;
227filter_tags([], _, _, Ts) ->
228    lists:reverse(Ts).
229
230%% Check occurrences of tags.
231
232check_tags(Ts, Allow, Single, Where) ->
233    check_tags(Ts, Allow, Single, Where, false, sets:new()).
234
235check_tags([#tag{name = T, line = L} | Ts], Allow, Single, Where, Error, Seen) ->
236    case sets:is_element(T, Seen) of
237	true ->
238	    case sets:is_element(T, Single) of
239		false ->
240		    check_tags(Ts, Allow, Single, Where, Error, Seen);
241		true ->
242		    report(L, Where, "multiple @~s tag.", [T]),
243		    check_tags(Ts, Allow, Single, Where, true, Seen)
244	    end;
245	false ->
246	    Seen1 = sets:add_element(T, Seen),
247	    case sets:is_element(T, Allow) of
248		true ->
249		    check_tags(Ts, Allow, Single, Where, Error, Seen1);
250		false ->
251		    report(L, Where, "tag @~s not allowed here.", [T]),
252		    check_tags(Ts, Allow, Single, Where, true, Seen1)
253	    end
254    end;
255check_tags([], _, _, _, Error, _) ->
256    Error.
257
258
259%% Parses tag contents for specific tags.
260
261parse_tags(Ts, How, Env, Where) ->
262    parse_tags(Ts, How, Env, Where, []).
263
264parse_tags([#tag{name = Name} = T | Ts], How, Env, Where, Ts1) ->
265    case dict:fetch(Name, How) of
266	text ->
267	    parse_tags(Ts, How, Env, Where, [T | Ts1]);
268	xml ->
269	    [T1] = parse_tag(T, fun parse_xml/4, Env, Where),
270	    parse_tags(Ts, How, Env, Where, [T1 | Ts1]);
271	F when is_function(F) ->
272	    Ts2 = parse_tag(T, F, Env, Where),
273	    parse_tags(Ts, How, Env, Where, lists:reverse(Ts2, Ts1))
274    end;
275parse_tags([], _How, _Env, _Where, Ts) ->
276    lists:reverse(Ts).
277
278parse_tag(T, F, Env, Where) ->
279    case catch {ok, F(T#tag.data, T#tag.line, Env, Where)} of
280	{ok, Data} ->
281	    [T#tag{data = Data}];
282	{expand, Ts} ->
283	    Ts;
284	{error, L, Error} ->
285	    error(L, Where, Error),
286	    exit(error);
287	{'EXIT', R} -> exit(R);
288	Other -> throw(Other)
289    end.
290
291%% parser functions for the built-in content types. They also perform
292%% some sanity checks on the results.
293
294parse_xml(Data, Line, _Env, _Where) ->
295    edoc_wiki:parse_xml(Data, Line).
296
297parse_see(Data, Line, _Env, _Where) ->
298    edoc_parser:parse_see(Data, Line).
299
300parse_expr(Data, Line, _Env, _Where) ->
301    edoc_lib:parse_expr(Data, Line).
302
303parse_spec(Data, Line, _Env, {_, {F, A}} = _Where) ->
304    Spec = edoc_parser:parse_spec(Data, Line),
305    #t_spec{name = N, type = #t_fun{args = As}} = Spec,
306    if length(As) /= A ->
307	    throw_error(Line, "@spec arity does not match.");
308       true ->
309	    case N of
310		undefined ->
311		    Spec#t_spec{name = #t_name{module = [], name = F}};
312		#t_name{module = [], name = F} ->
313		    Spec;
314		_ ->
315		    throw_error(Line, "@spec name does not match.")
316	    end
317    end.
318
319parse_param(Data, Line, _Env, {_, {_F, _A}} = _Where) ->
320    edoc_parser:parse_param(Data, Line).
321
322parse_throws(Data, Line, _Env, {_, {_F, _A}} = _Where) ->
323    edoc_parser:parse_throws(Data, Line).
324
325parse_contact(Data, Line, _Env, _Where) ->
326    case edoc_lib:parse_contact(Data, Line) of
327	{"", "", _URI} ->
328	    throw_error(Line, "must specify name or e-mail.");
329	Info ->
330	    Info
331    end.
332
333parse_typedef(Data, Line, _Env, Where) ->
334    Def = edoc_parser:parse_typedef(Data, Line),
335    {#t_typedef{name = #t_name{name = T}, args = As}, _} = Def,
336    NAs = length(As),
337    case edoc_types:is_predefined(T, NAs) of
338	true ->
339            case edoc_types:is_new_predefined(T, NAs) of
340                false ->
341                    throw_error(Line, {"redefining built-in type '~w'.",
342                                       [T]});
343                true ->
344                    warning(Line, Where, "redefining built-in type '~w'.",
345                            [T]),
346                    Def
347            end;
348	false ->
349	    Def
350    end.
351
352-type line() :: erl_anno:line().
353
354-spec parse_file(_, line(), _, _) -> no_return().
355
356parse_file(Data, Line, Env, _Where) ->
357    case edoc_lib:parse_expr(Data, Line) of
358	{string, _, File0} ->
359	    File = edoc_lib:strip_space(File0),
360	    case edoc_extract:file(File, module, Env, []) of
361		{ok, Ts} ->
362		    throw({expand, Ts});
363		{error, R} ->
364		    throw_error(Line, {read_file, File, R})
365	    end;
366	_ ->
367	    throw_error(Line, file_not_string)
368    end.
369
370-spec parse_header(_, line(), _, _) -> no_return().
371
372parse_header(Data, Line, Env, {Where, _}) ->
373    parse_header(Data, Line, Env, Where);
374parse_header(Data, Line, Env, Where) when is_list(Where) ->
375    case edoc_lib:parse_expr(Data, Line) of
376	{string, _, File} ->
377	    Dir = filename:dirname(Where),
378	    Path = Env#env.includes ++ [Dir],
379	    case edoc_lib:find_file(Path, File) of
380		"" ->
381		    throw_error(Line, {file_not_found, File});
382		File1 ->
383		    Ts = edoc_extract:header(File1, Env, []),
384		    throw({expand, Ts})
385	    end;
386	_ ->
387	    throw_error(Line, file_not_string)
388    end.
389
390-type err() :: 'file_not_string'
391             | {'file_not_found', file:filename()}
392             | {'read_file', file:filename(), term()}
393             | string().
394
395-spec throw_error(line(), err()) -> no_return().
396
397throw_error(L, {read_file, File, R}) ->
398    throw_error(L, {"error reading file '~ts': ~w",
399		    [edoc_lib:filename(File), R]});
400throw_error(L, {file_not_found, F}) ->
401    throw_error(L, {"file not found: ~ts", [F]});
402throw_error(L, file_not_string) ->
403    throw_error(L, "expected file name as a string");
404throw_error(L, D) ->
405    throw({error, L, D}).
406
407%% Checks local types.
408
409-record(parms, {tab, warn, file, line}).
410
411check_types(Entries, Opts, File) ->
412    Tags = edoc_data:get_all_tags(Entries),
413    TypeTags = [Tag || #tag{data = {#t_typedef{},_}}=Tag <- Tags],
414    Entries2 = edoc_data:hidden_filter(Entries, Opts),
415    Tags2 = edoc_data:get_all_tags(Entries2),
416    SpecTags = [Tag || #tag{data = #t_spec{}}=Tag <- Tags2],
417    DT = ets:new(types, [bag]),
418    _ = [add_type(DT, Name, As, File, Line) ||
419            #tag{line = Line,
420                 data = {#t_typedef{name = Name, args = As},_}} <- TypeTags],
421    Warn = proplists:get_value(report_missing_types, Opts,
422                               ?REPORT_MISSING_TYPES) =:= true,
423    P = #parms{tab = DT, warn = Warn, file = File, line = 0},
424    try check_types3(TypeTags++SpecTags, P, [])
425    after true = ets:delete(DT)
426    end.
427
428add_type(DT, Name, Args, File, Line) ->
429    NArgs = length(Args),
430    TypeName = {Name, NArgs},
431    case lists:member(TypeName, ets:lookup(DT, Name)) of
432        true ->
433            #t_name{name = N} = Name,
434            type_warning(Line, File, "duplicated type", N, NArgs);
435        false ->
436            ets:insert(DT, {Name, NArgs})
437    end.
438
439check_types3([], _P, _Ls)->
440    ok;
441check_types3([Tag | Tags], P, Ls) ->
442    check_type(Tag, P, Ls, Tags).
443
444check_type(#tag{line = L, data = Data}, P0, Ls, Ts) ->
445    P = P0#parms{line = L},
446    case Data of
447        {#t_typedef{type = Type, defs = Defs},_} ->
448            check_type(Type, P, Ls, Defs++Ts);
449        #t_spec{type = Type, defs = Defs} ->
450            LocalTypes =
451                [{N,length(Args)} ||
452                    #t_def{name = #t_type{name = N, args = Args}} <- Defs],
453            check_type(Type, P, LocalTypes, Defs),
454            check_types3(Ts, P, Ls);
455        _->
456            check_types3(Ts, P0, Ls)
457    end;
458check_type(#t_def{type = Type}, P, Ls, Ts) ->
459    check_type(Type, P, Ls, Ts);
460check_type(#t_type{name = Name, args = Args}, P, Ls, Ts) ->
461    check_used_type(Name, Args, P, Ls),
462    check_types3(Args++Ts, P, Ls);
463check_type(#t_var{}, P, Ls, Ts) ->
464    check_types3(Ts, P, Ls);
465check_type(#t_fun{args = Args, range = Range}, P, Ls, Ts) ->
466    check_type(Range, P, Ls, Args++Ts);
467check_type(#t_map{}, P, Ls, Ts) ->
468    check_types3(Ts, P, Ls);
469check_type(#t_tuple{types = Types}, P, Ls, Ts) ->
470    check_types3(Types ++Ts, P, Ls);
471check_type(#t_list{type = Type}, P, Ls, Ts) ->
472    check_type(Type, P, Ls, Ts);
473check_type(#t_nil{}, P, Ls, Ts) ->
474    check_types3(Ts, P, Ls);
475check_type(#t_paren{type = Type}, P, Ls, Ts) ->
476    check_type(Type, P, Ls, Ts);
477check_type(#t_nonempty_list{type = Type}, P, Ls, Ts) ->
478    check_type(Type, P, Ls, Ts);
479check_type(#t_atom{}, P, Ls, Ts) ->
480    check_types3(Ts, P, Ls);
481check_type(#t_integer{}, P, Ls, Ts) ->
482    check_types3(Ts, P, Ls);
483check_type(#t_integer_range{}, P, Ls, Ts) ->
484    check_types3(Ts, P, Ls);
485check_type(#t_binary{}, P, Ls, Ts) ->
486    check_types3(Ts, P, Ls);
487check_type(#t_float{}, P, Ls, Ts) ->
488    check_types3(Ts, P, Ls);
489check_type(#t_union{types = Types}, P, Ls, Ts) ->
490    check_types3(Types++Ts, P, Ls);
491check_type(#t_record{fields = Fields}, P, Ls, Ts) ->
492    check_types3(Fields++Ts, P, Ls);
493check_type(#t_field{type = Type}, P, Ls, Ts) ->
494    check_type(Type, P, Ls, Ts);
495check_type(undefined, P, Ls, Ts) ->
496    check_types3(Ts, P, Ls).
497
498check_used_type(#t_name{name = N, module = Mod}=Name, Args, P, LocalTypes) ->
499    NArgs = length(Args),
500    TypeName = {Name, NArgs},
501    DT = P#parms.tab,
502    case
503        Mod =/= []
504        orelse lists:member(TypeName, ets:lookup(DT, Name))
505        orelse edoc_types:is_predefined(N, NArgs)
506        orelse lists:member(TypeName, LocalTypes)
507    of
508        true ->
509            ok;
510        false ->
511            #parms{warn = W, line = L, file = File} = P,
512            %% true = ets:insert(DT, TypeName),
513            _ = [type_warning(L, File, "missing type", N, NArgs) || W],
514	    ok
515    end.
516
517type_warning(Line, File, S, N, NArgs) ->
518    AS = ["/"++integer_to_list(NArgs) || NArgs > 0],
519    warning(Line, File, S++" ~w~s", [N, AS]).
520