1%% =====================================================================
2%% Licensed under the Apache License, Version 2.0 (the "License"); you may
3%% not use this file except in compliance with the License. You may obtain
4%% a copy of the License at <http://www.apache.org/licenses/LICENSE-2.0>
5%%
6%% Unless required by applicable law or agreed to in writing, software
7%% distributed under the License is distributed on an "AS IS" BASIS,
8%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9%% See the License for the specific language governing permissions and
10%% limitations under the License.
11%%
12%% Alternatively, you may use this file under the terms of the GNU Lesser
13%% General Public License (the "LGPL") as published by the Free Software
14%% Foundation; either version 2.1, or (at your option) any later version.
15%% If you wish to allow use of your version of this file only under the
16%% terms of the LGPL, you should delete the provisions above and replace
17%% them with the notice and other provisions required by the LGPL; see
18%% <http://www.gnu.org/licenses/>. If you do not delete the provisions
19%% above, a recipient may use your version of this file under the terms of
20%% either the Apache License or the LGPL.
21%%
22%% @copyright 2001-2003 Richard Carlsson
23%% @author Richard Carlsson <carlsson.richard@gmail.com>
24%% @see edoc
25%% @end
26%% =====================================================================
27
28%% @doc Utility functions for EDoc.
29
30-module(edoc_lib).
31
32-export([count/2, lines/1, split_at/2, split_at_stop/1,
33	 split_at_space/1, filename/1, transpose/1, segment/2,
34	 get_first_sentence/1, is_space/1, strip_space/1, parse_expr/2,
35	 parse_contact/2, escape_uri/1, join_uri/2, is_relative_uri/1,
36	 is_name/1, to_label/1, find_doc_dirs/0, find_sources/2,
37	 find_file/2, try_subdir/2, unique/1,
38	 write_file/3, write_file/4, write_info_file/3,
39	 read_info_file/1, get_doc_env/1, get_doc_env/3, copy_file/2,
40	 uri_get/1, run_doclet/2, run_layout/2,
41	 simplify_path/1, timestr/1, datestr/1, read_encoding/2]).
42
43-import(edoc_report, [report/2, warning/2]).
44
45-include("edoc.hrl").
46-include_lib("xmerl/include/xmerl.hrl").
47
48-define(FILE_BASE, "/").
49
50
51%% ---------------------------------------------------------------------
52%% List and string utilities
53
%% @doc Formats a `{Hour, Minute, Second}' tuple as a string of the
%% form `"HH:MM:SS"', where each field is written with two digits and
%% padded with zeros.
%% @private
timestr({H,M,Sec}) ->
    Chars = io_lib:format("~2.2.0w:~2.2.0w:~2.2.0w", [H, M, Sec]),
    lists:flatten(Chars).
57
%% @doc Formats a `{Year, Month, Day}' tuple as `"Mon D YYYY"', e.g.
%% `"Jan 2 2003"', using the English three-letter month abbreviation.
%% @private
datestr({Y,M,D}) ->
    MonthNames = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    Month = lists:nth(M, MonthNames),
    lists:flatten(io_lib:format("~s ~w ~w", [Month, D, Y])).
63
%% @doc Returns the result of `epp:read_encoding(File, Options)',
%% except that `none' is replaced by `epp:default_encoding()'.
%% @private
read_encoding(File, Options) ->
    Declared = epp:read_encoding(File, Options),
    if
        Declared =:= none -> epp:default_encoding();
        true -> Declared
    end.
70
%% @doc Returns the number of elements in the list `Xs' that are
%% exactly equal to `X'.
%% @private
count(X, Xs) ->
    Tally = fun (Y, N) when Y =:= X -> N + 1;
                (_, N) -> N
            end,
    lists:foldl(Tally, 0, Xs).
81
%% @doc Splits the string `Cs' at every newline character and returns
%% the list of lines, without the newlines. The result always contains
%% at least one (possibly empty) line; a trailing newline yields a final
%% empty line.
%% @private
lines(Cs) ->
    case lists:splitwith(fun (C) -> C =/= $\n end, Cs) of
        {Line, [$\n | Rest]} ->
            [Line | lines(Rest)];
        {Line, []} ->
            [Line]
    end.
92
%% @doc Splits the list `Cs' at the first element that is exactly equal
%% to `K'. Returns `{Before, After}', where the separator itself is
%% dropped; if `K' does not occur, `After' is the empty list.
%% @private
split_at(Cs, K) ->
    case lists:splitwith(fun (C) -> C =/= K end, Cs) of
        {Before, [_Separator | After]} ->
            {Before, After};
        {Before, []} ->
            {Before, []}
    end.
103
%% @doc Splits the string `Cs' at the first full stop, i.e., a period
%% that is followed by a space, tab or newline, or that is the very last
%% character. Returns `{Before, After}' with both the period and the
%% following whitespace character removed. If there is no full stop,
%% the whole string is returned as `Before'.
%% @private
split_at_stop(Cs) ->
    split_at_stop(Cs, []).

split_at_stop([$., W | Rest], Seen)
  when W =:= $\s; W =:= $\t; W =:= $\n ->
    {lists:reverse(Seen), Rest};
split_at_stop([$.], Seen) ->
    {lists:reverse(Seen), []};
split_at_stop([], Seen) ->
    {lists:reverse(Seen), []};
split_at_stop([C | Rest], Seen) ->
    split_at_stop(Rest, [C | Seen]).
120
%% @doc Splits the string `Cs' at the first space, tab or newline
%% character. Returns `{Before, After}' with that single whitespace
%% character removed; if there is none, `After' is the empty string.
%% @private
split_at_space(Cs) ->
    NotSpace = fun (C) ->
                       C =/= $\s andalso C =/= $\t andalso C =/= $\n
               end,
    case lists:splitwith(NotSpace, Cs) of
        {Word, [_Space | Rest]} ->
            {Word, Rest};
        {Word, []} ->
            {Word, []}
    end.
135
%% @doc Returns `true' if the string consists only of space, tab and
%% newline characters (which includes the empty string), otherwise
%% `false'.
%% @private
is_space(Cs) ->
    lists:all(fun (C) ->
                      C =:= $\s orelse C =:= $\t orelse C =:= $\n
              end,
              Cs).
142
%% @doc Removes leading space, tab and newline characters from `Cs'.
%% Anything else, including the rest of the string, is returned as is.
%% @private
strip_space([C | Cs]) when C =:= $\s; C =:= $\t; C =:= $\n ->
    strip_space(Cs);
strip_space(Cs) ->
    Cs.
148
%% @doc Splits the list `Es' into consecutive sublists of `N' elements
%% each; the last sublist holds the remaining elements and may be
%% shorter. The empty list yields the empty list for any `N'.
%%
%% For a nonempty list, `N' must be greater than zero. Other values
%% used to make the function loop forever (a segment could never
%% consume an element); they now fail with `function_clause' instead.
%% @private
segment([], _N) ->
    [];
segment(Es, N) when N > 0 ->
    segment(Es, [], [], 0, N).

%% As: elements of the current segment (reversed), Cs: completed
%% segments (reversed), N: size of the current segment, M: segment size.
segment([E | Es], As, Cs, N, M) when N < M ->
    segment(Es, [E | As], Cs, N + 1, M);
segment([_ | _] = Es, As, Cs, _N, M) ->
    %% Current segment is full: close it and start a new one.
    segment(Es, [], [lists:reverse(As) | Cs], 0, M);
segment([], [], Cs, _N, _M) ->
    lists:reverse(Cs);
segment([], As, Cs, _N, _M) ->
    lists:reverse([lists:reverse(As) | Cs]).
161
%% @doc Transposes a list of lists: the first result row holds the
%% first element of every input row, and so on. Rows may have different
%% lengths; a row that has run out of elements simply contributes
%% nothing to the remaining result rows.
%% @private
transpose([]) ->
    [];
transpose([[] | Rows]) ->
    transpose(Rows);
transpose([[X | Xs] | Rows]) ->
    Heads = [H || [H | _] <- Rows],
    Tails = [T || [_ | T] <- Rows],
    [[X | Heads] | transpose([Xs | Tails])].
168
169%% Note that the parser will not produce two adjacent text segments;
170%% thus, if a text segment ends with a period character, it marks the
171%% end of the summary sentence only if it is also the last segment in
172%% the list, or is followed by a 'p' or 'br' ("whitespace") element.
173
%% @doc Returns the prefix of the XML content list `Es' that makes up
%% the first sentence. If the list starts with a `p' element, the
%% content of that element is used instead. Non-text elements are kept
%% as they are; the text segment in which the sentence ends is
%% truncated after the end-of-sentence marker.
%% @private
get_first_sentence([#xmlElement{name = p, content = Content} | _]) ->
    %% Descend into initial paragraph.
    get_first_sentence_1(Content);
get_first_sentence(Content) ->
    get_first_sentence_1(Content).

get_first_sentence_1([]) ->
    [];
get_first_sentence_1([#xmlText{value = Text} = T | Rest]) ->
    %% A trailing period only ends the sentence if the text is the last
    %% segment, or is followed by a 'p' or 'br' element.
    AtBreak = case Rest of
                  [] -> true;
                  [#xmlElement{name = p} | _] -> true;
                  [#xmlElement{name = br} | _] -> true;
                  _ -> false
              end,
    case end_of_sentence(Text, AtBreak) of
        none ->
            [T | get_first_sentence_1(Rest)];
        {value, Sentence} ->
            [T#xmlText{value = Sentence}]
    end;
get_first_sentence_1([Other | Rest]) ->
    %% Skip non-text segments - don't descend further
    [Other | get_first_sentence_1(Rest)].
199
%% Looks for the end of the first sentence in the text `Cs'. Returns
%% `{value, Sentence}', where `Sentence' is the text up to and including
%% the end-of-sentence marker, or `none' if the sentence does not end in
%% this text. `Last' tells whether a marker at the very end of the text
%% counts as an end of sentence.
end_of_sentence(Cs, Last) ->
    end_of_sentence(Cs, Last, []).

%% We detect '.' and '!' as end-of-sentence markers.

end_of_sentence([C, W | _], _Last, As)
  when (C =:= $. orelse C =:= $!),
       (W =:= $\s orelse W =:= $\t orelse W =:= $\n) ->
    %% A marker followed by whitespace always ends the sentence.
    end_of_sentence_1(C, true, As);
end_of_sentence([C], Last, As) when C =:= $.; C =:= $! ->
    %% A marker at the very end counts only if `Last' is true.
    end_of_sentence_1(C, Last, As);
end_of_sentence([C | Cs], Last, As) ->
    end_of_sentence(Cs, Last, [C | As]);
end_of_sentence([], Last, As) ->
    %% No marker found: drop trailing whitespace (As is reversed, so it
    %% is at the front) and add a '.'
    IsSpace = fun (C) ->
                      C =:= $\s orelse C =:= $\t orelse C =:= $\n
              end,
    end_of_sentence_1($., Last, lists:dropwhile(IsSpace, As)).

end_of_sentence_1(_C, false, _As) ->
    none;
end_of_sentence_1(C, true, As) ->
    {value, lists:reverse([C | As])}.
230
231%% For handling ISO 8859-1 (Latin-1) we use the following information:
232%%
233%% 000 - 037	NUL - US	control
234%% 040 - 057	SPC - /		punctuation
235%% 060 - 071	0 - 9		digit
236%% 072 - 100	: - @		punctuation
237%% 101 - 132	A - Z		uppercase
238%% 133 - 140	[ - `		punctuation
239%% 141 - 172	a - z		lowercase
240%% 173 - 176	{ - ~		punctuation
241%% 177		DEL		control
242%% 200 - 237			control
243%% 240 - 277	NBSP - ¿	punctuation
244%% 300 - 326	À - Ö		uppercase
245%% 327		×		punctuation
246%% 330 - 336	Ø - Þ		uppercase
247%% 337 - 366	ß - ö		lowercase
248%% 367		÷		punctuation
249%% 370 - 377	ø - ÿ		lowercase
250
251%% Names must begin with a lowercase letter and contain only
252%% alphanumerics and underscores.
253
%% @doc Returns `true' if the string is a name: a lowercase ISO 8859-1
%% letter followed by any number of ISO 8859-1 letters, digits and
%% underscores. Returns `false' for anything else, including the empty
%% string.
%% @private
is_name([C | Cs])
  when C >= $a, C =< $z;
       C >= $\337, C =< $\377, C =/= $\367 ->
    is_name_1(Cs);
is_name(_) ->
    false.

is_name_1([C | Cs])
  when C >= $a, C =< $z;
       C >= $A, C =< $Z;
       C >= $0, C =< $9;
       C =:= $_;
       C >= $\300, C =< $\377, C =/= $\327, C =/= $\367 ->
    is_name_1(Cs);
is_name_1([]) ->
    true;
is_name_1(_) ->
    false.
273
%% @doc Removes adjacent duplicates from a list, keeping the first
%% element of every run of exactly equal elements. Equal elements that
%% are not adjacent are all kept.
%% @private
unique(Xs) ->
    Step = fun (X, [X | _] = Kept) -> Kept;
               (X, Kept) -> [X | Kept]
           end,
    lists:reverse(lists:foldl(Step, [], Xs)).
281
282
283%% ---------------------------------------------------------------------
284%% Parsing utilities
285
%% @doc EDoc Erlang expression parsing. Scans and parses the string `S'
%% (to which a terminating period is added) as a single Erlang
%% expression, starting at line `L', and returns its abstract form.
%% Scanner and parser errors are thrown as `{error, Line, Descriptor}'.
%% Used for parsing things like the content of
%% <a href="overview-summary.html#ftag-equiv">`@equiv'</a>
%% tags, and strings denoting file names, e.g. in @headerfile. Also used
%% by {@link edoc_run}.
%% @private

parse_expr(S, L) ->
    case erl_scan:string(S ++ ".", L) of
        {error, ScanError, _} ->
            throw_error(ScanError, L);
        {ok, Tokens, _} ->
            case erl_parse:parse_exprs(Tokens) of
                {ok, [Expr]} ->
                    Expr;
                {error, {999999, erl_parse, _}} ->
                    %% This line number is taken to mean that the
                    %% parser ran out of input.
                    throw_error(eof, L);
                {error, ParseError} ->
                    throw_error(ParseError, L)
            end
    end.
306
307
308%% @doc EDoc "contact information" parsing. This is the type of the
309%% content in e.g.
310%% <a href="overview-summary.html#mtag-author">`@author'</a> tags.
311%% @private
312
313%% % @type info() = #info{name  = string(),
314%% %                      email = string(),
315%% %                      uri   = string()}
316
-record(info, {name = ""  :: string(),
               email = "" :: string(),
               uri = ""   :: string()}).

%% Parses the contact string `S', starting at line `L', and returns
%% `{Name, Email, URI}'; parts that are not present in the string are
%% returned as empty strings.
parse_contact(S, L) ->
    #info{name = Name, email = Email, uri = URI} =
        scan_name(S, L, #info{}, []),
    {Name, Email, URI}.
324
%% The name is taken as the first non-whitespace-only string before,
%% between, or following the e-mail/URI sections. Subsequent text that
%% is not e-mail or URI is ignored.
%%
%% An e-mail section is written `<...>' and a URI section `[...]'; a
%% second nonempty section of the same kind is reported as an error.
%% `L' is the current line number and `As' holds the characters read
%% so far, in reverse order.

scan_name([$< | Cs], L, #info{email = ""} = I, As) ->
    {Rest, I1} = scan_email(Cs, L, set_name(I, As), []),
    scan_name(Rest, L, I1, []);
scan_name([$< | _], L, #info{}, _As) ->
    throw_error("multiple '<...>' sections.", L);
scan_name([$[ | Cs], L, #info{uri = ""} = I, As) ->
    {Rest, I1} = scan_uri(Cs, L, set_name(I, As), []),
    scan_name(Rest, L, I1, []);
scan_name([$[ | _], L, #info{}, _As) ->
    throw_error("multiple '[...]' sections.", L);
scan_name([$\n | Cs], L, I, As) ->
    scan_name(Cs, L + 1, I, [$\n | As]);
scan_name([C | Cs], L, I, As) ->
    scan_name(Cs, L, I, [C | As]);
scan_name([], _L, I, As) ->
    set_name(I, As).
351
%% Reads the rest of a `[...]' section. Returns `{Rest, Info}', where
%% `Rest' is the input following the closing bracket and `Info' has its
%% uri field set to the section text with surrounding whitespace
%% removed. Throws an error if the closing bracket is missing.
scan_uri([], L, _I, _As) ->
    throw_error({missing, $]}, L);
scan_uri([$] | Rest], _L, I, As) ->
    {Rest, I#info{uri = strip_and_reverse(As)}};
scan_uri([C | Rest], L, I, As) ->
    %% Keep track of the line number for error reporting.
    L1 = if C =:= $\n -> L + 1;
            true -> L
         end,
    scan_uri(Rest, L1, I, [C | As]).
360
%% Reads the rest of a `<...>' section. Returns `{Rest, Info}', where
%% `Rest' is the input following the closing `>' and `Info' has its
%% email field set to the section text with surrounding whitespace
%% removed. Throws an error if the closing `>' is missing.
scan_email([], L, _I, _As) ->
    throw_error({missing, $>}, L);
scan_email([$> | Rest], _L, I, As) ->
    {Rest, I#info{email = strip_and_reverse(As)}};
scan_email([C | Rest], L, I, As) ->
    %% Keep track of the line number for error reporting.
    L1 = if C =:= $\n -> L + 1;
            true -> L
         end,
    scan_email(Rest, L1, I, [C | As]).
369
%% Sets the name field from the reversed character list `As', unless a
%% nonempty name has already been recorded.
set_name(#info{name = ""} = I, As) ->
    I#info{name = strip_and_reverse(As)};
set_name(#info{} = I, _As) ->
    I.
375
%% Takes a reversed string and returns it in its proper order, with
%% whitespace removed from both ends.
strip_and_reverse(As) ->
    NoTrailing = edoc_lib:strip_space(As),
    edoc_lib:strip_space(lists:reverse(NoTrailing)).
378
379
380%% ---------------------------------------------------------------------
381%% URI and Internet
382
%% This is a conservative URI escaping, which escapes anything that may
%% not appear in an NMTOKEN ([a-zA-Z0-9]|'.'|'-'|'_'), including ':'.
%% Characters are first encoded in UTF-8, and every resulting byte is
%% written as '%' followed by two lowercase hexadecimal digits.
%%
%% Note that this should *not* be applied to complete URI, but only to
%% segments that may need escaping, when forming a complete URI.
%%
%% All code points up to 16#10ffff are encoded with the proper 2-, 3-
%% or 4-byte UTF-8 sequence. (Previously, only code points up to 16#ff
%% were encoded correctly.)

%% @private
escape_uri([C | Cs])
  when C >= $a, C =< $z;
       C >= $A, C =< $Z;
       C >= $0, C =< $9;
       C =:= $.; C =:= $-; C =:= $_ ->
    [C | escape_uri(Cs)];
escape_uri([C | Cs]) when C > 16#7f ->
    lists:flatmap(fun escape_byte/1, utf8_bytes(C)) ++ escape_uri(Cs);
escape_uri([C | Cs]) ->
    escape_byte(C) ++ escape_uri(Cs);
escape_uri([]) ->
    [].

%% Returns the UTF-8 encoding of a code point above 16#7f, as a list
%% of bytes.
utf8_bytes(C) when is_integer(C), C =< 16#7ff ->
    [16#c0 bor (C bsr 6),
     16#80 bor (C band 16#3f)];
utf8_bytes(C) when is_integer(C), C =< 16#ffff ->
    [16#e0 bor (C bsr 12),
     16#80 bor ((C bsr 6) band 16#3f),
     16#80 bor (C band 16#3f)];
utf8_bytes(C) when is_integer(C), C =< 16#10ffff ->
    [16#f0 bor (C bsr 18),
     16#80 bor ((C bsr 12) band 16#3f),
     16#80 bor ((C bsr 6) band 16#3f),
     16#80 bor (C band 16#3f)].

%% Returns the three-character escape sequence "%xy" for a byte.
escape_byte(C) when C >= 0, C =< 255 ->
    [$%, hex_digit(C bsr 4), hex_digit(C band 15)].

%% Returns the lowercase hexadecimal digit character for 0-15.
hex_digit(N) when N >= 0, N =< 9 ->
    N + $0;
hex_digit(N) when N > 9, N =< 15 ->
    N + $a - 10.
422
423% utf8([C | Cs]) when C > 16#7f ->
424%     [((C band 16#c0) bsr 6) + 16#c0, C band 16#3f ++ 16#80 | utf8(Cs)];
425% utf8([C | Cs]) ->
426%     [C | utf8(Cs)];
427% utf8([]) ->
428%     [].
429
430%% Please note that URI are *not* file names. Don't use the stdlib
431%% 'filename' module for operations on (any parts of) URI.
432
%% @doc Joins the URI `Base' and the relative URI `Path' with a `/'
%% between them. If either part is the empty string, the other part is
%% returned unchanged.
%% @private
join_uri(Base, Path) ->
    case {Base, Path} of
        {_, ""} -> Base;
        {"", _} -> Path;
        _ -> Base ++ "/" ++ Path
    end.
440
441%% Check for relative URI; "network paths" ("//...") not included!
442
%% @doc Returns `true' if the string is a relative URI, otherwise
%% `false'. The string is scanned up to the first `:', `/', `?' or `#':
%% a colon (a scheme separator) or a double slash (a network path)
%% makes the URI non-relative; anything else, including reaching the
%% end of the string, makes it relative.
%% @private
is_relative_uri([]) ->
    true;
is_relative_uri([$: | _]) ->
    false;
is_relative_uri([$/, $/ | _]) ->
    false;
is_relative_uri([C | _]) when C =:= $/; C =:= $?; C =:= $# ->
    true;
is_relative_uri([_ | Cs]) ->
    is_relative_uri(Cs).
458
%% @doc Fetches the text that a URI refers to. Returns `{ok, Text}' or
%% `{error, Message}'.
%%
%% `file:' URIs without a host (or with host `localhost') and relative
%% URIs are read from the local file system; `http:' URIs are fetched
%% over HTTP. Strings that begin with a drive letter followed by `:/'
%% or `:\' are treated as local Windows file names. Other forms,
%% including `ftp:' URIs and network paths, yield an error message.
%% @private
uri_get("file:///" ++ Path) ->
    uri_get_file(Path);
uri_get("file://localhost/" ++ Path) ->
    uri_get_file(Path);
uri_get("file://" ++ Path) ->
    Msg = io_lib:format("cannot handle 'file:' scheme with "
                        "nonlocal network-path: 'file://~ts'.",
                        [Path]),
    {error, Msg};
uri_get("file:/" ++ Path) ->
    uri_get_file(Path);
uri_get("file:" ++ Path) ->
    Msg = io_lib:format("ignoring malformed URI: 'file:~ts'.", [Path]),
    {error, Msg};
uri_get("http:" ++ Path) ->
    uri_get_http("http:" ++ Path);
uri_get("ftp:" ++ Path) ->
    uri_get_ftp("ftp:" ++ Path);
uri_get("//" ++ Path) ->
    Msg = io_lib:format("cannot access network-path: '//~ts'.", [Path]),
    {error, Msg};
uri_get([C, $:, $/ | _]=Path) when C >= $A, C =< $Z; C >= $a, C =< $z ->
    uri_get_file(Path);  % special case for Windows
uri_get([C, $:, $\\ | _]=Path) when C >= $A, C =< $Z; C >= $a, C =< $z ->
    %% Note: the backslash must be written `$\\'; a single backslash
    %% followed by a space denotes the space character.
    uri_get_file(Path);  % special case for Windows
uri_get(URI) ->
    case is_relative_uri(URI) of
        true ->
            uri_get_file(URI);
        false ->
            Msg = io_lib:format("cannot handle URI: '~ts'.", [URI]),
            {error, Msg}
    end.
493
%% Reads the text of a local file. The file name is joined onto
%% ?FILE_BASE before it is read. Returns `{ok, Text}', or
%% `{error, Message}' with the message from `file:format_error/1'.
uri_get_file(File0) ->
    case read_file(filename:join(?FILE_BASE, File0)) of
        {error, Reason} ->
            {error, file:format_error(Reason)};
        {ok, Text} ->
            {ok, Text}
    end.
502
%% Fetches the text of an `http:' URI using `httpc:request/4'. The
%% request is first tried with the option `full_result' set to false;
%% if that call exits, the most general form of request is used
%% instead.
uri_get_http(URI) ->
    %% Try using option full_result=false
    Attempt = (catch {ok, httpc:request(get, {URI,[]}, [],
                                        [{full_result, false}])}),
    case Attempt of
        {'EXIT', _} ->
            uri_get_http_r10(URI);
        _ ->
            uri_get_http_1(Attempt, URI)
    end.
512
%% Fetches the text of an `http:' URI without any request options.
%% Failures of the call itself are caught and passed on for reporting.
uri_get_http_r10(URI) ->
    %% Try most general form of request
    uri_get_http_1((catch {ok, httpc:request(get, {URI,[]}, [], [])}),
                   URI).
517
%% Interprets the (caught) result of an HTTP request for `URI'.
%% Returns `{ok, Text}' for a response with status 200, and
%% `{error, Message}' in all other cases. The clauses are tried in
%% order, so the more specific result formats must come first.

%% new short result format
uri_get_http_1({ok, {ok, {200, Text}}}, _URI) when is_list(Text) ->
    {ok, Text};
%% new short result format when status /= 200
uri_get_http_1({ok, {ok, {Status, Text}}}, URI)
  when is_integer(Status), is_list(Text) ->
    {error, http_errmsg(httpd_util:reason_phrase(Status), URI)};
%% new long result format
uri_get_http_1({ok, {ok, {{_Vsn, 200, _Phrase}, _Hdrs, Text}}}, _URI)
  when is_list(Text) ->
    {ok, Text};
%% new long result format when status /= 200
uri_get_http_1({ok, {ok, {{_Vsn, _Status, Phrase}, _Hdrs, Text}}}, URI)
  when is_list(Text) ->
    {error, http_errmsg(Phrase, URI)};
%% old result format
uri_get_http_1({ok, {200, _Hdrs, Text}}, _URI) when is_list(Text) ->
    {ok, Text};
%% old result format when status /= 200
uri_get_http_1({ok, {Status, _Hdrs, Text}}, URI) when is_list(Text) ->
    {error, http_errmsg(httpd_util:reason_phrase(Status), URI)};
%% the request returned an error
uri_get_http_1({ok, {error, R}}, URI) ->
    {error, http_errmsg(inet:format_error(R), URI)};
%% the request returned something unexpected
uri_get_http_1({ok, R}, URI) ->
    Reason = io_lib:format("bad return value ~tP", [R, 5]),
    {error, http_errmsg(Reason, URI)};
%% the request crashed
uri_get_http_1({'EXIT', R}, URI) ->
    Reason = io_lib:format("crashed with reason ~tw", [R]),
    {error, http_errmsg(Reason, URI)};
%% the request threw a value
uri_get_http_1(R, URI) ->
    Reason = io_lib:format("uncaught throw: ~tw", [R]),
    {error, http_errmsg(Reason, URI)}.
553
%% Builds the error message text (as returned by io_lib:format/2) for a
%% failed HTTP request.
http_errmsg(Reason, URI) ->
    Args = [Reason, URI],
    io_lib:format("http error: ~ts: '~ts'", Args).
556
%% TODO: implement ftp access method

%% Always returns `{error, Message}', since the ftp scheme is not
%% supported yet.
uri_get_ftp(URI) ->
    {error, io_lib:format("cannot access ftp scheme yet: '~ts'.", [URI])}.
562
%% @doc Turns a string into a label: leading and trailing whitespace
%% (space, tab, newline) is removed, and every run of whitespace
%% between other characters is replaced by a single underscore.
%% @private
to_label([C | Cs]) when C =:= $\s; C =:= $\t; C =:= $\n ->
    %% Skip leading whitespace.
    to_label(Cs);
to_label(Cs) ->
    to_label_1(Cs).

to_label_1([C | Cs]) when C =:= $\s; C =:= $\t; C =:= $\n ->
    %% Start of a whitespace run: emit an underscore only if there is
    %% more text after it.
    case to_label(Cs) of
        [] -> [];
        Label -> [$_ | Label]
    end;
to_label_1([C | Cs]) ->
    [C | to_label_1(Cs)];
to_label_1([]) ->
    [].
591
592
593%% ---------------------------------------------------------------------
594%% Files
595
%% @doc Converts a file name, given as a possibly deep list of
%% characters and atoms or as a single atom, into a flat string. If the
%% name contains anything else, an error is reported and the process
%% exits with reason `error'.
%% @private
filename([]) ->
    [];
filename([C | Rest]) when is_integer(C), C > 0 ->
    [C | filename(Rest)];
filename([Part | Rest]) ->
    filename(Part) ++ filename(Rest);
filename(Name) when is_atom(Name) ->
    atom_to_list(Name);
filename(Bad) ->
    report("bad filename: `~tP'.", [Bad, 25]),
    exit(error).
608
%% @doc Copies the file `From' to `To' and returns `ok'. If copying
%% fails, an error is reported and the process exits with reason
%% `error'.
%% @private
copy_file(From, To) ->
    case file:copy(From, To) of
        {error, Reason} ->
            Text = file:format_error(Reason),
            report("error copying '~ts' to '~ts': ~ts.", [From, To, Text]),
            exit(error);
        {ok, _BytesCopied} ->
            ok
    end.
618
%% Returns the names of the entries in the directory `Dir'. If the
%% directory cannot be read and `Error' is `false', a warning is issued
%% and the empty list is returned. (Only `false' is handled; the branch
%% for `true' was disabled in the original code.)
list_dir(Dir, Error) ->
    case file:list_dir(Dir) of
        {error, Reason} ->
            case Error of
                false ->
                    warning("could not read directory '~ts': ~ts.",
                            [filename(Dir), file:format_error(Reason)]),
                    []
            end;
        {ok, Entries} ->
            Entries
    end.
633
%% @doc Removes trailing `.' and `..' components from the path `P'. A
%% trailing `.' is dropped, and a trailing `..' is dropped together
%% with the component before it; this is repeated until the last
%% component is something else.
%%
%% The function always terminates: when nothing is left to strip, the
%% path is returned as it is, so `"."' yields `"."' and `"x/.."' yields
%% `"."'. A `..' that has no component before it is left in place.
%% (Previously, these cases made the function loop forever, because the
%% directory part of `"."' is `"."' itself.) Apart from that, `..' is
%% resolved purely lexically against the preceding component, as before.
%% @private
simplify_path(P) ->
    case filename:basename(P) of
        "." ->
            simplify_path_1(P, filename:dirname(P));
        ".." ->
            case filename:dirname(P) of
                "." ->
                    %% No preceding component to cancel out.
                    P;
                Dir ->
                    simplify_path_1(P, filename:dirname(Dir))
            end;
        _ ->
            P
    end.

%% Continues with the shortened path, unless it is no shorter than the
%% path we started from (a fixed point of filename:dirname/1).
simplify_path_1(P, P) ->
    P;
simplify_path_1(_P, Shorter) ->
    simplify_path(Shorter).
644
645%% The directories From and To are assumed to exist.
646
647%% copy_dir(From, To) ->
648%%     Es = list_dir(From, true),    % error if listing fails
649%%     lists:foreach(fun (E) -> copy_dir(From, To, E) end, Es).
650
651%% copy_dir(From, To, Entry) ->
652%%     From1 = filename:join(From, Entry),
653%%     To1 = filename:join(To, Entry),
654%%     case filelib:is_dir(From1) of
655%% 	true ->
656%% 	    make_dir(To1),
657%% 	    copy_dir(From1, To1);
658%% 	false ->
659%% 	    copy_file(From1, To1)
660%%     end.
661
662%% make_dir(Dir) ->
663%%     case file:make_dir(Dir) of
664%% 	ok -> ok;
665%% 	{error, R} ->
666%% 	    R1 = file:format_error(R),
667%% 	    report("cannot create directory '~ts': ~ts.", [Dir, R1]),
668%% 	    exit(error)
669%%     end.
670
%% @doc Returns the path of `Subdir' within `Dir' if that is an
%% existing directory; otherwise returns `Dir' itself.
%% @private
try_subdir(Dir, Subdir) ->
    Candidate = filename:join(Dir, Subdir),
    Exists = filelib:is_dir(Candidate),
    if
        Exists -> Candidate;
        true -> Dir
    end.
678
%% @spec (Text::deep_string(), Dir::edoc:filename(),
%%        Name::edoc:filename()) -> ok
%%
%% @doc Write the given `Text' to the file named by `Name' in directory
%% `Dir'. If the target directory does not exist, it will be created.
%% The file is opened with the option `{encoding,latin1}'; use
%% `write_file/4' to pass other options to `file:open/2'. If the file
%% cannot be opened, an error is reported and the process exits with
%% reason `error'.
%% @private

write_file(Text, Dir, Name) ->
    write_file(Text, Dir, Name, [{encoding,latin1}]).

write_file(Text, Dir, Name, Options) ->
    File = filename:join([Dir, Name]),
    ok = filelib:ensure_dir(File),
    case file:open(File, [write | Options]) of
        {error, Reason} ->
            Msg = file:format_error(Reason),
            report("could not write file '~ts': ~ts.", [File, Msg]),
            exit(error);
        {ok, FD} ->
            io:put_chars(FD, Text),
            ok = file:close(FD)
    end.
701
%% @doc Writes the info file (named by ?INFO_FILE) to the directory
%% `Dir'. The file starts with a UTF-8 encoding declaration, followed
%% by an `{application, App}' term (left out if `App' is ?NO_APP) and a
%% `{modules, Modules}' term, each terminated by a period.
%% @private
write_info_file(App, Modules, Dir) ->
    ModuleTerm = {modules, Modules},
    Terms = case App =:= ?NO_APP of
                true -> [ModuleTerm];
                false -> [{application, App}, ModuleTerm]
            end,
    Body = [io_lib:fwrite("~p.\n", [T]) || T <- Terms],
    write_file(["%% encoding: UTF-8\n" | Body], Dir, ?INFO_FILE,
               [{encoding,unicode}]).
711
%% @spec (Name::edoc:filename()) -> {ok, string()} | {error, Reason}
%%
%% @doc Reads text from the file named by `Name'. The content is
%% decoded using the encoding given by {@link read_encoding/2}; if
%% it cannot be decoded, `{error, invalid_unicode}' is returned.

read_file(File) ->
    case file:read_file(File) of
        {error, Reason} ->
            {error, Reason};
        {ok, Bin} ->
            Enc = edoc_lib:read_encoding(File, []),
            Decoded = (catch unicode:characters_to_list(Bin, Enc)),
            if
                is_list(Decoded) -> {ok, Decoded};
                true -> {error, invalid_unicode}
            end
    end.
728
729
730%% ---------------------------------------------------------------------
731%% Info files
732
%% Extracts `{App, Modules}' from the list of terms read from an info
%% file. `App' is the value of the `application' entry (?NO_APP if
%% there is none), and `Modules' is the concatenation of the values of
%% all `modules' entries.
info_file_data(Ts) ->
    {proplists:get_value(application, Ts, ?NO_APP),
     proplists:append_values(modules, Ts)}.
737
%% Local file access - don't complain if file does not exist.

%% @doc Reads the info file (named by ?INFO_FILE) in the directory
%% `Dir' and returns `{App, Modules}'. Returns `{?NO_APP, []}' if the
%% file does not exist, or, after issuing a warning, if it cannot be
%% read.
%% @private
read_info_file(Dir) ->
    File = filename:join(Dir, ?INFO_FILE),
    case filelib:is_file(File) andalso read_file(File) of
        false ->
            {?NO_APP, []};
        {ok, Text} ->
            parse_info_file(Text, File);
        {error, Reason} ->
            Msg = file:format_error(Reason),
            warning("could not read '~ts': ~ts.", [File, Msg]),
            {?NO_APP, []}
    end.
756
%% URI access

%% Fetches and parses the info file (named by ?INFO_FILE) found under
%% the URI `Base'. Returns `{App, Modules}'; if the file cannot be
%% fetched, a warning is issued and `{?NO_APP, []}' is returned.
uri_get_info_file(Base) ->
    URI = join_uri(Base, ?INFO_FILE),
    case uri_get(URI) of
        {error, Msg} ->
            warning("could not read '~ts': ~ts.", [URI, Msg]),
            {?NO_APP, []};
        {ok, Text} ->
            parse_info_file(Text, URI)
    end.
768
%% Parses the text of an info file and returns `{App, Modules}'.
%% `Name' is the file name or URI to use in messages. If the text
%% cannot be parsed, a warning is issued and `{?NO_APP, []}' is
%% returned.
parse_info_file(Text, Name) ->
    case parse_terms(Text) of
        {ok, Terms} ->
            info_file_data(Terms);
        {error, Why} ->
            warn_bad_info_file(Why, Name),
            {?NO_APP, []}
    end.

%% Issues the warning for an info file that could not be parsed.
warn_bad_info_file(eof, Name) ->
    warning("unexpected end of file in '~ts'.", [Name]);
warn_bad_info_file({_Line,Module,R}, Name) ->
    warning("~ts: ~ts.", [Module:format_error(R), Name]).
780
%% Parses a string containing a sequence of Erlang terms, each one
%% terminated by a period. Returns `{ok, Terms}', or `{error, Info}'
%% with the error information from the scanner or parser, or
%% `{error, eof}' if the text ends in the middle of a term.
parse_terms(Text) ->
    case erl_scan:string(Text) of
        {error, Info, _EndLine} ->
            {error, Info};
        {ok, Tokens, _EndLine} ->
            parse_terms_1(Tokens, [], [])
    end.

%% Pending: tokens of the current term (reversed); Terms: the terms
%% parsed so far (reversed).
parse_terms_1([], [], Terms) ->
    {ok, lists:reverse(Terms)};
parse_terms_1([], _Pending, _Terms) ->
    %% Tokens left over without a terminating period.
    {error, eof};
parse_terms_1([{dot, _} = Dot | Tokens], Pending, Terms) ->
    case erl_parse:parse_term(lists:reverse([Dot | Pending])) of
        {error, Info} ->
            {error, Info};
        {ok, Term} ->
            parse_terms_1(Tokens, [], [Term | Terms])
    end;
parse_terms_1([Token | Tokens], Pending, Terms) ->
    parse_terms_1(Tokens, [Token | Pending], Terms).
802
803
804%% ---------------------------------------------------------------------
805%% Source files
806
%% @doc Finds the source files in the directories listed in `Path'.
%% Entries of `Path' that are not existing directories are skipped.
%% Returns a flat list of `{File, Dir}' pairs. See {@link edoc:run/2}
%% for a description of the options `subpackages', `source_suffix'.
%% @private

%% NEW-OPTIONS: subpackages, source_suffix
%% DEFER-OPTIONS: edoc:run/2

find_sources(Path, Opts) ->
    Recursive = proplists:get_bool(subpackages, Opts),
    Suffix = proplists:get_value(source_suffix, Opts,
                                 ?DEFAULT_SOURCE_SUFFIX),
    find_sources(Path, Recursive, Suffix, Opts).

find_sources(Path, Rec, Ext, _Opts) ->
    lists:flatten(find_sources_1(Path, Rec, Ext)).

%% Returns a deep list of the sources found in each directory. The
%% directories are visited from the last one to the first.
find_sources_1(Path, Rec, Ext) ->
    Visit = fun (Dir, Found) ->
                    case filelib:is_dir(Dir) of
                        true ->
                            [find_sources_2(Dir, Rec, Ext) | Found];
                        false ->
                            Found
                    end
            end,
    lists:foldr(Visit, [], Path).
833
%% Returns the `{File, Dir}' pairs for the source files in the
%% directory `Dir'. If `Rec' is true, the (deep) list of sources found
%% in the subdirectories is added in front of them.
find_sources_2(Dir, Rec, Ext) ->
    Entries = list_dir(Dir, false),    % just warn if listing fails
    Sources = [{Name, Dir} || Name <- Entries, is_source_file(Name, Ext)],
    if
        Rec =:= true ->
            [find_sources_3(Entries, Dir, Rec, Ext) | Sources];
        Rec =:= false ->
            Sources
    end.
843
%% Searches for sources in every entry of `Es' that is a subdirectory
%% of `Dir'.
find_sources_3(Es, Dir, Rec, Ext) ->
    Subdirs = [E || E <- Es, is_source_dir(E, Dir)],
    lists:map(fun (Subdir) ->
                      find_sources_2(filename:join(Dir, Subdir), Rec, Ext)
              end,
              Subdirs).
848
%% Returns `true' if the file name `Name' has the extension `Ext' and
%% the part before the extension is a name according to is_name/1.
is_source_file(Name, Ext) ->
    case filename:extension(Name) == Ext of
        true -> is_name(filename:rootname(Name, Ext));
        false -> false
    end.
852
%% Returns `true' if the entry `Name' in the directory `Dir' is itself
%% a directory.
is_source_dir(Name, Dir) ->
    Path = filename:join(Dir, Name),
    filelib:is_dir(Path).
855
%% @doc Looks for the file `Name' in the given list of directories, in
%% order. Returns the full name of the first file found, or the empty
%% string if no such file exists.
%% @private
find_file([], _Name) ->
    "";
find_file([Dir | Dirs], Name) ->
    Candidate = filename:join(Dir, Name),
    Exists = filelib:is_file(Candidate),
    if
        Exists -> Candidate;
        true -> find_file(Dirs, Name)
    end.
867
%% @doc Returns the documentation directories of the entries in the
%% code path that contain an info file (named by ?INFO_FILE). For each
%% entry, a final ?EBIN_DIR component is dropped, and the ?EDOC_DIR
%% subdirectory of the result is used if it exists.
%% @private
find_doc_dirs() ->
    find_doc_dirs(code:get_path()).

find_doc_dirs(CodePath) ->
    [Dir || P <- CodePath,
            Dir <- [doc_dir(P)],
            filelib:is_file(filename:join(Dir, ?INFO_FILE))].

%% Returns the directory where the documentation for the code path
%% entry `P0' would be located.
doc_dir(P0) ->
    P = filename:absname(P0),
    Top = case filename:basename(P) of
              ?EBIN_DIR -> filename:dirname(P);
              _ -> P
          end,
    try_subdir(Top, ?EDOC_DIR).
890
891%% All names with "internal linkage" are mapped to the empty string, so
892%% that relative references will be created. For apps, the empty string
893%% implies that we use the default app-path.
894
895%% NEW-OPTIONS: doc_path
896%% DEFER-OPTIONS: get_doc_env/3
897
%% Returns a pair `{AppFun, ModFun}' of lookup functions for
%% applications and modules (see make_links/3). The info files are
%% fetched from the locations given by the `doc_path' options, followed
%% by the directories found by find_doc_dirs/0. `App' and `Modules'
%% themselves are entered first, with the empty string as location.
get_doc_links(App, Modules, Opts) ->
    DocPath = proplists:append_values(doc_path, Opts) ++ find_doc_dirs(),
    External = [{Dir, uri_get_info_file(Dir)} || Dir <- DocPath],
    Empty = dict:new(),
    make_links([{"", {App, Modules}} | External], Empty, Empty).
904
%% Builds the lookup functions from a list of `{Dir, {App, Modules}}'
%% entries. `A' and `M' are dictionaries mapping application names and
%% module names, respectively, to directories; the first entry that
%% mentions a name decides its directory. The result is a pair of
%% functions which map a name to its directory, or to the empty string
%% if the name is not known.
make_links([{Dir, {App, Ms}} | Ds], A, M) ->
    A1 = case App == ?NO_APP of
             true -> A;
             false -> add_new(App, Dir, A)
         end,
    M1 = lists:foldl(fun (Mod, D) -> add_new(Mod, Dir, D) end, M, Ms),
    make_links(Ds, A1, M1);
make_links([], A, M) ->
    {link_lookup(A), link_lookup(M)}.

%% Returns a function that looks up a key in the dictionary `D',
%% yielding the empty string for unknown keys.
link_lookup(D) ->
    fun (K) ->
            case dict:find(K, D) of
                {ok, V} -> V;
                error -> ""
            end
    end.
922
%% Stores the value `V' for the key `K' in the dictionary `D', unless
%% the key already has a value, in which case `D' is returned as is.
add_new(K, V, D) ->
    case dict:find(K, D) of
        {ok, _Existing} ->
            D;
        error ->
            dict:store(K, V, D)
    end.
930
%% @spec (Options::proplist()) -> edoc_env()
%% @equiv get_doc_env([], [], Opts)
%% @private

get_doc_env(Opts) ->
    %% No application and no modules of our own.
    get_doc_env([], [], Opts).
937
%% @spec (App, Modules, Options::proplist()) -> edoc_env()
%%     App = [] | atom()
%%     Modules = [atom()]
%%     proplist() = [term()]
%%
%% @type proplist() = //stdlib/proplists:property().
%% @type edoc_env(). Environment information needed by EDoc for
%% generating references. The data representation is not documented.
%%
%% @doc Creates an environment data structure used by parts of EDoc for
%% generating references, etc. The environment also holds the values of
%% all `includes' options. See {@link edoc:run/2} for a description
%% of the options `file_suffix', `app_default' and `doc_path'.
%%
%% @see edoc_extract:source/4
%% @see edoc:get_doc/3

%% NEW-OPTIONS: file_suffix, app_default
%% INHERIT-OPTIONS: get_doc_links/4
%% DEFER-OPTIONS: edoc:run/2

get_doc_env(App, Modules, Opts) ->
    FileSuffix = proplists:get_value(file_suffix, Opts,
                                     ?DEFAULT_FILE_SUFFIX),
    DefaultApp = proplists:get_value(app_default, Opts, ?APP_DEFAULT),
    IncludeDirs = proplists:append_values(includes, Opts),
    {AppLinks, ModuleLinks} = get_doc_links(App, Modules, Opts),
    #env{file_suffix = FileSuffix,
         apps = AppLinks,
         modules = ModuleLinks,
         app_default = DefaultApp,
         includes = IncludeDirs}.
971
972%% ---------------------------------------------------------------------
973%% Plug-in modules
974
%% @doc Applies `Fun' to the doclet module, which is selected by the
%% `doclet' option and defaults to ?DEFAULT_DOCLET, and returns the
%% result. See {@link edoc:run/2} for a description of the `doclet'
%% option.

%% NEW-OPTIONS: doclet
%% DEFER-OPTIONS: edoc:run/2

%% @private
run_doclet(Fun, Opts) ->
    run_plugin(doclet, ?DEFAULT_DOCLET, Fun, Opts).
983
%% @doc Applies `Fun' to the layout module, which is selected by the
%% `layout' option and defaults to ?DEFAULT_LAYOUT, and returns the
%% result. See {@link edoc:layout/2} for a description of the `layout'
%% option.

%% NEW-OPTIONS: layout
%% DEFER-OPTIONS: edoc:layout/2

%% @private
run_layout(Fun, Opts) ->
    run_plugin(layout, ?DEFAULT_LAYOUT, Fun, Opts).
993
%% Applies `Fun' to the plug-in module selected by the option `Name'
%% (or `Key', if given separately) and returns the result. If the
%% function does not return normally, an error is reported and the
%% process exits with reason `error'.
run_plugin(Name, Default, Fun, Opts) ->
    run_plugin(Name, Name, Default, Fun, Opts).

run_plugin(Name, Key, Default, Fun, Opts) when is_atom(Name) ->
    Module = get_plugin(Key, Default, Opts),
    %% The result is wrapped so that a normal return can be told apart
    %% from a caught exception.
    Outcome = (catch {ok, Fun(Module)}),
    case Outcome of
        {ok, Value} ->
            Value;
        _ ->
            report("error in ~ts '~w': ~tP.", [Name, Module, Outcome, 20]),
            exit(error)
    end.
1006
%% Returns the module name given by the option `Key', or `Default' if
%% the option is not present. If the value is not an atom, an error is
%% reported and the process exits with reason `error'.
get_plugin(Key, Default, Opts) ->
    Value = proplists:get_value(Key, Opts, Default),
    if
        is_atom(Value) ->
            Value;
        true ->
            report("bad value for option '~w': ~tP.", [Key, Value, 10]),
            exit(error)
    end.
1015
1016
1017%% ---------------------------------------------------------------------
1018%% Error handling
1019
-type line() :: erl_anno:line().
-type err()  :: 'eof'
              | {'missing', char()}
              | {line(), atom(), string()}
              | {string(), [term()]}
              | string().

%% Throws `{error, Line, Descriptor}' for the given error:
%%   eof               - unexpected end of expression;
%%   {missing, C}      - the character C is missing; thrown as a
%%                       `{Format, Args}' descriptor;
%%   {Line, Mod, Desc} - error information from the scanner or parser;
%%                       its own line is used instead of `L', and the
%%                       descriptor is `{format_error, Mod, Desc}';
%%   anything else     - thrown as the descriptor, unchanged.
%% The `{Format, Args}' form is part of err() because the clause for
%% `{missing, C}' passes such a term to this function.
-spec throw_error(err(), line()) -> no_return().

throw_error({missing, C}, L) ->
    throw_error({"missing '~c'.", [C]}, L);
throw_error(eof, L) ->
    throw({error,L,"unexpected end of expression."});
throw_error({L, M, D}, _L) ->
    throw({error,L,{format_error,M,D}});
throw_error(D, L) ->
    throw({error, L, D}).
1036