1%% =====================================================================
2%% Licensed under the Apache License, Version 2.0 (the "License"); you may
3%% not use this file except in compliance with the License. You may obtain
4%% a copy of the License at <http://www.apache.org/licenses/LICENSE-2.0>
5%%
6%% Unless required by applicable law or agreed to in writing, software
7%% distributed under the License is distributed on an "AS IS" BASIS,
8%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9%% See the License for the specific language governing permissions and
10%% limitations under the License.
11%%
12%% Alternatively, you may use this file under the terms of the GNU Lesser
13%% General Public License (the "LGPL") as published by the Free Software
14%% Foundation; either version 2.1, or (at your option) any later version.
15%% If you wish to allow use of your version of this file only under the
16%% terms of the LGPL, you should delete the provisions above and replace
17%% them with the notice and other provisions required by the LGPL; see
18%% <http://www.gnu.org/licenses/>. If you do not delete the provisions
19%% above, a recipient may use your version of this file under the terms of
20%% either the Apache License or the LGPL.
21%%
22%% @private
23%% @copyright 2001-2003 Richard Carlsson
24%% @author Richard Carlsson <carlsson.richard@gmail.com>
25%% @see edoc
26%% @end
27%% =====================================================================
28
29%% @doc EDoc wiki expansion, parsing and postprocessing of XML text.
30%% Uses {@link //xmerl. XMerL}.
31%% @end
32
33%% Notes:
34%%
35%% * Whatever happens in this module, it must interact nicely with the
36%% actual XML-parsing. It is not acceptable to break any existing and
37%% legal XML markup so that it does not parse or is rendered wrong.
38%%
39%% * The focus should always be on making *documentation* easier to
40%% write. No wiki notation should be introduced unless it is clear that
41%% it is better than using plain XHTML, making typing less cumbersome
42%% and the resulting text easier to read. The wiki notation should be a
43%% small bag of easy-to-remember tricks for making XHTML documentation
44%% easier to write, not a complete markup language in itself. As a
45%% typical example, it is hardly worthwile to introduce a special
46%% notation like say, ""..."" for emphasized text, since <em>...</em> is
47%% not much harder to write, not any less readable, and no more
48%% difficult to remember, especially since emphasis is not very often
49%% occurring in normal documentation.
50%%
51%% * The central reasoning for the code-quoting goes like this: I don't
52%% want to have special escape characters within the quotes (like
53%% backslash in C), to allow quoting of the quote characters themselves.
54%% I also don't want to use the "`" character both for opening and
55%% closing quotes. Therefore, you can either use `...' - and then you
56%% cannot use the "'" character without ending the quote - or you can
57%% use ``...'' - which allows single but not double "'" characters
58%% within the quote. Whitespace is automatically removed from the
59%% beginning and the end of the quoted strings; this allows you to write
60%% things like "`` 'foo@bar' ''". Text that contains "''" has to be
61%% written within <code>...</code>.
62%%
63%% To produce a single "`" character without starting a quote, write
64%% "`'" (no space between "`" and "'").
65%%
66%% For verbatim/preformatted text, the ```...'''-quotes expand to
67%% "<pre><![CDATA[...]]></pre>". The indentation at the start of the
68%% quoted string is preserved; whitespace is stripped only at the end.
69%% Whole leading lines of whitespace are however skipped.
70
71-module(edoc_wiki).
72
73-export([parse_xml/2, expand_text/2]).
74
75-include("edoc.hrl").
76-include_lib("xmerl/include/xmerl.hrl").
77
78-define(BASE_HEADING, 3).
79
80
81%% Parsing Wiki-XML with pre-and post-expansion.
82
83parse_xml(Data, Line) ->
84    par(parse_xml_1(expand_text(Data, Line), Line)).
85
86parse_xml_1(Text, Line) ->
87    Text1 = "<doc>" ++ Text ++ "</doc>",
88    %% Any coding except "utf-8".
89    Opts = [{line, Line}, {encoding, 'iso-8859-1'}],
90    case catch {ok, xmerl_scan:string(Text1, Opts)} of
91	{ok, {E, _}} ->
92	    E#xmlElement.content;
93	{'EXIT', {fatal, {Reason, L, _C}}} ->
94	    throw_error(L, {"XML parse error: ~p.", [Reason]});
95	{'EXIT', Reason} ->
96	    throw_error(Line, {"error in XML parser: ~P.", [Reason, 10]});
97	Other ->
98	    throw_error(Line, {"nocatch in XML parser: ~P.", [Other, 10]})
99    end.
100
101%% Expand wiki stuff in arbitrary text.
102
103expand_text(Cs, L) ->
104    lists:reverse(expand_new_line(Cs, L, [])).
105
106%% Interestingly, the reverse of "code" is "edoc". :-)
107
108expand_new_line([$\s = C | Cs], L, As) ->
109    expand_new_line(Cs, L, [C | As]);
110expand_new_line([$\t = C | Cs], L, As) ->
111    expand_new_line(Cs, L, [C | As]);
112expand_new_line([$\n = C | Cs], L, As) ->
113    expand_new_line(Cs, L + 1, [C | As]);
114expand_new_line([$=, $=, $=, $= | Cs], L, As) ->
115    expand_heading(Cs, 2, L, As);
116expand_new_line([$=, $=, $= | Cs], L, As) ->
117    expand_heading(Cs, 1, L, As);
118expand_new_line([$=, $= | Cs], L, As) ->
119    expand_heading(Cs, 0, L, As);
120expand_new_line(Cs, L, As) ->
121    expand(Cs, L, As).
122
123expand([$`, $' | Cs], L, As) ->
124    expand(Cs, L, [$` | As]);    % produce "`" - don't start a new quote
125expand([$`, $`, $` | Cs], L, As) ->
126    %% If this is the first thing on the line, compensate for the
127    %% indentation, unless we had to skip one or more empty lines.
128    {Cs1, Skipped} = strip_empty_lines(Cs),    % avoid vertical space
129    N = if Skipped > 0 ->
130		0;
131	   true ->
132		{As1, _} = edoc_lib:split_at(As, $\n),
133		case edoc_lib:is_space(As1) of
134		    true -> 3 + length(As1);
135		    false -> 2    % nice default - usually right.
136		end
137	end,
138    Ss = lists:duplicate(N, $\s),
139    expand_triple(Cs1, L + Skipped, Ss ++ "[ATADC[!<>erp<" ++ As);
140expand([$`, $` | Cs], L, As) ->
141    expand_double(edoc_lib:strip_space(Cs), L, ">edoc<" ++ As);
142expand([$` | Cs], L, As) ->
143    expand_single(edoc_lib:strip_space(Cs), L, ">edoc<" ++ As);
144expand([$[ | Cs], L, As) ->
145    expand_uri(Cs, L, As);
146expand([$\n = C | Cs], L, As) ->
147    expand_new_line(Cs, L + 1, [C | As]);
148expand([C | Cs], L, As) ->
149    expand(Cs, L, [C | As]);
150expand([], _, As) ->
151    As.
152
153%% == Heading ==
154%% === SubHeading ===
155%% ==== SubSubHeading ====
156
157expand_heading([$= | _] = Cs, N, L, As) ->
158    expand_heading_1(Cs, N, L, As);
159expand_heading(Cs, N, L, As) ->
160    {Cs1, Cs2} = edoc_lib:split_at(Cs, $\n),
161    case edoc_lib:strip_space(lists:reverse(Cs1)) of
162	[$=, $= | Cs3] ->
163	    {Es, Ts} = lists:splitwith(fun (X) -> X =:= $= end, Cs3),
164	    if length(Es) =:= N ->
165		    Ts1 = edoc_lib:strip_space(
166			    lists:reverse(edoc_lib:strip_space(Ts))),
167		    expand_heading_2(Ts1, Cs2, N, L, As);
168	       true ->
169		    H1 = lists:duplicate(N+2, $=),
170		    H2 = "==" ++ Es,
171		    throw_error(L, {"heading end marker mismatch: "
172				     "~s...~s", [H1, H2]})
173	    end;
174	_ ->
175	    expand_heading_1(Cs, N, L, As)
176    end.
177
178expand_heading_1(Cs, N, L, As) ->
179    expand(Cs, L, lists:duplicate(N + 2, $=) ++ As).
180
181expand_heading_2(Ts, Cs, N, L, As) ->
182    H = ?BASE_HEADING + N,
183    Ts1 = io_lib:format("<h~w><a name=\"~ts\">~ts</a></h~w>\n",
184			[H, make_label(Ts), Ts, H]),
185    expand_new_line(Cs, L + 1, lists:reverse(lists:flatten(Ts1), As)).
186
187make_label([$\s | Cs]) ->
188    [$_ | make_label(edoc_lib:strip_space(Cs))];
189make_label([$\t | Cs]) ->
190    [$_ | make_label(edoc_lib:strip_space(Cs))];
191make_label([$\n | Cs]) ->
192    [$_ | make_label(edoc_lib:strip_space(Cs))];
193make_label([C | Cs]) ->
194    [C | make_label(Cs)];
195make_label([]) ->
196    [].
197
198%% `...'
199
200expand_single(Cs, L, As) ->
201    expand_single(Cs, L, As, L).
202
203expand_single([$' | Cs], L, As, _L0) ->
204    expand(Cs, L, ">edoc/<" ++ edoc_lib:strip_space(As));
205expand_single([$< | Cs], L, As, L0) ->
206    expand_single(Cs, L, ";tl&" ++ As, L0);
207expand_single([$> | Cs], L, As, L0) ->
208    expand_single(Cs, L, ";tg&" ++ As, L0);
209expand_single([$& | Cs], L, As, L0) ->
210    expand_single(Cs, L, ";pma&" ++ As, L0);
211expand_single([$\n = C | Cs], L, As, L0) ->
212    expand_single(Cs, L + 1, [C | As], L0);
213expand_single([C | Cs], L, As, L0) ->
214    expand_single(Cs, L, [C | As], L0);
215expand_single([], L, _, L0) ->
216    throw_error(L0, {"`-quote ended unexpectedly at line ~w", [L]}).
217
218%% ``...''
219
220expand_double(Cs, L, As) ->
221    expand_double(Cs, L, As, L).
222
223expand_double([$', $' | Cs], L, As, _L0) ->
224    expand(Cs, L, ">edoc/<" ++ edoc_lib:strip_space(As));
225expand_double([$< | Cs], L, As, L0) ->
226    expand_double(Cs, L, ";tl&" ++ As, L0);
227expand_double([$> | Cs], L, As, L0) ->
228    expand_double(Cs, L, ";tg&" ++ As, L0);
229expand_double([$& | Cs], L, As, L0) ->
230    expand_double(Cs, L, ";pma&" ++ As, L0);
231expand_double([$\n = C | Cs], L, As, L0) ->
232    expand_double(Cs, L + 1, [C | As], L0);
233expand_double([C | Cs], L, As, L0) ->
234    expand_double(Cs, L, [C | As], L0);
235expand_double([], L, _, L0) ->
236    throw_error(L0, {"``-quote ended unexpectedly at line ~w", [L]}).
237
238%% ```...'''
239
240expand_triple(Cs, L, As) ->
241    expand_triple(Cs, L, As, L).
242
243expand_triple([$', $', $' | Cs], L, As, _L0) ->      % ' stupid emacs
244    expand(Cs, L, ">erp/<>]]" ++ edoc_lib:strip_space(As));
245expand_triple([$], $], $> | Cs], L, As, L0) ->
246    expand_triple(Cs, L, ";tg&]]" ++ As, L0);
247expand_triple([$\n = C | Cs], L, As, L0) ->
248    expand_triple(Cs, L + 1, [C | As], L0);
249expand_triple([C | Cs], L, As, L0) ->
250    expand_triple(Cs, L, [C | As], L0);
251expand_triple([], L, _, L0) ->
252    throw_error(L0, {"```-quote ended unexpectedly at line ~w", [L]}).
253
254%% e.g. [file:/...] or [http://... LinkText]
255
256expand_uri("http:/" ++ Cs, L, As) ->
257    expand_uri(Cs, L, "/:ptth", As);
258expand_uri("https:/" ++ Cs, L, As) ->
259    expand_uri(Cs, L, "/:sptth", As);
260expand_uri("ftp:/" ++ Cs, L, As) ->
261    expand_uri(Cs, L, "/:ptf", As);
262expand_uri("file:/" ++ Cs, L, As) ->
263    expand_uri(Cs, L, "/:elif", As);
264expand_uri("mailto:/" ++ Cs, L, As) ->
265    expand_uri(Cs, L, "/:otliam", As);
266expand_uri("nfs:/" ++ Cs, L, As) ->
267    expand_uri(Cs, L, "/:sfn", As);
268expand_uri("shttp:/" ++ Cs, L, As) ->
269    expand_uri(Cs, L, "/:ptths", As);
270expand_uri("xmpp:/" ++ Cs, L, As) ->
271    expand_uri(Cs, L, "/:ppmx", As);
272expand_uri(Cs, L, As) ->
273    expand(Cs, L, [$[ | As]).
274
275expand_uri([$] | Cs], L, Us, As) ->
276    expand(Cs, L, push_uri(Us, ">tt/<" ++ Us ++ ">tt<", As));
277expand_uri([$\s = C | Cs], L, Us, As) ->
278    expand_uri(Cs, 0, L, [C], Us, As);
279expand_uri([$\t = C | Cs], L, Us, As) ->
280    expand_uri(Cs, 0, L, [C], Us, As);
281expand_uri([$\n = C | Cs], L, Us, As) ->
282    expand_uri(Cs, 1, L, [C], Us, As);
283expand_uri([C | Cs], L, Us, As) ->
284    expand_uri(Cs, L, [C | Us], As);
285expand_uri([], L, Us, _As) ->
286    expand_uri_error(Us, L).
287
288expand_uri([$] | Cs], N, L, Ss, Us, As) ->
289    Ss1 = lists:reverse(edoc_lib:strip_space(
290			  lists:reverse(edoc_lib:strip_space(Ss)))),
291    expand(Cs, L + N, push_uri(Us, Ss1, As));
292expand_uri([$\n = C | Cs], N, L, Ss, Us, As) ->
293    expand_uri(Cs, N + 1, L, [C | Ss], Us, As);
294expand_uri([C | Cs], N, L, Ss, Us, As) ->
295    expand_uri(Cs, N, L, [C | Ss], Us, As);
296expand_uri([], _, L, _Ss, Us, _As) ->
297    expand_uri_error(Us, L).
298
299-spec expand_uri_error(list(), pos_integer()) -> no_return().
300
301expand_uri_error(Us, L) ->
302    {Ps, _} = edoc_lib:split_at(lists:reverse(Us), $:),
303    throw_error(L, {"reference '[~ts:...' ended unexpectedly", [Ps]}).
304
305
306push_uri(Us, Ss, As) ->
307    ">a/<" ++ Ss ++ ">\"pot_\"=tegrat \"" ++ Us ++ "\"=ferh a<" ++ As.
308
309
310strip_empty_lines(Cs) ->
311    strip_empty_lines(Cs, 0).
312
313strip_empty_lines([], N) ->
314    {[], N};					% reached the end of input
315strip_empty_lines(Cs, N) ->
316    {Cs1, Cs2} = edoc_lib:split_at(Cs, $\n),
317    case edoc_lib:is_space(Cs1) of
318	true ->
319	    strip_empty_lines(Cs2, N + 1);
320	false ->
321	    {Cs, N}
322    end.
323
324
325%% Scanning element content for paragraph breaks (empty lines).
326%% Paragraphs are flushed by block level elements.
327
328par(Es) ->
329    par(Es, [], []).
330
331par([E=#xmlText{value = Value} | Es], As, Bs) ->
332    par_text(Value, As, Bs, E, Es);
333par([E=#xmlElement{name = Name} | Es], As, Bs) ->
334    %% (Note that paragraphs may not contain any further block-level
335    %% elements, including other paragraphs. Tables get complicated.)
336    case Name of
337	'p'          -> par_flush(Es, [E | As], Bs);
338	'hr'         -> par_flush(Es, [E | As], Bs);
339	'h1'         -> par_flush(Es, [E | As], Bs);
340	'h2'         -> par_flush(Es, [E | As], Bs);
341	'h3'         -> par_flush(Es, [E | As], Bs);
342	'h4'         -> par_flush(Es, [E | As], Bs);
343	'h5'         -> par_flush(Es, [E | As], Bs);
344	'h6'         -> par_flush(Es, [E | As], Bs);
345	'pre'        -> par_flush(Es, [E | As], Bs);
346	'address'    -> par_flush(Es, [E | As], Bs);
347	'div'        -> par_flush(Es, [par_elem(E) | As], Bs);
348	'blockquote' -> par_flush(Es, [par_elem(E) | As], Bs);
349	'form'       -> par_flush(Es, [par_elem(E) | As], Bs);
350	'fieldset'   -> par_flush(Es, [par_elem(E) | As], Bs);
351	'noscript'   -> par_flush(Es, [par_elem(E) | As], Bs);
352	'ul'         -> par_flush(Es, [par_subelem(E) | As], Bs);
353	'ol'         -> par_flush(Es, [par_subelem(E) | As], Bs);
354	'dl'         -> par_flush(Es, [par_subelem(E) | As], Bs);
355	'table'      -> par_flush(Es, [par_subelem(E) | As], Bs);
356	_            -> par(Es, [E | As], Bs)
357    end;
358par([E | Es], As, Bs) ->
359    par(Es, [E | As], Bs);
360par([], As, Bs) ->
361    lists:reverse(As ++ Bs).
362
363par_text(Cs, As, Bs, E, Es) ->
364    case ptxt(Cs) of
365	none ->
366	    %% no blank lines: keep this element as it is
367	    par(Es, [E | As], Bs);
368	{Cs1, Ss, Cs2} ->
369	    Es1 = case Cs1 of
370		      [] -> lists:reverse(As);
371		      _ -> lists:reverse(As, [E#xmlText{value = Cs1}])
372		  end,
373	    Bs0 = case Es1 of
374		      [] -> Bs;
375		      _ -> [#xmlElement{name = p, content = Es1} | Bs]
376		  end,
377	    Bs1 = [#xmlText{value = Ss} | Bs0],
378	    case Cs2 of
379		[] ->
380		    par(Es, [], Bs1);
381		_ ->
382		    par_text(Cs2, [], Bs1, #xmlText{value = Cs2}, Es)
383	    end
384    end.
385
386par_flush(Es, As, Bs) ->
387    par(Es, [], As ++ Bs).
388
389par_elem(E) ->
390    E#xmlElement{content = par(E#xmlElement.content)}.
391
392%% Only process content of subelements; ignore immediate content.
393par_subelem(E) ->
394    E#xmlElement{content = par_subelem_1(E#xmlElement.content)}.
395
396par_subelem_1([E=#xmlElement{name = Name} | Es]) ->
397    E1 = case par_skip(Name) of
398	     true ->
399		 E;
400	     false ->
401		 case par_sub(Name) of
402		     true ->
403			 par_subelem(E);
404		     false ->
405			 par_elem(E)
406		 end
407	 end,
408    [E1 | par_subelem_1(Es)];
409par_subelem_1([E | Es]) ->
410    [E | par_subelem_1(Es)];
411par_subelem_1([]) ->
412    [].
413
414par_skip('caption') -> true;
415par_skip('col') -> true;
416par_skip('colgroup') -> true;
417par_skip(_) -> false.
418
419par_sub(tr) -> true;
420par_sub(thead) -> true;
421par_sub(tfoot) -> true;
422par_sub(tbody) -> true;
423par_sub(_) -> false.
424
425
426%% scanning text content for a blank line
427
428ptxt(Cs) ->
429    ptxt(Cs, []).
430
431ptxt([$\n | Cs], As) ->
432    ptxt_1(Cs, As, [$\n]);
433ptxt([C | Cs], As) ->
434    ptxt(Cs, [C | As]);
435ptxt([], _As) ->
436    none.
437
438%% scanning text following an initial newline
439ptxt_1([C=$\s | Cs], As, Ss) ->
440    ptxt_1(Cs, As, [C | Ss]);
441ptxt_1([C=$\t | Cs], As, Ss) ->
442    ptxt_1(Cs, As, [C | Ss]);
443ptxt_1([C=$\n | Cs], As, Ss) ->
444    %% blank line detected
445    ptxt_2(Cs, As, [C | Ss]);
446ptxt_1(Cs, As, Ss) ->
447    %% not a blank line
448    ptxt(Cs, lists:reverse(Ss, As)).
449
450%% collecting whitespace following a blank line
451ptxt_2([C=$\s | Cs], As, Ss) ->
452    ptxt_2(Cs, As, [C | Ss]);
453ptxt_2([C=$\t | Cs], As, Ss) ->
454    ptxt_2(Cs, As, [C | Ss]);
455ptxt_2([C=$\n | Cs], As, Ss) ->
456    ptxt_2(Cs, As, [C | Ss]);
457ptxt_2(Cs, As, Ss) ->
458    %% ended by non-whitespace or end of element
459    case edoc_lib:is_space(As) of
460	true ->
461	    {[], lists:reverse(Ss ++ As), Cs};
462	false ->
463	    {lists:reverse(As), lists:reverse(Ss), Cs}
464    end.
465
466
467-spec throw_error(non_neg_integer(), {string(), [_]}) -> no_return().
468
469throw_error(L, D) ->
470    throw({error, L, D}).
471