1%% ===================================================================== 2%% Licensed under the Apache License, Version 2.0 (the "License"); you may 3%% not use this file except in compliance with the License. You may obtain 4%% a copy of the License at <http://www.apache.org/licenses/LICENSE-2.0> 5%% 6%% Unless required by applicable law or agreed to in writing, software 7%% distributed under the License is distributed on an "AS IS" BASIS, 8%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9%% See the License for the specific language governing permissions and 10%% limitations under the License. 11%% 12%% Alternatively, you may use this file under the terms of the GNU Lesser 13%% General Public License (the "LGPL") as published by the Free Software 14%% Foundation; either version 2.1, or (at your option) any later version. 15%% If you wish to allow use of your version of this file only under the 16%% terms of the LGPL, you should delete the provisions above and replace 17%% them with the notice and other provisions required by the LGPL; see 18%% <http://www.gnu.org/licenses/>. If you do not delete the provisions 19%% above, a recipient may use your version of this file under the terms of 20%% either the Apache License or the LGPL. 21%% 22%% @private 23%% @copyright 2001-2003 Richard Carlsson 24%% @author Richard Carlsson <carlsson.richard@gmail.com> 25%% @see edoc 26%% @end 27%% ===================================================================== 28 29%% @doc EDoc wiki expansion, parsing and postprocessing of XML text. 30%% Uses {@link //xmerl. XMerL}. 31%% @end 32 33%% Notes: 34%% 35%% * Whatever happens in this module, it must interact nicely with the 36%% actual XML-parsing. It is not acceptable to break any existing and 37%% legal XML markup so that it does not parse or is rendered wrong. 38%% 39%% * The focus should always be on making *documentation* easier to 40%% write. No wiki notation should be introduced unless it is clear that 41%% it is better than using plain XHTML, making typing less cumbersome 42%% and the resulting text easier to read. The wiki notation should be a 43%% small bag of easy-to-remember tricks for making XHTML documentation 44%% easier to write, not a complete markup language in itself. As a 45%% typical example, it is hardly worthwile to introduce a special 46%% notation like say, ""..."" for emphasized text, since <em>...</em> is 47%% not much harder to write, not any less readable, and no more 48%% difficult to remember, especially since emphasis is not very often 49%% occurring in normal documentation. 50%% 51%% * The central reasoning for the code-quoting goes like this: I don't 52%% want to have special escape characters within the quotes (like 53%% backslash in C), to allow quoting of the quote characters themselves. 54%% I also don't want to use the "`" character both for opening and 55%% closing quotes. Therefore, you can either use `...' - and then you 56%% cannot use the "'" character without ending the quote - or you can 57%% use ``...'' - which allows single but not double "'" characters 58%% within the quote. Whitespace is automatically removed from the 59%% beginning and the end of the quoted strings; this allows you to write 60%% things like "`` 'foo@bar' ''". Text that contains "''" has to be 61%% written within <code>...</code>. 62%% 63%% To produce a single "`" character without starting a quote, write 64%% "`'" (no space between "`" and "'"). 65%% 66%% For verbatim/preformatted text, the ```...'''-quotes expand to 67%% "<pre><![CDATA[...]]></pre>". The indentation at the start of the 68%% quoted string is preserved; whitespace is stripped only at the end. 69%% Whole leading lines of whitespace are however skipped. 70 71-module(edoc_wiki). 72 73-export([parse_xml/2, expand_text/2]). 74 75-include("edoc.hrl"). 76-include_lib("xmerl/include/xmerl.hrl"). 77 78-define(BASE_HEADING, 3). 79 80 81%% Parsing Wiki-XML with pre-and post-expansion. 82 83parse_xml(Data, Line) -> 84 par(parse_xml_1(expand_text(Data, Line), Line)). 85 86parse_xml_1(Text, Line) -> 87 Text1 = "<doc>" ++ Text ++ "</doc>", 88 %% Any coding except "utf-8". 89 Opts = [{line, Line}, {encoding, 'iso-8859-1'}], 90 case catch {ok, xmerl_scan:string(Text1, Opts)} of 91 {ok, {E, _}} -> 92 E#xmlElement.content; 93 {'EXIT', {fatal, {Reason, L, _C}}} -> 94 throw_error(L, {"XML parse error: ~p.", [Reason]}); 95 {'EXIT', Reason} -> 96 throw_error(Line, {"error in XML parser: ~P.", [Reason, 10]}); 97 Other -> 98 throw_error(Line, {"nocatch in XML parser: ~P.", [Other, 10]}) 99 end. 100 101%% Expand wiki stuff in arbitrary text. 102 103expand_text(Cs, L) -> 104 lists:reverse(expand_new_line(Cs, L, [])). 105 106%% Interestingly, the reverse of "code" is "edoc". :-) 107 108expand_new_line([$\s = C | Cs], L, As) -> 109 expand_new_line(Cs, L, [C | As]); 110expand_new_line([$\t = C | Cs], L, As) -> 111 expand_new_line(Cs, L, [C | As]); 112expand_new_line([$\n = C | Cs], L, As) -> 113 expand_new_line(Cs, L + 1, [C | As]); 114expand_new_line([$=, $=, $=, $= | Cs], L, As) -> 115 expand_heading(Cs, 2, L, As); 116expand_new_line([$=, $=, $= | Cs], L, As) -> 117 expand_heading(Cs, 1, L, As); 118expand_new_line([$=, $= | Cs], L, As) -> 119 expand_heading(Cs, 0, L, As); 120expand_new_line(Cs, L, As) -> 121 expand(Cs, L, As). 122 123expand([$`, $' | Cs], L, As) -> 124 expand(Cs, L, [$` | As]); % produce "`" - don't start a new quote 125expand([$`, $`, $` | Cs], L, As) -> 126 %% If this is the first thing on the line, compensate for the 127 %% indentation, unless we had to skip one or more empty lines. 128 {Cs1, Skipped} = strip_empty_lines(Cs), % avoid vertical space 129 N = if Skipped > 0 -> 130 0; 131 true -> 132 {As1, _} = edoc_lib:split_at(As, $\n), 133 case edoc_lib:is_space(As1) of 134 true -> 3 + length(As1); 135 false -> 2 % nice default - usually right. 136 end 137 end, 138 Ss = lists:duplicate(N, $\s), 139 expand_triple(Cs1, L + Skipped, Ss ++ "[ATADC[!<>erp<" ++ As); 140expand([$`, $` | Cs], L, As) -> 141 expand_double(edoc_lib:strip_space(Cs), L, ">edoc<" ++ As); 142expand([$` | Cs], L, As) -> 143 expand_single(edoc_lib:strip_space(Cs), L, ">edoc<" ++ As); 144expand([$[ | Cs], L, As) -> 145 expand_uri(Cs, L, As); 146expand([$\n = C | Cs], L, As) -> 147 expand_new_line(Cs, L + 1, [C | As]); 148expand([C | Cs], L, As) -> 149 expand(Cs, L, [C | As]); 150expand([], _, As) -> 151 As. 152 153%% == Heading == 154%% === SubHeading === 155%% ==== SubSubHeading ==== 156 157expand_heading([$= | _] = Cs, N, L, As) -> 158 expand_heading_1(Cs, N, L, As); 159expand_heading(Cs, N, L, As) -> 160 {Cs1, Cs2} = edoc_lib:split_at(Cs, $\n), 161 case edoc_lib:strip_space(lists:reverse(Cs1)) of 162 [$=, $= | Cs3] -> 163 {Es, Ts} = lists:splitwith(fun (X) -> X =:= $= end, Cs3), 164 if length(Es) =:= N -> 165 Ts1 = edoc_lib:strip_space( 166 lists:reverse(edoc_lib:strip_space(Ts))), 167 expand_heading_2(Ts1, Cs2, N, L, As); 168 true -> 169 H1 = lists:duplicate(N+2, $=), 170 H2 = "==" ++ Es, 171 throw_error(L, {"heading end marker mismatch: " 172 "~s...~s", [H1, H2]}) 173 end; 174 _ -> 175 expand_heading_1(Cs, N, L, As) 176 end. 177 178expand_heading_1(Cs, N, L, As) -> 179 expand(Cs, L, lists:duplicate(N + 2, $=) ++ As). 180 181expand_heading_2(Ts, Cs, N, L, As) -> 182 H = ?BASE_HEADING + N, 183 Ts1 = io_lib:format("<h~w><a name=\"~ts\">~ts</a></h~w>\n", 184 [H, make_label(Ts), Ts, H]), 185 expand_new_line(Cs, L + 1, lists:reverse(lists:flatten(Ts1), As)). 186 187make_label([$\s | Cs]) -> 188 [$_ | make_label(edoc_lib:strip_space(Cs))]; 189make_label([$\t | Cs]) -> 190 [$_ | make_label(edoc_lib:strip_space(Cs))]; 191make_label([$\n | Cs]) -> 192 [$_ | make_label(edoc_lib:strip_space(Cs))]; 193make_label([C | Cs]) -> 194 [C | make_label(Cs)]; 195make_label([]) -> 196 []. 197 198%% `...' 199 200expand_single(Cs, L, As) -> 201 expand_single(Cs, L, As, L). 202 203expand_single([$' | Cs], L, As, _L0) -> 204 expand(Cs, L, ">edoc/<" ++ edoc_lib:strip_space(As)); 205expand_single([$< | Cs], L, As, L0) -> 206 expand_single(Cs, L, ";tl&" ++ As, L0); 207expand_single([$> | Cs], L, As, L0) -> 208 expand_single(Cs, L, ";tg&" ++ As, L0); 209expand_single([$& | Cs], L, As, L0) -> 210 expand_single(Cs, L, ";pma&" ++ As, L0); 211expand_single([$\n = C | Cs], L, As, L0) -> 212 expand_single(Cs, L + 1, [C | As], L0); 213expand_single([C | Cs], L, As, L0) -> 214 expand_single(Cs, L, [C | As], L0); 215expand_single([], L, _, L0) -> 216 throw_error(L0, {"`-quote ended unexpectedly at line ~w", [L]}). 217 218%% ``...'' 219 220expand_double(Cs, L, As) -> 221 expand_double(Cs, L, As, L). 222 223expand_double([$', $' | Cs], L, As, _L0) -> 224 expand(Cs, L, ">edoc/<" ++ edoc_lib:strip_space(As)); 225expand_double([$< | Cs], L, As, L0) -> 226 expand_double(Cs, L, ";tl&" ++ As, L0); 227expand_double([$> | Cs], L, As, L0) -> 228 expand_double(Cs, L, ";tg&" ++ As, L0); 229expand_double([$& | Cs], L, As, L0) -> 230 expand_double(Cs, L, ";pma&" ++ As, L0); 231expand_double([$\n = C | Cs], L, As, L0) -> 232 expand_double(Cs, L + 1, [C | As], L0); 233expand_double([C | Cs], L, As, L0) -> 234 expand_double(Cs, L, [C | As], L0); 235expand_double([], L, _, L0) -> 236 throw_error(L0, {"``-quote ended unexpectedly at line ~w", [L]}). 237 238%% ```...''' 239 240expand_triple(Cs, L, As) -> 241 expand_triple(Cs, L, As, L). 242 243expand_triple([$', $', $' | Cs], L, As, _L0) -> % ' stupid emacs 244 expand(Cs, L, ">erp/<>]]" ++ edoc_lib:strip_space(As)); 245expand_triple([$], $], $> | Cs], L, As, L0) -> 246 expand_triple(Cs, L, ";tg&]]" ++ As, L0); 247expand_triple([$\n = C | Cs], L, As, L0) -> 248 expand_triple(Cs, L + 1, [C | As], L0); 249expand_triple([C | Cs], L, As, L0) -> 250 expand_triple(Cs, L, [C | As], L0); 251expand_triple([], L, _, L0) -> 252 throw_error(L0, {"```-quote ended unexpectedly at line ~w", [L]}). 253 254%% e.g. [file:/...] or [http://... LinkText] 255 256expand_uri("http:/" ++ Cs, L, As) -> 257 expand_uri(Cs, L, "/:ptth", As); 258expand_uri("https:/" ++ Cs, L, As) -> 259 expand_uri(Cs, L, "/:sptth", As); 260expand_uri("ftp:/" ++ Cs, L, As) -> 261 expand_uri(Cs, L, "/:ptf", As); 262expand_uri("file:/" ++ Cs, L, As) -> 263 expand_uri(Cs, L, "/:elif", As); 264expand_uri("mailto:/" ++ Cs, L, As) -> 265 expand_uri(Cs, L, "/:otliam", As); 266expand_uri("nfs:/" ++ Cs, L, As) -> 267 expand_uri(Cs, L, "/:sfn", As); 268expand_uri("shttp:/" ++ Cs, L, As) -> 269 expand_uri(Cs, L, "/:ptths", As); 270expand_uri("xmpp:/" ++ Cs, L, As) -> 271 expand_uri(Cs, L, "/:ppmx", As); 272expand_uri(Cs, L, As) -> 273 expand(Cs, L, [$[ | As]). 274 275expand_uri([$] | Cs], L, Us, As) -> 276 expand(Cs, L, push_uri(Us, ">tt/<" ++ Us ++ ">tt<", As)); 277expand_uri([$\s = C | Cs], L, Us, As) -> 278 expand_uri(Cs, 0, L, [C], Us, As); 279expand_uri([$\t = C | Cs], L, Us, As) -> 280 expand_uri(Cs, 0, L, [C], Us, As); 281expand_uri([$\n = C | Cs], L, Us, As) -> 282 expand_uri(Cs, 1, L, [C], Us, As); 283expand_uri([C | Cs], L, Us, As) -> 284 expand_uri(Cs, L, [C | Us], As); 285expand_uri([], L, Us, _As) -> 286 expand_uri_error(Us, L). 287 288expand_uri([$] | Cs], N, L, Ss, Us, As) -> 289 Ss1 = lists:reverse(edoc_lib:strip_space( 290 lists:reverse(edoc_lib:strip_space(Ss)))), 291 expand(Cs, L + N, push_uri(Us, Ss1, As)); 292expand_uri([$\n = C | Cs], N, L, Ss, Us, As) -> 293 expand_uri(Cs, N + 1, L, [C | Ss], Us, As); 294expand_uri([C | Cs], N, L, Ss, Us, As) -> 295 expand_uri(Cs, N, L, [C | Ss], Us, As); 296expand_uri([], _, L, _Ss, Us, _As) -> 297 expand_uri_error(Us, L). 298 299-spec expand_uri_error(list(), pos_integer()) -> no_return(). 300 301expand_uri_error(Us, L) -> 302 {Ps, _} = edoc_lib:split_at(lists:reverse(Us), $:), 303 throw_error(L, {"reference '[~ts:...' ended unexpectedly", [Ps]}). 304 305 306push_uri(Us, Ss, As) -> 307 ">a/<" ++ Ss ++ ">\"pot_\"=tegrat \"" ++ Us ++ "\"=ferh a<" ++ As. 308 309 310strip_empty_lines(Cs) -> 311 strip_empty_lines(Cs, 0). 312 313strip_empty_lines([], N) -> 314 {[], N}; % reached the end of input 315strip_empty_lines(Cs, N) -> 316 {Cs1, Cs2} = edoc_lib:split_at(Cs, $\n), 317 case edoc_lib:is_space(Cs1) of 318 true -> 319 strip_empty_lines(Cs2, N + 1); 320 false -> 321 {Cs, N} 322 end. 323 324 325%% Scanning element content for paragraph breaks (empty lines). 326%% Paragraphs are flushed by block level elements. 327 328par(Es) -> 329 par(Es, [], []). 330 331par([E=#xmlText{value = Value} | Es], As, Bs) -> 332 par_text(Value, As, Bs, E, Es); 333par([E=#xmlElement{name = Name} | Es], As, Bs) -> 334 %% (Note that paragraphs may not contain any further block-level 335 %% elements, including other paragraphs. Tables get complicated.) 336 case Name of 337 'p' -> par_flush(Es, [E | As], Bs); 338 'hr' -> par_flush(Es, [E | As], Bs); 339 'h1' -> par_flush(Es, [E | As], Bs); 340 'h2' -> par_flush(Es, [E | As], Bs); 341 'h3' -> par_flush(Es, [E | As], Bs); 342 'h4' -> par_flush(Es, [E | As], Bs); 343 'h5' -> par_flush(Es, [E | As], Bs); 344 'h6' -> par_flush(Es, [E | As], Bs); 345 'pre' -> par_flush(Es, [E | As], Bs); 346 'address' -> par_flush(Es, [E | As], Bs); 347 'div' -> par_flush(Es, [par_elem(E) | As], Bs); 348 'blockquote' -> par_flush(Es, [par_elem(E) | As], Bs); 349 'form' -> par_flush(Es, [par_elem(E) | As], Bs); 350 'fieldset' -> par_flush(Es, [par_elem(E) | As], Bs); 351 'noscript' -> par_flush(Es, [par_elem(E) | As], Bs); 352 'ul' -> par_flush(Es, [par_subelem(E) | As], Bs); 353 'ol' -> par_flush(Es, [par_subelem(E) | As], Bs); 354 'dl' -> par_flush(Es, [par_subelem(E) | As], Bs); 355 'table' -> par_flush(Es, [par_subelem(E) | As], Bs); 356 _ -> par(Es, [E | As], Bs) 357 end; 358par([E | Es], As, Bs) -> 359 par(Es, [E | As], Bs); 360par([], As, Bs) -> 361 lists:reverse(As ++ Bs). 362 363par_text(Cs, As, Bs, E, Es) -> 364 case ptxt(Cs) of 365 none -> 366 %% no blank lines: keep this element as it is 367 par(Es, [E | As], Bs); 368 {Cs1, Ss, Cs2} -> 369 Es1 = case Cs1 of 370 [] -> lists:reverse(As); 371 _ -> lists:reverse(As, [E#xmlText{value = Cs1}]) 372 end, 373 Bs0 = case Es1 of 374 [] -> Bs; 375 _ -> [#xmlElement{name = p, content = Es1} | Bs] 376 end, 377 Bs1 = [#xmlText{value = Ss} | Bs0], 378 case Cs2 of 379 [] -> 380 par(Es, [], Bs1); 381 _ -> 382 par_text(Cs2, [], Bs1, #xmlText{value = Cs2}, Es) 383 end 384 end. 385 386par_flush(Es, As, Bs) -> 387 par(Es, [], As ++ Bs). 388 389par_elem(E) -> 390 E#xmlElement{content = par(E#xmlElement.content)}. 391 392%% Only process content of subelements; ignore immediate content. 393par_subelem(E) -> 394 E#xmlElement{content = par_subelem_1(E#xmlElement.content)}. 395 396par_subelem_1([E=#xmlElement{name = Name} | Es]) -> 397 E1 = case par_skip(Name) of 398 true -> 399 E; 400 false -> 401 case par_sub(Name) of 402 true -> 403 par_subelem(E); 404 false -> 405 par_elem(E) 406 end 407 end, 408 [E1 | par_subelem_1(Es)]; 409par_subelem_1([E | Es]) -> 410 [E | par_subelem_1(Es)]; 411par_subelem_1([]) -> 412 []. 413 414par_skip('caption') -> true; 415par_skip('col') -> true; 416par_skip('colgroup') -> true; 417par_skip(_) -> false. 418 419par_sub(tr) -> true; 420par_sub(thead) -> true; 421par_sub(tfoot) -> true; 422par_sub(tbody) -> true; 423par_sub(_) -> false. 424 425 426%% scanning text content for a blank line 427 428ptxt(Cs) -> 429 ptxt(Cs, []). 430 431ptxt([$\n | Cs], As) -> 432 ptxt_1(Cs, As, [$\n]); 433ptxt([C | Cs], As) -> 434 ptxt(Cs, [C | As]); 435ptxt([], _As) -> 436 none. 437 438%% scanning text following an initial newline 439ptxt_1([C=$\s | Cs], As, Ss) -> 440 ptxt_1(Cs, As, [C | Ss]); 441ptxt_1([C=$\t | Cs], As, Ss) -> 442 ptxt_1(Cs, As, [C | Ss]); 443ptxt_1([C=$\n | Cs], As, Ss) -> 444 %% blank line detected 445 ptxt_2(Cs, As, [C | Ss]); 446ptxt_1(Cs, As, Ss) -> 447 %% not a blank line 448 ptxt(Cs, lists:reverse(Ss, As)). 449 450%% collecting whitespace following a blank line 451ptxt_2([C=$\s | Cs], As, Ss) -> 452 ptxt_2(Cs, As, [C | Ss]); 453ptxt_2([C=$\t | Cs], As, Ss) -> 454 ptxt_2(Cs, As, [C | Ss]); 455ptxt_2([C=$\n | Cs], As, Ss) -> 456 ptxt_2(Cs, As, [C | Ss]); 457ptxt_2(Cs, As, Ss) -> 458 %% ended by non-whitespace or end of element 459 case edoc_lib:is_space(As) of 460 true -> 461 {[], lists:reverse(Ss ++ As), Cs}; 462 false -> 463 {lists:reverse(As), lists:reverse(Ss), Cs} 464 end. 465 466 467-spec throw_error(non_neg_integer(), {string(), [_]}) -> no_return(). 468 469throw_error(L, D) -> 470 throw({error, L, D}). 471