1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 2009-2015. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19 20-module(demo_html_tagger). 21 22%% You will notice that this program has very few type declarations 23%% That's because this program uses some pretty dodgy techniques to 24%% get at the data it requires. 25 26%% I use epp_dodger to parse the file and the new imporved erl_scan 27%% find the exact values of the tokens 28 29%% epp_dodger returns an objects of type erl_syntax which are pretty 30%% nasty buggers. We could write the types out but it would hardly 31%% help. 32 33%% to test run 34 35%%-compile(export_all). 36 37 38-export([erl2htmltext/1, erl2htmlfile/1]). 39 40erl2htmltext(File) -> 41 try 42 erl2html0(File) 43 catch 44 What:Why -> 45 io:format("error in:~s ~p ~p~n",[File,What,Why]) 46 end. 47 48erl2htmlfile(File) -> 49 try 50 Text = erl2html0(File), 51 Root = filename:basename(filename:rootname(File)), 52 Out = "./html/" ++ Root ++ ".html", 53 file:write_file(Out, [Text]) 54 catch 55 What:Why -> 56 io:format("error in:~s ~p ~p~n",[File,What,Why]) 57 end. 58 59 60splitErlang(File) -> 61 {ok, Forms} = dodge_file(File), 62 {Anchors, Patches} = analyse(Forms), 63 Raw = read_raw_forms(File), 64 Raw1 = merge_anchors(Anchors, Raw), 65 Raw2 = merge_forms(Raw1, Patches, []), 66 Rtf = [final(I) || I <- Raw2], 67 {taggedBlocks, Rtf}. 68 69erl2html0(File) -> 70 Tb = splitErlang(File), 71 Html = to_html(Tb), 72 prelude(Html). 73 74merge_forms([{Tag,L1}|T], Patches, L) -> 75 {L2, Patches1} = apply_patches(L1, Patches), 76 merge_forms(T, Patches1, [{Tag,L2}|L]); 77merge_forms([], _, L) -> 78 lists:reverse(L). 79 80apply_patches(Toks, []) -> 81 %% we've run out of patches but we must still simplify 82 %% every term 83 {[simplify(I) || I <- Toks], []}; 84apply_patches(Toks, Patches) -> 85 apply_patches(Toks, Patches, []). 86 87apply_patches([{atom,Ln,Val}=A|T], Patches, L) -> 88 case do_patch(Ln, Patches) of 89 {yes, New, Patches1} -> 90 New1 = reformat(New, Val), 91 apply_patches(T, Patches1, [New1|L]); 92 {no, Patches1} -> 93 apply_patches(T, Patches1, [simplify(A)|L]) 94 end; 95apply_patches([H|T], Patches, L) -> 96 apply_patches(T, Patches, [simplify(H)|L]); 97apply_patches([], Patches, L) -> 98 {lists:reverse(L), Patches}. 99 100 101simplify({atom,_,Str}) -> 102 case (catch list_to_existing_atom(Str)) of 103 {'EXIT', _} -> 104 {atom, Str}; 105 A -> 106 case is_keyword(A) of 107 true -> {keyword, Str}; 108 false -> 109 {atom, Str} 110 end 111 end; 112simplify({dot,_,Str}) -> 113 {terminal, Str}; 114simplify({Tag,_,Str}) -> 115 case is_keyword(Tag) of 116 true -> 117 {keyword, Str}; 118 false -> 119 case is_terminal(Tag) of 120 true -> 121 {terminal, Str}; 122 false -> 123 {Tag, Str} 124 end 125 end; 126simplify(X) -> 127 io:format("simplify wtfit:~p~n",[X]), 128 X. 129 130do_patch(Ln, [{Ln,Tag}|P]) -> {yes, Tag, P}; 131do_patch(Ln, [{Ln1,_}|_] = P) when Ln1 > Ln -> {no, P}; 132do_patch(Ln, [_|T]) -> do_patch(Ln, T); 133do_patch(_, []) -> {no, []}. 134 135reformat({local,{F,A}}, Str) -> {local,F,A,Str}; 136reformat({remote,M,F,A}, Str) -> {remote,M,F,A,Str}; 137reformat({remote,{M,F,A}}, Str) -> {remote,M,F,A,Str}; 138reformat({bif,{F,A}}, Str) -> {bif,F,A,Str}; 139reformat(Tag, Str) -> 140 io:format("reformat*:~p ~p~n",[Tag,Str]), 141 {Tag,Str}. 142 143to_html({taggedBlocks, L}) -> 144 [[anchor1(Anchor),to_html(Body)] || {Anchor,Body} <- L]; 145to_html({taggedToks, L}) -> 146 [to_html1(I) || I <- L]. 147 148anchor1({func, F, A}) -> 149 ["<a name='",linkname(F,A),"'></a>"]; 150anchor1({specification, F, A}) -> 151 ["<a name='",linkname(F,A),"'></a>"]; 152anchor1(_X) -> 153 "". 154 155linkname(F, A) when is_atom(F) -> 156 a2s(F) ++ "-" ++ integer_to_list(A); 157linkname(F, A) when is_list(F) -> 158 F ++ "-" ++ integer_to_list(A). 159 160a2s(A) -> 161 atom_to_list(A). 162 163font(C, S) -> 164 ["<font color=\"", C, "\">", htmlquote(S), "</font>"]. 165 166htmlquote("<" ++ T) -> ["<"|htmlquote(T)]; 167htmlquote([H|T]) -> [H|htmlquote(T)]; 168htmlquote([]) -> []. 169 170to_html1({white_space,V}) -> V; 171to_html1({comment, V}) -> font("#B22222", V); 172to_html1({var,V}) -> font("orange", V); 173to_html1({string,V}) -> font("#FA8072", V); 174to_html1({integer,V}) -> font("#1111AA", V); 175to_html1({bif,_F,_A,Str}) -> font("#FF00FF", Str); 176to_html1({keyword, V}) -> font("#FF00FF", V); 177to_html1({atom, V}) -> V; 178to_html1({terminal,V}) -> V; 179to_html1({char,V}) -> V; 180to_html1({float,V}) -> V; 181to_html1({anchor,F,A}) -> 182 ["<a name='",linkname(F,A),"'></a>"]; 183to_html1({local,F,A,Str}) -> 184 ["<a href='#",linkname(F,A),"'>", 185 htmlquote(Str),"</a>"]; 186to_html1({remote,_M,_F,_A,Str}) -> 187 %%["<a href='",htmlname(M), "#",linkname(F,A),"'>",htmlquote(Str),"</a>"], 188 Str. 189 190%% merge the anchors 191%% there should be one block per anchor 192%% we check the containing form (for safety) 193 194%% merge_anchors([{_,{file,_}}|A], B) -> 195%% merge_anchors(A, B); 196merge_anchors([{Tag,Val}=H|A], [B|T]) -> 197 case contains(Tag, B) of 198 true -> 199 [{Val,B}|merge_anchors(A, T)]; 200 false -> 201 io:format("Logic error: H=~p B=~p~n",[H,B]), 202 exit(1) 203 end; 204merge_anchors([], []) -> []; 205merge_anchors([], [X]) -> 206 %% this is the last block - 207 %% trailing white space and comments have no tag 208 %% because eos is not a tag ... 209 [{eof, X}]; 210merge_anchors(X, Y) -> 211 io:format("ops:~p~n",[{X,Y}]), 212 []. 213 214contains(Loc, [{_,Loc,_}|_]) -> true; 215contains(Loc, [_|T]) -> contains(Loc, T); 216contains(_, []) -> false. 217 218 219dodge_file(File) -> 220 case file:open(File, [read]) of 221 {ok, Handle} -> 222 {ok, F} = epp_dodger:parse(Handle, {1,1}), 223 file:close(Handle), 224 L = [revert_forms(I) || I <- F], 225 {ok, L}; 226 Error -> 227 Error 228 end. 229 230revert_forms(F) -> 231 case erl_syntax:is_form(F) of 232 true -> 233 %% revert fails on ifdef ... etc 234 case (catch erl_syntax:revert(F)) of 235 {'EXIT', _Why} -> 236 io:format("error reverting:~p=~p~n",[F,_Why]), 237 F; 238 Other -> 239 Other 240 end; 241 false -> 242 io:format("uugh:~p~n",[F]) 243 end. 244 245%% read up to dot 246%% read_raw_forms(File) -> [form()] 247%% form() = [tok()] 248%% tok() = {Type,{Line::int,Col::int},string} 249%% Type = atom | int | var | string ... 250 251read_raw_forms(File) -> 252 {ok, Bin} = file:read_file(File), 253 Str = binary_to_list(Bin), 254 loop(erl_scan:tokens([], Str, {1,1}, [return,text]), []). 255 256loop({done, {eof,_}, eof}, L) -> 257 lists:reverse(L); 258loop({done, {ok, Toks, _}, eof}, L) -> 259 lists:reverse([normalize_toks(Toks)|L]); 260loop({done, {ok, Toks, Ln}, Str1}, L) -> 261 loop(erl_scan:tokens([], Str1, Ln, [return,text]), 262 [normalize_toks(Toks)|L]); 263loop({more, X}, L) -> 264 loop(erl_scan:tokens(X, eof, {1,1}, [return,text]), L). 265 266normalize_toks(Toks) -> 267 [normalize_tok(I) || I <- Toks]. 268 269normalize_tok(Tok) -> 270 %% this is the portable way ... 271 Type = erl_scan:category(Tok), 272 Line = erl_scan:line(Tok), 273 Col = erl_scan:column(Tok), 274 Txt = erl_scan:text(Tok), 275 Val = {Type,{Line,Col},Txt}, 276 %% io:format("here:X=~p ~p~n",[Tok,Val]), 277 Val. 278 279 280%% analse the result of dodge_file 281 282analyse(Forms) -> 283 Calls = calls(Forms), 284 Anchors = compute_anchors(Forms), 285 Imports = [{{F,A},Mod} || 286 {attribute,_,import,{Mod,L}} <- Forms, {F,A} <- L], 287 D = dict:from_list(Imports), 288 Patches = [{Loc, resolve(X, D)} || {Loc, X} <- Calls], 289 {Anchors, Patches}. 290 291%% An anchor is something that is placed at the start of every form 292%% The anchor is named after the first item in the form 293%% compute_anchors(Forms) -> [{{Line,Col}, anchor()}] 294%% {Line,Col} is the line and column of where the 295%% form starts - this is not the same as the first token in 296%% the form since we might have skipped comments and white space 297%% at the start of the form. 298%% anchor() is a term decscribing the anchor 299%% anchor(() = {func,Name,Aritry} (for functions) 300%% | 301%% | {Type,{Line,Col}} anythis else 302 303compute_anchors(Forms) -> 304 A1 = [anchor0(I) || I <- Forms], 305 merge_specs(A1). 306 307%% If a specification is immediately followed by 308%% a function when we promote the function anchor to point 309%% at the specification. 310%% We change the second tag to func2 - because we still want a 311%% tag for every block 312 313merge_specs([{_Ln1,{specification,F,A}}=H,{Ln2, {func,F,A}}|T]) -> 314 [H,{Ln2,{func1,F,A}}|merge_specs(T)]; 315merge_specs([H|T]) -> 316 [H|merge_specs(T)]; 317merge_specs([]) -> 318 []. 319 320anchor0(I) -> 321 case anchor(I) of 322 {{Line,Col,_,_}, Val} -> 323 {{Line,Col}, Val}; 324 {{_,_}, _} = X -> 325 X 326 end. 327 328anchor({function, Ln, F, A, _}) -> {Ln, {func, F, A}}; 329anchor({attribute,Ln,'spec', {{F,A},_}}) -> 330 {Ln, {specification,F,A}}; 331anchor({attribute,Ln,module, M}) -> 332 {Ln, {module,M}}; 333anchor({attribute,Ln,Type,_}) -> {Ln, {Type, Ln}}; 334anchor({eof,Ln}) -> {Ln, eof}; 335anchor({error,{Ln,_,_}}) -> 336 %% Ln is in a different format in errors (sigh) 337 {Line, Col} = Ln, 338 Ln1 = {Line,Col,0,""}, 339 {Ln1, {error, Ln}}; 340anchor({tree,attribute,{attr,{_,_,_,Type}=Ln,_,_},_}) -> 341 {Ln, {attribute,Type,Ln}}; 342anchor({tree,attribute,_, 343 {attribute, {atom,Ln,Type}, _}}) -> 344 {Ln, {attribute,Type,Ln}}; 345anchor({tree,attribute, 346 {attr,Ln,[],none}, 347 _}=X) -> 348 io:format("FIX ME this is a bug????:~p~n",[X]), 349 {Ln, {other, Ln}}; 350anchor(X) -> 351 %% this is some syntactic form that I don't know 352 %% about yet ... 353 io:format("FIX ME this is a bug????:~p~n",[X]), 354 exit(1). 355 356resolve({F,A}=Tup, D) -> 357 case dict:find({F,A}, D) of 358 {ok, Mod} -> 359 {remote,Mod,F,A}; 360 error -> 361 case erlang:is_builtin(erlang, F, A) of 362 true -> {bif, {F,A}}; 363 false -> {local,Tup} 364 end 365 end; 366resolve({erlang,F,A}, _) -> 367 {bif,{F,A}}; 368resolve({anchor,_,_}=A, _) -> 369 A; 370resolve(X, _D) -> 371 {remote, X}. 372 373calls(X) -> lists:reverse(calls(X, [])). 374 375calls({call,_,{atom,Ln,Func},Args}, L) -> 376 calls(Args, [{normalise(Ln),{Func,length(Args)}}|L]); 377calls({call,_,{remote,_,{atom,Ln1,Mod},{atom,_Ln2,Func}}, Args}, L) -> 378 calls(Args, [{normalise(Ln1),{Mod,Func,length(Args)}}|L]); 379calls(T, L) when is_tuple(T) -> 380 calls(tuple_to_list(T), L); 381calls([], L) -> 382 L; 383calls(T, L) when is_list(T) -> 384 lists:foldl(fun calls/2, L, T); 385calls(_, L) -> 386 L. 387 388normalise({_Line,_Col}=X) -> 389 X; 390normalise({Line,Col,_Len,_Text}) -> 391 {Line, Col}. 392 393 394prelude(L) -> 395 ["<html>\n" 396 "<head>\n" 397 "</head>\n" 398 "<body>\n" 399 "<ul><pre>\n",L,"\n</pre></ul></body>"]. 400 401 402final({Tag, Toks}) -> 403 {Tag, {taggedToks, final1(Tag, Toks)}}. 404 405final1({Tag,_,_}, Toks) when Tag =:= func; Tag =:= func1 -> 406 %% io:format("fix_remote:~p~n",[Toks]), 407 fix_remote(Toks); 408final1({export,_}, Toks) -> 409 fix_exports(Toks); 410final1({import,_}, Toks) -> 411 fix_imports(Toks); 412final1(_, Toks) -> 413 %% io:format("final:~p~n",[X]), 414 Toks. 415 416 417fix_imports(Toks) -> 418 %% io:format("fix imports:~p~n",[Toks]), 419 Mod = find_imported_module(Toks), 420 %% io:format("Mod =~p~n",[Mod]), 421 fix_imports(Toks, Mod). 422 423fix_imports([{atom,A},{terminal,"/"},{integer,N}|T], Mod) -> 424 [{remote, Mod,A,list_to_integer(N),A++"/"++N}| 425 fix_imports(T, Mod)]; 426fix_imports([H|T], Mod) -> 427 [H|fix_imports(T, Mod)]; 428fix_imports([], _) -> 429 []. 430 431%% skip to the atom import, then take the first atom after import 432find_imported_module([{atom,"import"}|T]) -> find_imported_module1(T); 433find_imported_module([_|T]) -> find_imported_module(T). 434 435find_imported_module1([{atom,M}|_]) -> list_to_atom(M); 436find_imported_module1([_|T]) -> find_imported_module1(T). 437 438%% won't work if there is white space between the symbols 439%% fix later 440 441fix_exports([{atom,A},{terminal,"/"},{integer,N}|T]) -> 442 [{local,A,list_to_integer(N),A++"/"++N}|fix_exports(T)]; 443fix_exports([H|T]) -> 444 [H|fix_exports(T)]; 445fix_exports([]) -> 446 []. 447 448%% fix_remote merges Mod : Func into a single string 449%% the problem is that 450%% we only tag the first atom in a remote call mod:func(...) 451%% mod is tagged as remote - but we want to 452%% extend the tagging to include the entire mod:func 453%% call ... 454 455fix_remote([{remote,M,F,A,Str},{terminal,":"},{atom,Str1}|T]) -> 456 [{remote,M,F,A,Str ++ ":" ++ Str1}|fix_remote(T)]; 457fix_remote([{remote,M,F,A,Str},{white_space,S1},{terminal,":"},{atom,Str1}|T]) -> 458 [{remote,M,F,A,Str ++ S1 ++ ":" ++ Str1}|fix_remote(T)]; 459fix_remote([{remote,M,F,A,Str},{white_space,S1},{terminal,":"},{white_space,S2},{atom,Str1}|T]) -> 460 [{remote,M,F,A,Str ++ S1 ++ ":" ++ S2 ++ Str1}|fix_remote(T)]; 461fix_remote([{remote,M,F,A,Str},{terminal,":"},{white_space,S2},{atom,Str1}|T]) -> 462 [{remote,M,F,A,Str ++ ":" ++ S2 ++ Str1}|fix_remote(T)]; 463fix_remote([H|T]) -> 464 [H|fix_remote(T)]; 465fix_remote([]) -> 466 []. 467 468-spec is_keyword(atom()) -> boolean(). 469 470is_keyword('after' ) -> true; 471is_keyword('and') -> true; 472is_keyword('andalso' ) -> true; 473is_keyword('band' ) -> true; 474is_keyword('begin' ) -> true; 475is_keyword('bnot' ) -> true; 476is_keyword('bor' ) -> true; 477is_keyword('bsl' ) -> true; 478is_keyword('bsr' ) -> true; 479is_keyword('bxor' ) -> true; 480is_keyword('case' ) -> true; 481is_keyword('catch' ) -> true; 482is_keyword('cond') -> true; 483is_keyword('div' ) -> true; 484is_keyword('end' ) -> true; 485is_keyword('fun' ) -> true; 486is_keyword('if' ) -> true; 487is_keyword('not') -> true; 488is_keyword('of' ) -> true; 489is_keyword('or' ) -> true; 490is_keyword('orelse' ) -> true; 491is_keyword('receive' ) -> true; 492is_keyword('rem' ) -> true; 493is_keyword('spec') -> true; 494is_keyword('try' ) -> true; 495is_keyword('when') -> true; 496is_keyword('xor') -> true; 497is_keyword(_) -> false. 498 499is_terminal('!') -> true; 500is_terminal('#') -> true; 501is_terminal('(') -> true; 502is_terminal(')') -> true; 503is_terminal('*') -> true; 504is_terminal('+') -> true; 505is_terminal('++') -> true; 506is_terminal(',') -> true; 507is_terminal('-') -> true; 508is_terminal('--') -> true; 509is_terminal('->') -> true; 510is_terminal('.') -> true; 511is_terminal('/') -> true; 512is_terminal('/=') -> true; 513is_terminal(':') -> true; 514is_terminal(':-') -> true; 515is_terminal('::') -> true; 516is_terminal(';') -> true; 517is_terminal('<') -> true; 518is_terminal('<-') -> true; 519is_terminal('<<') -> true; 520is_terminal('<=') -> true; 521is_terminal('=') -> true; 522is_terminal('=/=') -> true; 523is_terminal('=:=') -> true; 524is_terminal('=<') -> true; 525is_terminal('==') -> true; 526is_terminal('>') -> true; 527is_terminal('>=') -> true; 528is_terminal('>>') -> true; 529is_terminal('?') -> true; 530is_terminal('[') -> true; 531is_terminal(']') -> true; 532is_terminal('{') -> true; 533is_terminal('|') -> true; 534is_terminal('||') -> true; 535is_terminal('}') -> true; 536is_terminal(_) -> false. 537