1(***********************************************************************)
2(*                                                                     *)
3(*                          HEVEA                                      *)
4(*                                                                     *)
5(*  Luc Maranget, projet Moscova, INRIA Rocquencourt                   *)
6(*                                                                     *)
7(*  Copyright 2012 Institut National de Recherche en Informatique et   *)
8(*  Automatique.  Distributed only by permission.                      *)
9(*                                                                     *)
10(***********************************************************************)
11
12(* Extract/abstract style attributes *)
13
14{
15open Printf
16open Emisc
17
18let error msg _lb =  raise (Emisc.LexError msg)
19}
20
21let blank = [' ''\t''\n''\r']
22let tag = ['a'-'z''A'-'Z''0'-'9']+
23let class_name = ['a'-'z''A'-'Z''0'-'9''-']+
24let attr_name = ['a'-'z''A'-'Z''0'-'9']['a'-'z''A'-'Z''-''0'-'9'':']*
25
26rule extract styles = parse
27(* TOC comments are scanned *)
28| "<!--TOC" blank+
29   { extract styles lexbuf }
30| "<!--"
31  {skip_comment lexbuf ;
32   extract styles lexbuf }
33| "<!"| ("</" tag)
34  { skip_tag lexbuf ;
35    extract styles lexbuf ; }
36| '<'  (tag as tag)
37  { let styles = extract_tag tag styles lexbuf in
38    extract styles lexbuf }
39| [^'<']+ { extract styles lexbuf }
40| eof { styles }
41| "" { error "extract" lexbuf }
42
43and extract_tag tag styles = parse
44| '>' { styles }
45| blank+ { extract_tag tag styles lexbuf }
46| ("style"|"STYLE") blank* "=" blank*
47  (('\'' ([^'\'']* as v) '\''
48  | '"' ([^'"']* as v) '"'
49  | ('#'?['a'-'z''A'-'Z''0'-'9''-''+''_'':''.']+ as v)))
50(* '"' *)
51  { extract_tag tag (StringCount.incr v styles) lexbuf }
52| attr_name ( blank* "=" blank*
53  ('\'' ([^'\'']*) '\''
54  | '"' ([^'"']*) '"'
55  | '#'?['a'-'z''A'-'Z''0'-'9''-''+''_'':''.']+))?
56(* '"' *)
57  {  extract_tag tag styles lexbuf }
58| "" { error "extract_tag"lexbuf }
59
60and skip_tag = parse
61| [^'>']* '>' { () }
62| "" { error "skip_tag" lexbuf }
63
64and skip_comment = parse
65| "-->" '\n'? { }
66| _
67   {skip_comment lexbuf}
68| eof
69   {error "End of file in comment" lexbuf}
70| "" { error "comment" lexbuf }
71
72and dump m out = parse
73| "<style" blank+ "type" blank* "=" blank* '"' "text/css" '"' blank* '>' '\n'?
74  as lxm
75   { fprintf out "%s" lxm ;
76     Emisc.StringMap.iter
77       (fun st cl -> Emisc.dump_class out cl st)
78       m ;
79     dump m out lexbuf }
80| "<!--TOC" blank+ as lxm
81   { output_string out lxm ;
82     dump m out lexbuf }
83| "<!--" as lxm
84  {fprintf out "%s" lxm ;
85   dump_comment out lexbuf ;
86   dump m out lexbuf }
87| "<!"| ("</" tag) as lxm
88  { fprintf out "%s" lxm ;
89    dump_tag out lexbuf ;
90    dump m out lexbuf }
91| '<'  tag as lxm
92  { output_string out lxm ;
93    abstract_tag [] [] m out lexbuf ;
94    dump m out lexbuf }
95| [^'<']+ as lxm
96   { output_string out lxm ; dump m out lexbuf }
97| eof { true }
98| "" { error "dump" lexbuf }
99
100and dump_comment out = parse
101| "-->" '\n'? as lxm { output_string out lxm }
102| _ as c             { output_char out c ; dump_comment out lexbuf }
103| eof                {error "End of file in comment" lexbuf}
104| "" { error "dump_comment" lexbuf }
105
106and dump_tag out = parse
107| [^'>']* '>' as lxm { output_string out lxm }
108| ""                 { error "dump_tag" lexbuf }
109
110and abstract_tag cl attrs m out = parse
111| '>'
112  {
113   let cl = match cl with
114   | [] -> []
115   | _  -> [sprintf "class=\"%s\"" (String.concat " " (List.rev cl))] in
116   let na = cl @ List.rev attrs in
117   List.iter
118     (fprintf out " %s")
119     na ;
120   output_char out '>'
121  }
122| blank+ {  abstract_tag cl attrs m out lexbuf }
123| ("style"|"STYLE") blank* "=" blank*
124  (('\'' ([^'\'']* as v) '\''
125  | '"' ([^'"']* as v) '"'
126  | ('#'?['a'-'z''A'-'Z''0'-'9''-''+''_'':''.']+ as v))) as a
127(* '"' *)
128  {
129    try
130      let tcl = Emisc.StringMap.find v m  in
131      abstract_tag (tcl::cl) attrs m out lexbuf
132    with Not_found ->
133      abstract_tag cl (a::attrs) m out lexbuf
134   }
135| ("class"|"CLASS") blank* "=" blank*
136  (('\'' ([^'\'']* as v) '\''
137  | '"' ([^'"']* as v) '"'
138  | ('#'?['a'-'z''A'-'Z''0'-'9''-''+''_'':''.']+ as v)))
139(* '"' *)
140  { abstract_tag (v::cl) attrs m out lexbuf }
141| attr_name ( blank* "=" blank*
142  ('\'' ([^'\'']*) '\''
143  | '"' ([^'"']*) '"'
144  | '#'?['a'-'z''A'-'Z''0'-'9''-''+''_'':''.']+))? as a
145(* '"' *)
146  {  abstract_tag cl (a::attrs) m out lexbuf }
147| "" { error "abstract_tag" lexbuf }
148
149
150{
151 let get lexbuf = extract StringCount.empty lexbuf
152
153 let set m out lexbuf = dump m out lexbuf
154}
155