1 /*============================================================================= 2 Copyright (c) 2005 2006 Joel de Guzman 3 http://spirit.sourceforge.net/ 4 5 Use, modification and distribution is subject to the Boost Software 6 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at 7 http://www.boost.org/LICENSE_1_0.txt) 8 =============================================================================*/ 9 #include "post_process.hpp" 10 #include <cctype> 11 #include <set> 12 #include <stack> 13 #include <boost/bind.hpp> 14 #include <boost/spirit/include/classic_core.hpp> 15 #include <boost/spirit/include/phoenix1_operators.hpp> 16 #include <boost/spirit/include/phoenix1_primitives.hpp> 17 18 namespace quickbook 19 { 20 namespace cl = boost::spirit::classic; 21 namespace ph = phoenix; 22 typedef std::string::const_iterator iter_type; 23 24 struct pretty_printer 25 { pretty_printerquickbook::pretty_printer26 pretty_printer(std::string& out_, int& current_indent_, int linewidth_) 27 : prev(0) 28 , out(out_) 29 , current_indent(current_indent_) 30 , column(0) 31 , in_string(false) 32 , linewidth(linewidth_) 33 { 34 } 35 indentquickbook::pretty_printer36 void indent() 37 { 38 BOOST_ASSERT(current_indent >= 0); // this should not happen! 39 for (int i = 0; i < current_indent; ++i) 40 out += ' '; 41 column = current_indent; 42 } 43 trim_spacesquickbook::pretty_printer44 void trim_spaces() 45 { 46 out.erase(out.find_last_not_of(' ') + 1); // trim trailing spaces 47 } 48 break_linequickbook::pretty_printer49 void break_line() 50 { 51 trim_spaces(); 52 out += '\n'; 53 indent(); 54 } 55 line_is_emptyquickbook::pretty_printer56 bool line_is_empty() const 57 { 58 for (iter_type i = out.end() - (column - current_indent); 59 i != out.end(); ++i) { 60 if (*i != ' ') return false; 61 } 62 return true; 63 } 64 align_indentquickbook::pretty_printer65 void align_indent() 66 { 67 // make sure we are at the proper indent position 68 if (column != current_indent) { 69 if (column > current_indent) { 70 if (line_is_empty()) { 71 // trim just enough trailing spaces down to 72 // current_indent position 73 out.erase( 74 out.end() - (column - current_indent), out.end()); 75 column = current_indent; 76 } 77 else { 78 // nope, line is not empty. do a hard CR 79 break_line(); 80 } 81 } 82 else { 83 // will this happen? (i.e. column <= current_indent) 84 while (column != current_indent) { 85 out += ' '; 86 ++column; 87 } 88 } 89 } 90 } 91 printquickbook::pretty_printer92 void print(char ch) 93 { 94 // Print a char. Attempt to break the line if we are exceeding 95 // the target linewidth. The linewidth is not an absolute limit. 96 // There are many cases where a line will exceed the linewidth 97 // and there is no way to properly break the line. Preformatted 98 // code that exceeds the linewidth are examples. We cannot break 99 // preformatted code. We shall not attempt to be very strict with 100 // line breaking. What's more important is to have a reproducable 101 // output (i.e. processing two logically equivalent xml files 102 // results in two lexically equivalent xml files). *** pretty 103 // formatting is a secondary goal *** 104 105 // Strings will occur only in tag attributes. Normal content 106 // will have " instead. We shall deal only with tag 107 // attributes here. 108 if (ch == '"') in_string = !in_string; // don't break strings! 109 110 if (!in_string && std::isspace(static_cast<unsigned char>(ch))) { 111 // we can break spaces if they are not inside strings 112 if (!std::isspace(static_cast<unsigned char>(prev))) { 113 if (column >= linewidth) { 114 break_line(); 115 if (column == 0 && ch == ' ') { 116 ++column; 117 out += ' '; 118 } 119 } 120 else { 121 ++column; 122 out += ' '; 123 } 124 } 125 } 126 else { 127 // we can break tag boundaries and stuff after 128 // delimiters if they are not inside strings 129 // and *only-if* the preceding char is a space 130 if (!in_string && column >= linewidth && 131 (ch == '<' && 132 std::isspace(static_cast<unsigned char>(prev)))) 133 break_line(); 134 out += ch; 135 ++column; 136 } 137 138 prev = ch; 139 } 140 printquickbook::pretty_printer141 void print(iter_type f, iter_type l) 142 { 143 for (iter_type i = f; i != l; ++i) 144 print(*i); 145 } 146 print_tagquickbook::pretty_printer147 void print_tag(iter_type f, iter_type l, bool is_flow_tag) 148 { 149 if (is_flow_tag) { 150 print(f, l); 151 } 152 else { 153 // This is not a flow tag, so, we're going to do a 154 // carriage return anyway. Let us remove extra right 155 // spaces. 156 std::string str(f, l); 157 BOOST_ASSERT(f != l); // this should not happen 158 iter_type i = str.end(); 159 while (i != str.begin() && 160 std::isspace(static_cast<unsigned char>(*(i - 1)))) 161 --i; 162 print(str.begin(), i); 163 } 164 } 165 166 char prev; 167 std::string& out; 168 int& current_indent; 169 int column; 170 bool in_string; 171 int linewidth; 172 173 private: 174 pretty_printer& operator=(pretty_printer const&); 175 }; 176 177 char const* html_block_tags_[] = { 178 "div", "p", "blockquote", "address", "h1", "h2", "h3", 179 "h4", "h5", "h6", "ul", "ol", "li", "dl", 180 "dt", "dd", "table", "tr", "th", "td", "tbody", 181 "thead", "form", "fieldset", "hr", "noscript", "html", "body"}; 182 183 char const* block_tags_[] = { 184 "author", "blockquote", "bridgehead", "callout", 185 "calloutlist", "caution", "copyright", "entry", 186 "important", "informaltable", "itemizedlist", "legalnotice", 187 "listitem", "note", "orderedlist", "para", 188 "row", "section", "simpara", "table", 189 "tbody", "textobject", "tgroup", "thead", 190 "tip", "variablelist", "varlistentry", "warning", 191 "xml", "xi:include"}; 192 193 char const* doc_types_[] = {"book", "article", "library", "chapter", 194 "part", "appendix", "preface", "qandadiv", 195 "qandaset", "reference", "set"}; 196 197 struct tidy_compiler 198 { tidy_compilerquickbook::tidy_compiler199 tidy_compiler(std::string& out_, int linewidth_, bool is_html) 200 : out(out_) 201 , current_indent(0) 202 , printer(out_, current_indent, linewidth_) 203 { 204 if (is_html) { 205 static std::size_t const n_block_tags = 206 sizeof(html_block_tags_) / sizeof(char const*); 207 for (std::size_t i = 0; i != n_block_tags; ++i) { 208 block_tags.insert(html_block_tags_[i]); 209 } 210 } 211 else { 212 static std::size_t const n_block_tags = 213 sizeof(block_tags_) / sizeof(char const*); 214 for (std::size_t i = 0; i != n_block_tags; ++i) { 215 block_tags.insert(block_tags_[i]); 216 } 217 218 static std::size_t const n_doc_types = 219 sizeof(doc_types_) / sizeof(char const*); 220 for (std::size_t i = 0; i != n_doc_types; ++i) { 221 block_tags.insert(doc_types_[i]); 222 block_tags.insert(doc_types_[i] + std::string("info")); 223 block_tags.insert(doc_types_[i] + std::string("purpose")); 224 } 225 } 226 } 227 is_flow_tagquickbook::tidy_compiler228 bool is_flow_tag(std::string const& tag) 229 { 230 return block_tags.find(tag) == block_tags.end(); 231 } 232 233 std::set<std::string> block_tags; 234 std::stack<std::string> tags; 235 std::string& out; 236 int current_indent; 237 pretty_printer printer; 238 std::string current_tag; 239 240 private: 241 tidy_compiler& operator=(tidy_compiler const&); 242 }; 243 244 struct tidy_grammar : cl::grammar<tidy_grammar> 245 { tidy_grammarquickbook::tidy_grammar246 tidy_grammar(tidy_compiler& state_, int indent_, bool is_html_) 247 : state(state_), indent(indent_), is_html(is_html_) 248 { 249 } 250 251 template <typename Scanner> struct definition 252 { definitionquickbook::tidy_grammar::definition253 definition(tidy_grammar const& self) 254 { 255 // clang-format off 256 257 tag = (cl::lexeme_d[+(cl::alnum_p | '_' | ':')]) [boost::bind(&tidy_grammar::do_tag, &self, _1, _2)]; 258 259 code = cl::eps_p(ph::var(self.is_html)) 260 >> "<" 261 >> cl::lexeme_d[cl::str_p("pre")] 262 >> *(cl::anychar_p - '>') 263 >> ">" 264 >> *(cl::anychar_p - "</pre>") 265 >> "</pre" 266 >> cl::lexeme_d[">" >> *cl::space_p] 267 | cl::eps_p(!ph::var(self.is_html)) 268 >> "<programlisting>" 269 >> *(cl::anychar_p - "</programlisting>") 270 >> "</programlisting" 271 >> cl::lexeme_d[">" >> *cl::space_p] 272 ; 273 274 // What's the business of cl::lexeme_d['>' >> *cl::space_p]; ? 275 // It is there to preserve the space after the tag that is 276 // otherwise consumed by the cl::space_p skipper. 277 278 escape = 279 cl::str_p("<!--quickbook-escape-prefix-->") >> 280 (*(cl::anychar_p - cl::str_p("<!--quickbook-escape-postfix-->"))) 281 [ 282 boost::bind(&tidy_grammar::do_escape, &self, _1, _2) 283 ] 284 >> cl::lexeme_d 285 [ 286 cl::str_p("<!--quickbook-escape-postfix-->") >> 287 (*cl::space_p) 288 [ 289 boost::bind(&tidy_grammar::do_escape_post, &self, _1, _2) 290 ] 291 ] 292 ; 293 294 start_tag = '<' >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]; 295 start_end_tag = 296 '<' >> tag >> *(cl::anychar_p - ("/>" | cl::ch_p('>'))) >> cl::lexeme_d["/>" >> *cl::space_p] 297 | "<?" >> tag >> *(cl::anychar_p - '?') >> cl::lexeme_d["?>" >> *cl::space_p] 298 | "<!--" >> *(cl::anychar_p - "-->") >> cl::lexeme_d["-->" >> *cl::space_p] 299 | "<!" >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p] 300 ; 301 content = cl::lexeme_d[ +(cl::anychar_p - '<') ]; 302 end_tag = "</" >> +(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]; 303 304 markup = 305 escape 306 | code [boost::bind(&tidy_grammar::do_code, &self, _1, _2)] 307 | start_end_tag [boost::bind(&tidy_grammar::do_start_end_tag, &self, _1, _2)] 308 | start_tag [boost::bind(&tidy_grammar::do_start_tag, &self, _1, _2)] 309 | end_tag [boost::bind(&tidy_grammar::do_end_tag, &self, _1, _2)] 310 | content [boost::bind(&tidy_grammar::do_content, &self, _1, _2)] 311 ; 312 313 tidy = *markup; 314 315 // clang-format on 316 } 317 startquickbook::tidy_grammar::definition318 cl::rule<Scanner> const& start() { return tidy; } 319 320 cl::rule<Scanner> tidy, tag, start_tag, start_end_tag, content, 321 end_tag, markup, code, escape; 322 }; 323 do_escape_postquickbook::tidy_grammar324 void do_escape_post(iter_type f, iter_type l) const 325 { 326 for (iter_type i = f; i != l; ++i) 327 state.out += *i; 328 } 329 do_escapequickbook::tidy_grammar330 void do_escape(iter_type f, iter_type l) const 331 { 332 while (f != l && std::isspace(*f)) { 333 ++f; 334 } 335 while (f != l && std::isspace(*(l - 1))) { 336 --l; 337 } 338 for (iter_type i = f; i != l; ++i) { 339 state.out += *i; 340 } 341 } 342 do_codequickbook::tidy_grammar343 void do_code(iter_type f, iter_type l) const 344 { 345 state.printer.trim_spaces(); 346 if (state.out[state.out.size() - 1] != '\n') state.out += '\n'; 347 348 // trim trailing space from after closing tag 349 while (f != l && std::isspace(*(l - 1))) { 350 --l; 351 } 352 353 // print the string taking care of line 354 // ending CR/LF platform issues 355 for (iter_type i = f; i != l;) { 356 if (*i == '\n') { 357 state.printer.trim_spaces(); 358 state.out += '\n'; 359 ++i; 360 if (i != l && *i == '\r') { 361 ++i; 362 } 363 } 364 else if (*i == '\r') { 365 state.printer.trim_spaces(); 366 state.out += '\n'; 367 ++i; 368 if (i != l && *i == '\n') { 369 ++i; 370 } 371 } 372 else { 373 state.out += *i; 374 ++i; 375 } 376 } 377 state.out += '\n'; 378 state.printer.indent(); 379 } 380 do_tagquickbook::tidy_grammar381 void do_tag(iter_type f, iter_type l) const 382 { 383 state.current_tag = std::string(f, l); 384 } 385 do_start_end_tagquickbook::tidy_grammar386 void do_start_end_tag(iter_type f, iter_type l) const 387 { 388 bool is_flow_tag = state.is_flow_tag(state.current_tag); 389 if (!is_flow_tag) state.printer.align_indent(); 390 state.printer.print_tag(f, l, is_flow_tag); 391 if (!is_flow_tag) state.printer.break_line(); 392 } 393 do_start_tagquickbook::tidy_grammar394 void do_start_tag(iter_type f, iter_type l) const 395 { 396 state.tags.push(state.current_tag); 397 bool is_flow_tag = state.is_flow_tag(state.current_tag); 398 if (!is_flow_tag) state.printer.align_indent(); 399 state.printer.print_tag(f, l, is_flow_tag); 400 if (!is_flow_tag) { 401 state.current_indent += indent; 402 state.printer.break_line(); 403 } 404 } 405 do_contentquickbook::tidy_grammar406 void do_content(iter_type f, iter_type l) const 407 { 408 state.printer.print(f, l); 409 } 410 do_end_tagquickbook::tidy_grammar411 void do_end_tag(iter_type f, iter_type l) const 412 { 413 if (state.tags.empty()) 414 throw quickbook::post_process_failure("Mismatched tags."); 415 416 bool is_flow_tag = state.is_flow_tag(state.tags.top()); 417 if (!is_flow_tag) { 418 state.current_indent -= indent; 419 state.printer.align_indent(); 420 } 421 state.printer.print_tag(f, l, is_flow_tag); 422 if (!is_flow_tag) state.printer.break_line(); 423 state.tags.pop(); 424 } 425 426 tidy_compiler& state; 427 int indent; 428 bool is_html; 429 430 private: 431 tidy_grammar& operator=(tidy_grammar const&); 432 }; 433 post_process(std::string const & in,int indent,int linewidth,bool is_html)434 std::string post_process( 435 std::string const& in, int indent, int linewidth, bool is_html) 436 { 437 if (indent == -1) indent = 2; // set default to 2 438 if (linewidth == -1) linewidth = 80; // set default to 80 439 440 std::string tidy; 441 tidy_compiler state(tidy, linewidth, is_html); 442 tidy_grammar g(state, indent, is_html); 443 cl::parse_info<iter_type> r = 444 parse(in.begin(), in.end(), g, cl::space_p); 445 if (r.full) { 446 return tidy; 447 } 448 else { 449 throw quickbook::post_process_failure("Post Processing Failed."); 450 } 451 } 452 } 453