1 /*============================================================================= 2 Copyright (c) 2002 2004 2006 Joel de Guzman 3 Copyright (c) 2004 Eric Niebler 4 http://spirit.sourceforge.net/ 5 6 Use, modification and distribution is subject to the Boost Software 7 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at 8 http://www.boost.org/LICENSE_1_0.txt) 9 =============================================================================*/ 10 #include <boost/spirit/include/classic_core.hpp> 11 #include <boost/spirit/include/classic_confix.hpp> 12 #include <boost/spirit/include/classic_chset.hpp> 13 #include <boost/spirit/include/classic_symbols.hpp> 14 #include <boost/spirit/include/classic_loops.hpp> 15 #include "grammar.hpp" 16 #include "state.hpp" 17 #include "actions.hpp" 18 #include "syntax_highlight.hpp" 19 #include "utils.hpp" 20 #include "files.hpp" 21 #include "native_text.hpp" 22 #include "phrase_tags.hpp" 23 24 namespace quickbook 25 { 26 namespace cl = boost::spirit::classic; 27 28 // Syntax Highlight Actions 29 30 struct syntax_highlight_actions 31 { 32 quickbook::state& state; 33 do_macro_action do_macro_impl; 34 35 // State 36 bool support_callouts; 37 boost::string_ref marked_text; 38 syntax_highlight_actionsquickbook::syntax_highlight_actions39 syntax_highlight_actions(quickbook::state& state, bool is_block) : 40 state(state), 41 do_macro_impl(state), 42 support_callouts(is_block && (qbk_version_n >= 107u || 43 state.current_file->is_code_snippets)), 44 marked_text() 45 {} 46 47 void span(parse_iterator, parse_iterator, char const*); 48 void span_start(parse_iterator, parse_iterator, char const*); 49 void span_end(parse_iterator, parse_iterator); 50 void unexpected_char(parse_iterator, parse_iterator); 51 void plain_char(parse_iterator, parse_iterator); 52 void pre_escape_back(parse_iterator, parse_iterator); 53 void post_escape_back(parse_iterator, parse_iterator); 54 void do_macro(std::string const&); 55 56 void mark_text(parse_iterator, parse_iterator); 57 void callout(parse_iterator, parse_iterator); 58 }; 59 span(parse_iterator first,parse_iterator last,char const * name)60 void syntax_highlight_actions::span(parse_iterator first, 61 parse_iterator last, char const* name) 62 { 63 state.phrase << "<phrase role=\"" << name << "\">"; 64 while (first != last) 65 detail::print_char(*first++, state.phrase.get()); 66 state.phrase << "</phrase>"; 67 } 68 span_start(parse_iterator first,parse_iterator last,char const * name)69 void syntax_highlight_actions::span_start(parse_iterator first, 70 parse_iterator last, char const* name) 71 { 72 state.phrase << "<phrase role=\"" << name << "\">"; 73 while (first != last) 74 detail::print_char(*first++, state.phrase.get()); 75 } 76 span_end(parse_iterator first,parse_iterator last)77 void syntax_highlight_actions::span_end(parse_iterator first, 78 parse_iterator last) 79 { 80 while (first != last) 81 detail::print_char(*first++, state.phrase.get()); 82 state.phrase << "</phrase>"; 83 } 84 unexpected_char(parse_iterator first,parse_iterator last)85 void syntax_highlight_actions::unexpected_char(parse_iterator first, 86 parse_iterator last) 87 { 88 file_position const pos = state.current_file->position_of(first.base()); 89 90 detail::outwarn(state.current_file->path, pos.line) 91 << "in column:" << pos.column 92 << ", unexpected character: " << std::string(first.base(), last.base()) 93 << "\n"; 94 95 // print out an unexpected character 96 state.phrase << "<phrase role=\"error\">"; 97 while (first != last) 98 detail::print_char(*first++, state.phrase.get()); 99 state.phrase << "</phrase>"; 100 } 101 plain_char(parse_iterator first,parse_iterator last)102 void syntax_highlight_actions::plain_char(parse_iterator first, 103 parse_iterator last) 104 { 105 while (first != last) 106 detail::print_char(*first++, state.phrase.get()); 107 } 108 pre_escape_back(parse_iterator,parse_iterator)109 void syntax_highlight_actions::pre_escape_back(parse_iterator, 110 parse_iterator) 111 { 112 state.push_output(); // save the stream 113 } 114 post_escape_back(parse_iterator,parse_iterator)115 void syntax_highlight_actions::post_escape_back(parse_iterator, 116 parse_iterator) 117 { 118 std::string tmp; 119 state.phrase.swap(tmp); 120 state.pop_output(); // restore the stream 121 state.phrase << tmp; 122 } 123 do_macro(std::string const & v)124 void syntax_highlight_actions::do_macro(std::string const& v) 125 { 126 do_macro_impl(v); 127 } 128 mark_text(parse_iterator first,parse_iterator last)129 void syntax_highlight_actions::mark_text(parse_iterator first, 130 parse_iterator last) 131 { 132 marked_text = boost::string_ref(first.base(), last.base() - first.base()); 133 } 134 callout(parse_iterator,parse_iterator)135 void syntax_highlight_actions::callout(parse_iterator, parse_iterator) 136 { 137 state.phrase << state.add_callout(qbk_value(state.current_file, 138 marked_text.begin(), marked_text.end())); 139 marked_text.clear(); 140 } 141 142 // Syntax 143 144 struct keywords_holder 145 { 146 cl::symbols<> cpp, python; 147 keywords_holderquickbook::keywords_holder148 keywords_holder() 149 { 150 cpp 151 = "alignas", "alignof", "and_eq", "and", "asm", "auto", 152 "bitand", "bitor", "bool", "break", "case", "catch", 153 "char", "char16_t", "char32_t", "class", "compl", 154 "const", "const_cast", "constexpr", "continue", 155 "decltype", "default", "delete", "do", "double", 156 "dynamic_cast", "else", "enum", "explicit", "export", 157 "extern", "false", "float", "for", "friend", "goto", 158 "if", "inline", "int", "long", "mutable", "namespace", 159 "new", "noexcept", "not_eq", "not", "nullptr", 160 "operator", "or_eq", "or", "private", "protected", 161 "public", "register", "reinterpret_cast", "return", 162 "short", "signed", "sizeof", "static", "static_assert", 163 "static_cast", "struct", "switch", "template", "this", 164 "thread_local", "throw", "true", "try", "typedef", 165 "typeid", "typename", "union", "unsigned", "using", 166 "virtual", "void", "volatile", "wchar_t", "while", 167 "xor_eq", "xor" 168 ; 169 170 python 171 = 172 "and", "del", "for", "is", "raise", 173 "assert", "elif", "from", "lambda", "return", 174 "break", "else", "global", "not", "try", 175 "class", "except", "if", "or", "while", 176 "continue", "exec", "import", "pass", "yield", 177 "def", "finally", "in", "print", 178 179 // Technically "as" and "None" are not yet keywords (at Python 180 // 2.4). They are destined to become keywords, and we treat them 181 // as such for syntax highlighting purposes. 182 183 "as", "None" 184 ; 185 } 186 }; 187 188 namespace { 189 keywords_holder keywords; 190 } 191 192 // Grammar for C++ highlighting 193 struct cpp_highlight : public cl::grammar<cpp_highlight> 194 { cpp_highlightquickbook::cpp_highlight195 cpp_highlight(syntax_highlight_actions& actions) 196 : actions(actions) {} 197 198 template <typename Scanner> 199 struct definition 200 { definitionquickbook::cpp_highlight::definition201 definition(cpp_highlight const& self) 202 : g(self.actions.state.grammar()) 203 { 204 member_action1<syntax_highlight_actions, char const*> 205 span(self.actions, &syntax_highlight_actions::span), 206 span_start(self.actions, &syntax_highlight_actions::span_start); 207 member_action<syntax_highlight_actions> 208 span_end(self.actions, &syntax_highlight_actions::span_end), 209 unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char), 210 plain_char(self.actions, &syntax_highlight_actions::plain_char), 211 pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back), 212 post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back), 213 mark_text(self.actions, &syntax_highlight_actions::mark_text), 214 callout(self.actions, &syntax_highlight_actions::callout); 215 member_action_value<syntax_highlight_actions, std::string const&> 216 do_macro(self.actions, &syntax_highlight_actions::do_macro); 217 error_action error(self.actions.state); 218 219 program = 220 *( (*cl::space_p) [plain_char] 221 >> (line_start | rest_of_line) 222 >> *rest_of_line 223 ) 224 ; 225 226 line_start = 227 preprocessor [span("preprocessor")] 228 ; 229 230 rest_of_line = 231 (+cl::blank_p) [plain_char] 232 | macro 233 | escape 234 | cl::eps_p(ph::var(self.actions.support_callouts)) 235 >> ( line_callout [callout] 236 | inline_callout [callout] 237 ) 238 | comment 239 | keyword [span("keyword")] 240 | identifier [span("identifier")] 241 | special [span("special")] 242 | string_ [span("string")] 243 | char_ [span("char")] 244 | number [span("number")] 245 | ~cl::eps_p(cl::eol_p) 246 >> u8_codepoint_p [unexpected_char] 247 ; 248 249 macro = 250 // must not be followed by alpha or underscore 251 cl::eps_p(self.actions.state.macro 252 >> (cl::eps_p - (cl::alpha_p | '_'))) 253 >> self.actions.state.macro 254 [do_macro] 255 ; 256 257 escape = 258 cl::str_p("``") [pre_escape_back] 259 >> 260 ( 261 ( 262 ( 263 (+(cl::anychar_p - "``") >> cl::eps_p("``")) 264 & g.phrase_start 265 ) 266 >> cl::str_p("``") 267 ) 268 | 269 ( 270 cl::eps_p [error] 271 >> *cl::anychar_p 272 ) 273 ) [post_escape_back] 274 ; 275 276 preprocessor 277 = '#' >> *cl::space_p >> ((cl::alpha_p | '_') >> *(cl::alnum_p | '_')) 278 ; 279 280 inline_callout 281 = cl::confix_p( 282 "/*<" >> *cl::space_p, 283 (*cl::anychar_p) [mark_text], 284 ">*/" 285 ) 286 ; 287 288 line_callout 289 = cl::confix_p( 290 "/*<<" >> *cl::space_p, 291 (*cl::anychar_p) [mark_text], 292 ">>*/" 293 ) 294 >> *cl::space_p 295 ; 296 297 comment 298 = cl::str_p("//") [span_start("comment")] 299 >> *( escape 300 | (+(cl::anychar_p - (cl::eol_p | "``"))) 301 [plain_char] 302 ) 303 >> cl::eps_p [span_end] 304 | cl::str_p("/*") [span_start("comment")] 305 >> *( escape 306 | (+(cl::anychar_p - (cl::str_p("*/") | "``"))) 307 [plain_char] 308 ) 309 >> (!cl::str_p("*/")) [span_end] 310 ; 311 312 keyword 313 = keywords.cpp >> (cl::eps_p - (cl::alnum_p | '_')) 314 ; // make sure we recognize whole words only 315 316 special 317 = +cl::chset_p("~!%^&*()+={[}]:;,<.>?/|\\#-") 318 ; 319 320 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\'); 321 322 string_ 323 = !cl::as_lower_d['l'] >> cl::confix_p('"', *string_char, '"') 324 ; 325 326 char_ 327 = !cl::as_lower_d['l'] >> cl::confix_p('\'', *string_char, '\'') 328 ; 329 330 number 331 = ( 332 cl::as_lower_d["0x"] >> cl::hex_p 333 | '0' >> cl::oct_p 334 | cl::real_p 335 ) 336 >> *cl::as_lower_d[cl::chset_p("ldfu")] 337 ; 338 339 identifier 340 = (cl::alpha_p | '_') >> *(cl::alnum_p | '_') 341 ; 342 } 343 344 cl::rule<Scanner> 345 program, line_start, rest_of_line, macro, preprocessor, 346 inline_callout, line_callout, comment, 347 special, string_, 348 char_, number, identifier, keyword, escape, 349 string_char; 350 351 quickbook_grammar& g; 352 353 cl::rule<Scanner> const& startquickbook::cpp_highlight::definition354 start() const { return program; } 355 }; 356 357 syntax_highlight_actions& actions; 358 }; 359 360 // Grammar for Python highlighting 361 // See also: The Python Reference Manual 362 // http://docs.python.org/ref/ref.html 363 struct python_highlight : public cl::grammar<python_highlight> 364 { python_highlightquickbook::python_highlight365 python_highlight(syntax_highlight_actions& actions) 366 : actions(actions) {} 367 368 template <typename Scanner> 369 struct definition 370 { definitionquickbook::python_highlight::definition371 definition(python_highlight const& self) 372 : g(self.actions.state.grammar()) 373 { 374 member_action1<syntax_highlight_actions, char const*> 375 span(self.actions, &syntax_highlight_actions::span), 376 span_start(self.actions, &syntax_highlight_actions::span_start); 377 member_action<syntax_highlight_actions> 378 span_end(self.actions, &syntax_highlight_actions::span_end), 379 unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char), 380 plain_char(self.actions, &syntax_highlight_actions::plain_char), 381 pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back), 382 post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back), 383 mark_text(self.actions, &syntax_highlight_actions::mark_text), 384 callout(self.actions, &syntax_highlight_actions::callout); 385 member_action_value<syntax_highlight_actions, std::string const&> 386 do_macro(self.actions, &syntax_highlight_actions::do_macro); 387 error_action error(self.actions.state); 388 389 program 390 = 391 *( (+cl::space_p) [plain_char] 392 | macro 393 | escape 394 | comment 395 | keyword [span("keyword")] 396 | identifier [span("identifier")] 397 | special [span("special")] 398 | string_ [span("string")] 399 | number [span("number")] 400 | u8_codepoint_p [unexpected_char] 401 ) 402 ; 403 404 macro = 405 // must not be followed by alpha or underscore 406 cl::eps_p(self.actions.state.macro 407 >> (cl::eps_p - (cl::alpha_p | '_'))) 408 >> self.actions.state.macro 409 [do_macro] 410 ; 411 412 escape = 413 cl::str_p("``") [pre_escape_back] 414 >> 415 ( 416 ( 417 ( 418 (+(cl::anychar_p - "``") >> cl::eps_p("``")) 419 & g.phrase_start 420 ) 421 >> cl::str_p("``") 422 ) 423 | 424 ( 425 cl::eps_p [error] 426 >> *cl::anychar_p 427 ) 428 ) [post_escape_back] 429 ; 430 431 comment 432 = cl::str_p("#") [span_start("comment")] 433 >> *( escape 434 | (+(cl::anychar_p - (cl::eol_p | "``"))) 435 [plain_char] 436 ) 437 >> cl::eps_p [span_end] 438 ; 439 440 keyword 441 = keywords.python >> (cl::eps_p - (cl::alnum_p | '_')) 442 ; // make sure we recognize whole words only 443 444 special 445 = +cl::chset_p("~!%^&*()+={[}]:;,<.>/|\\-") 446 ; 447 448 string_prefix 449 = cl::as_lower_d[cl::str_p("u") >> ! cl::str_p("r")] 450 ; 451 452 string_ 453 = ! string_prefix >> (long_string | short_string) 454 ; 455 456 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\'); 457 458 short_string 459 = cl::confix_p('\'', * string_char, '\'') | 460 cl::confix_p('"', * string_char, '"') 461 ; 462 463 long_string 464 // Note: the "cl::str_p" on the next two lines work around 465 // an INTERNAL COMPILER ERROR when using VC7.1 466 = cl::confix_p(cl::str_p("'''"), * string_char, "'''") | 467 cl::confix_p(cl::str_p("\"\"\""), * string_char, "\"\"\"") 468 ; 469 470 number 471 = ( 472 cl::as_lower_d["0x"] >> cl::hex_p 473 | '0' >> cl::oct_p 474 | cl::real_p 475 ) 476 >> *cl::as_lower_d[cl::chset_p("lj")] 477 ; 478 479 identifier 480 = (cl::alpha_p | '_') >> *(cl::alnum_p | '_') 481 ; 482 } 483 484 cl::rule<Scanner> 485 program, macro, comment, special, string_, string_prefix, 486 short_string, long_string, number, identifier, keyword, 487 escape, string_char; 488 489 quickbook_grammar& g; 490 491 cl::rule<Scanner> const& startquickbook::python_highlight::definition492 start() const { return program; } 493 }; 494 495 syntax_highlight_actions& actions; 496 }; 497 498 // Grammar for plain text (no actual highlighting) 499 struct teletype_highlight : public cl::grammar<teletype_highlight> 500 { teletype_highlightquickbook::teletype_highlight501 teletype_highlight(syntax_highlight_actions& actions) 502 : actions(actions) {} 503 504 template <typename Scanner> 505 struct definition 506 { definitionquickbook::teletype_highlight::definition507 definition(teletype_highlight const& self) 508 : g(self.actions.state.grammar()) 509 { 510 member_action<syntax_highlight_actions> 511 plain_char(self.actions, &syntax_highlight_actions::plain_char), 512 pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back), 513 post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back); 514 member_action_value<syntax_highlight_actions, std::string const&> 515 do_macro(self.actions, &syntax_highlight_actions::do_macro); 516 error_action error(self.actions.state); 517 518 program 519 = 520 *( macro 521 | escape 522 | u8_codepoint_p [plain_char] 523 ) 524 ; 525 526 macro = 527 // must not be followed by alpha or underscore 528 cl::eps_p(self.actions.state.macro 529 >> (cl::eps_p - (cl::alpha_p | '_'))) 530 >> self.actions.state.macro 531 [do_macro] 532 ; 533 534 escape = 535 cl::str_p("``") [pre_escape_back] 536 >> 537 ( 538 ( 539 ( 540 (+(cl::anychar_p - "``") >> cl::eps_p("``")) 541 & g.phrase_start 542 ) 543 >> cl::str_p("``") 544 ) 545 | 546 ( 547 cl::eps_p [error] 548 >> *cl::anychar_p 549 ) 550 ) [post_escape_back] 551 ; 552 } 553 554 cl::rule<Scanner> program, macro, escape; 555 556 quickbook_grammar& g; 557 558 cl::rule<Scanner> const& startquickbook::teletype_highlight::definition559 start() const { return program; } 560 }; 561 562 syntax_highlight_actions& actions; 563 }; 564 syntax_highlight(parse_iterator first,parse_iterator last,quickbook::state & state,source_mode_type source_mode,bool is_block)565 void syntax_highlight( 566 parse_iterator first, 567 parse_iterator last, 568 quickbook::state& state, 569 source_mode_type source_mode, 570 bool is_block) 571 { 572 syntax_highlight_actions syn_actions(state, is_block); 573 574 // print the code with syntax coloring 575 switch(source_mode) 576 { 577 case source_mode_tags::cpp: { 578 cpp_highlight cpp_p(syn_actions); 579 boost::spirit::classic::parse(first, last, cpp_p); 580 break; 581 } 582 case source_mode_tags::python: { 583 python_highlight python_p(syn_actions); 584 boost::spirit::classic::parse(first, last, python_p); 585 break; 586 } 587 case source_mode_tags::teletype: { 588 teletype_highlight teletype_p(syn_actions); 589 boost::spirit::classic::parse(first, last, teletype_p); 590 break; 591 } 592 default: 593 BOOST_ASSERT(0); 594 } 595 } 596 } 597