1 /*=============================================================================
2     Copyright (c) 2002 2004 2006 Joel de Guzman
3     Copyright (c) 2004 Eric Niebler
4     http://spirit.sourceforge.net/
5 
6     Use, modification and distribution is subject to the Boost Software
7     License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
8     http://www.boost.org/LICENSE_1_0.txt)
9 =============================================================================*/
10 #include <boost/spirit/include/classic_core.hpp>
11 #include <boost/spirit/include/classic_confix.hpp>
12 #include <boost/spirit/include/classic_chset.hpp>
13 #include <boost/spirit/include/classic_symbols.hpp>
14 #include <boost/spirit/include/classic_loops.hpp>
15 #include "grammar.hpp"
16 #include "state.hpp"
17 #include "actions.hpp"
18 #include "syntax_highlight.hpp"
19 #include "utils.hpp"
20 #include "files.hpp"
21 #include "native_text.hpp"
22 #include "phrase_tags.hpp"
23 
24 namespace quickbook
25 {
26     namespace cl = boost::spirit::classic;
27 
28     // Syntax Highlight Actions
29 
30     struct syntax_highlight_actions
31     {
32         quickbook::state& state;
33         do_macro_action do_macro_impl;
34 
35         // State
36         bool support_callouts;
37         boost::string_ref marked_text;
38 
syntax_highlight_actionsquickbook::syntax_highlight_actions39         syntax_highlight_actions(quickbook::state& state, bool is_block) :
40             state(state),
41             do_macro_impl(state),
42             support_callouts(is_block && (qbk_version_n >= 107u ||
43                 state.current_file->is_code_snippets)),
44             marked_text()
45         {}
46 
47         void span(parse_iterator, parse_iterator, char const*);
48         void span_start(parse_iterator, parse_iterator, char const*);
49         void span_end(parse_iterator, parse_iterator);
50         void unexpected_char(parse_iterator, parse_iterator);
51         void plain_char(parse_iterator, parse_iterator);
52         void pre_escape_back(parse_iterator, parse_iterator);
53         void post_escape_back(parse_iterator, parse_iterator);
54         void do_macro(std::string const&);
55 
56         void mark_text(parse_iterator, parse_iterator);
57         void callout(parse_iterator, parse_iterator);
58     };
59 
span(parse_iterator first,parse_iterator last,char const * name)60     void syntax_highlight_actions::span(parse_iterator first,
61             parse_iterator last, char const* name)
62     {
63         state.phrase << "<phrase role=\"" << name << "\">";
64         while (first != last)
65             detail::print_char(*first++, state.phrase.get());
66         state.phrase << "</phrase>";
67     }
68 
span_start(parse_iterator first,parse_iterator last,char const * name)69     void syntax_highlight_actions::span_start(parse_iterator first,
70             parse_iterator last, char const* name)
71     {
72         state.phrase << "<phrase role=\"" << name << "\">";
73         while (first != last)
74             detail::print_char(*first++, state.phrase.get());
75     }
76 
span_end(parse_iterator first,parse_iterator last)77     void syntax_highlight_actions::span_end(parse_iterator first,
78             parse_iterator last)
79     {
80         while (first != last)
81             detail::print_char(*first++, state.phrase.get());
82         state.phrase << "</phrase>";
83     }
84 
unexpected_char(parse_iterator first,parse_iterator last)85     void syntax_highlight_actions::unexpected_char(parse_iterator first,
86             parse_iterator last)
87     {
88         file_position const pos = state.current_file->position_of(first.base());
89 
90         detail::outwarn(state.current_file->path, pos.line)
91             << "in column:" << pos.column
92             << ", unexpected character: " << std::string(first.base(), last.base())
93             << "\n";
94 
95         // print out an unexpected character
96         state.phrase << "<phrase role=\"error\">";
97         while (first != last)
98             detail::print_char(*first++, state.phrase.get());
99         state.phrase << "</phrase>";
100     }
101 
plain_char(parse_iterator first,parse_iterator last)102     void syntax_highlight_actions::plain_char(parse_iterator first,
103             parse_iterator last)
104     {
105         while (first != last)
106             detail::print_char(*first++, state.phrase.get());
107     }
108 
pre_escape_back(parse_iterator,parse_iterator)109     void syntax_highlight_actions::pre_escape_back(parse_iterator,
110             parse_iterator)
111     {
112         state.push_output(); // save the stream
113     }
114 
post_escape_back(parse_iterator,parse_iterator)115     void syntax_highlight_actions::post_escape_back(parse_iterator,
116             parse_iterator)
117     {
118         std::string tmp;
119         state.phrase.swap(tmp);
120         state.pop_output(); // restore the stream
121         state.phrase << tmp;
122     }
123 
do_macro(std::string const & v)124     void syntax_highlight_actions::do_macro(std::string const& v)
125     {
126         do_macro_impl(v);
127     }
128 
mark_text(parse_iterator first,parse_iterator last)129     void syntax_highlight_actions::mark_text(parse_iterator first,
130             parse_iterator last)
131     {
132         marked_text = boost::string_ref(first.base(), last.base() - first.base());
133     }
134 
callout(parse_iterator,parse_iterator)135     void syntax_highlight_actions::callout(parse_iterator, parse_iterator)
136     {
137         state.phrase << state.add_callout(qbk_value(state.current_file,
138             marked_text.begin(), marked_text.end()));
139         marked_text.clear();
140     }
141 
142     // Syntax
143 
144     struct keywords_holder
145     {
146         cl::symbols<> cpp, python;
147 
keywords_holderquickbook::keywords_holder148         keywords_holder()
149         {
150             cpp
151                     =   "alignas", "alignof", "and_eq", "and", "asm", "auto",
152                         "bitand", "bitor", "bool", "break", "case", "catch",
153                         "char", "char16_t", "char32_t", "class", "compl",
154                         "const", "const_cast", "constexpr", "continue",
155                         "decltype", "default", "delete", "do", "double",
156                         "dynamic_cast",  "else", "enum", "explicit", "export",
157                         "extern", "false", "float", "for", "friend", "goto",
158                         "if", "inline", "int", "long", "mutable", "namespace",
159                         "new", "noexcept", "not_eq", "not", "nullptr",
160                         "operator", "or_eq", "or", "private", "protected",
161                         "public", "register", "reinterpret_cast", "return",
162                         "short", "signed", "sizeof", "static", "static_assert",
163                         "static_cast", "struct", "switch", "template", "this",
164                         "thread_local", "throw", "true", "try", "typedef",
165                         "typeid", "typename", "union", "unsigned", "using",
166                         "virtual", "void", "volatile", "wchar_t", "while",
167                         "xor_eq", "xor"
168                     ;
169 
170             python
171                     =
172                     "and",       "del",       "for",       "is",        "raise",
173                     "assert",    "elif",      "from",      "lambda",    "return",
174                     "break",     "else",      "global",    "not",       "try",
175                     "class",     "except",    "if",        "or",        "while",
176                     "continue",  "exec",      "import",    "pass",      "yield",
177                     "def",       "finally",   "in",        "print",
178 
179                     // Technically "as" and "None" are not yet keywords (at Python
180                     // 2.4). They are destined to become keywords, and we treat them
181                     // as such for syntax highlighting purposes.
182 
183                     "as", "None"
184                     ;
185         }
186     };
187 
188     namespace {
189         keywords_holder keywords;
190     }
191 
192     // Grammar for C++ highlighting
193     struct cpp_highlight : public cl::grammar<cpp_highlight>
194     {
cpp_highlightquickbook::cpp_highlight195         cpp_highlight(syntax_highlight_actions& actions)
196             : actions(actions) {}
197 
198         template <typename Scanner>
199         struct definition
200         {
definitionquickbook::cpp_highlight::definition201             definition(cpp_highlight const& self)
202                 : g(self.actions.state.grammar())
203             {
204                 member_action1<syntax_highlight_actions, char const*>
205                     span(self.actions, &syntax_highlight_actions::span),
206                     span_start(self.actions, &syntax_highlight_actions::span_start);
207                 member_action<syntax_highlight_actions>
208                     span_end(self.actions, &syntax_highlight_actions::span_end),
209                     unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char),
210                     plain_char(self.actions, &syntax_highlight_actions::plain_char),
211                     pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
212                     post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back),
213                     mark_text(self.actions, &syntax_highlight_actions::mark_text),
214                     callout(self.actions, &syntax_highlight_actions::callout);
215                 member_action_value<syntax_highlight_actions, std::string const&>
216                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
217                 error_action error(self.actions.state);
218 
219                 program =
220                     *(  (*cl::space_p)                  [plain_char]
221                     >>  (line_start | rest_of_line)
222                     >>  *rest_of_line
223                     )
224                     ;
225 
226                 line_start =
227                         preprocessor                    [span("preprocessor")]
228                     ;
229 
230                 rest_of_line =
231                         (+cl::blank_p)                  [plain_char]
232                     |   macro
233                     |   escape
234                     |   cl::eps_p(ph::var(self.actions.support_callouts))
235                     >>  (   line_callout                [callout]
236                         |   inline_callout              [callout]
237                         )
238                     |   comment
239                     |   keyword                         [span("keyword")]
240                     |   identifier                      [span("identifier")]
241                     |   special                         [span("special")]
242                     |   string_                         [span("string")]
243                     |   char_                           [span("char")]
244                     |   number                          [span("number")]
245                     |   ~cl::eps_p(cl::eol_p)
246                     >>  u8_codepoint_p                  [unexpected_char]
247                     ;
248 
249                 macro =
250                     // must not be followed by alpha or underscore
251                     cl::eps_p(self.actions.state.macro
252                         >> (cl::eps_p - (cl::alpha_p | '_')))
253                     >> self.actions.state.macro
254                                                         [do_macro]
255                     ;
256 
257                 escape =
258                     cl::str_p("``")                     [pre_escape_back]
259                     >>
260                     (
261                         (
262                             (
263                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
264                                 & g.phrase_start
265                             )
266                             >>  cl::str_p("``")
267                         )
268                         |
269                         (
270                             cl::eps_p                   [error]
271                             >> *cl::anychar_p
272                         )
273                     )                                   [post_escape_back]
274                     ;
275 
276                 preprocessor
277                     =   '#' >> *cl::space_p >> ((cl::alpha_p | '_') >> *(cl::alnum_p | '_'))
278                     ;
279 
280                 inline_callout
281                     =   cl::confix_p(
282                             "/*<" >> *cl::space_p,
283                             (*cl::anychar_p)            [mark_text],
284                             ">*/"
285                         )
286                         ;
287 
288                 line_callout
289                     =   cl::confix_p(
290                             "/*<<" >> *cl::space_p,
291                             (*cl::anychar_p)            [mark_text],
292                             ">>*/"
293                         )
294                     >>  *cl::space_p
295                     ;
296 
297                 comment
298                     =   cl::str_p("//")                 [span_start("comment")]
299                     >>  *(  escape
300                         |   (+(cl::anychar_p - (cl::eol_p | "``")))
301                                                         [plain_char]
302                         )
303                     >>  cl::eps_p                       [span_end]
304                     |   cl::str_p("/*")                 [span_start("comment")]
305                     >>  *(  escape
306                         |   (+(cl::anychar_p - (cl::str_p("*/") | "``")))
307                                                         [plain_char]
308                         )
309                     >>  (!cl::str_p("*/"))              [span_end]
310                     ;
311 
312                 keyword
313                     =   keywords.cpp >> (cl::eps_p - (cl::alnum_p | '_'))
314                     ;   // make sure we recognize whole words only
315 
316                 special
317                     =   +cl::chset_p("~!%^&*()+={[}]:;,<.>?/|\\#-")
318                     ;
319 
320                 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\');
321 
322                 string_
323                     =   !cl::as_lower_d['l'] >> cl::confix_p('"', *string_char, '"')
324                     ;
325 
326                 char_
327                     =   !cl::as_lower_d['l'] >> cl::confix_p('\'', *string_char, '\'')
328                     ;
329 
330                 number
331                     =   (
332                             cl::as_lower_d["0x"] >> cl::hex_p
333                         |   '0' >> cl::oct_p
334                         |   cl::real_p
335                         )
336                         >>  *cl::as_lower_d[cl::chset_p("ldfu")]
337                     ;
338 
339                 identifier
340                     =   (cl::alpha_p | '_') >> *(cl::alnum_p | '_')
341                     ;
342             }
343 
344             cl::rule<Scanner>
345                             program, line_start, rest_of_line, macro, preprocessor,
346                             inline_callout, line_callout, comment,
347                             special, string_,
348                             char_, number, identifier, keyword, escape,
349                             string_char;
350 
351             quickbook_grammar& g;
352 
353             cl::rule<Scanner> const&
startquickbook::cpp_highlight::definition354             start() const { return program; }
355         };
356 
357         syntax_highlight_actions& actions;
358     };
359 
360     // Grammar for Python highlighting
361     // See also: The Python Reference Manual
362     // http://docs.python.org/ref/ref.html
363     struct python_highlight : public cl::grammar<python_highlight>
364     {
python_highlightquickbook::python_highlight365         python_highlight(syntax_highlight_actions& actions)
366             : actions(actions) {}
367 
368         template <typename Scanner>
369         struct definition
370         {
definitionquickbook::python_highlight::definition371             definition(python_highlight const& self)
372                 : g(self.actions.state.grammar())
373             {
374                 member_action1<syntax_highlight_actions, char const*>
375                     span(self.actions, &syntax_highlight_actions::span),
376                     span_start(self.actions, &syntax_highlight_actions::span_start);
377                 member_action<syntax_highlight_actions>
378                     span_end(self.actions, &syntax_highlight_actions::span_end),
379                     unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char),
380                     plain_char(self.actions, &syntax_highlight_actions::plain_char),
381                     pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
382                     post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back),
383                     mark_text(self.actions, &syntax_highlight_actions::mark_text),
384                     callout(self.actions, &syntax_highlight_actions::callout);
385                 member_action_value<syntax_highlight_actions, std::string const&>
386                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
387                 error_action error(self.actions.state);
388 
389                 program
390                     =
391                     *(  (+cl::space_p)                  [plain_char]
392                     |   macro
393                     |   escape
394                     |   comment
395                     |   keyword                         [span("keyword")]
396                     |   identifier                      [span("identifier")]
397                     |   special                         [span("special")]
398                     |   string_                         [span("string")]
399                     |   number                          [span("number")]
400                     |   u8_codepoint_p                  [unexpected_char]
401                     )
402                     ;
403 
404                 macro =
405                     // must not be followed by alpha or underscore
406                     cl::eps_p(self.actions.state.macro
407                         >> (cl::eps_p - (cl::alpha_p | '_')))
408                     >> self.actions.state.macro
409                                                         [do_macro]
410                     ;
411 
412                 escape =
413                     cl::str_p("``")                     [pre_escape_back]
414                     >>
415                     (
416                         (
417                             (
418                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
419                                 & g.phrase_start
420                             )
421                             >>  cl::str_p("``")
422                         )
423                         |
424                         (
425                             cl::eps_p                   [error]
426                             >> *cl::anychar_p
427                         )
428                     )                                   [post_escape_back]
429                     ;
430 
431                 comment
432                     =   cl::str_p("#")                  [span_start("comment")]
433                     >>  *(  escape
434                         |   (+(cl::anychar_p - (cl::eol_p | "``")))
435                                                         [plain_char]
436                         )
437                     >>  cl::eps_p                       [span_end]
438                     ;
439 
440                 keyword
441                     =   keywords.python >> (cl::eps_p - (cl::alnum_p | '_'))
442                     ;   // make sure we recognize whole words only
443 
444                 special
445                     =   +cl::chset_p("~!%^&*()+={[}]:;,<.>/|\\-")
446                     ;
447 
448                 string_prefix
449                     =    cl::as_lower_d[cl::str_p("u") >> ! cl::str_p("r")]
450                     ;
451 
452                 string_
453                     =   ! string_prefix >> (long_string | short_string)
454                     ;
455 
456                 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\');
457 
458                 short_string
459                     =   cl::confix_p('\'', * string_char, '\'') |
460                         cl::confix_p('"', * string_char, '"')
461                     ;
462 
463                 long_string
464                     // Note: the "cl::str_p" on the next two lines work around
465                     // an INTERNAL COMPILER ERROR when using VC7.1
466                     =   cl::confix_p(cl::str_p("'''"), * string_char, "'''") |
467                         cl::confix_p(cl::str_p("\"\"\""), * string_char, "\"\"\"")
468                     ;
469 
470                 number
471                     =   (
472                             cl::as_lower_d["0x"] >> cl::hex_p
473                         |   '0' >> cl::oct_p
474                         |   cl::real_p
475                         )
476                         >>  *cl::as_lower_d[cl::chset_p("lj")]
477                     ;
478 
479                 identifier
480                     =   (cl::alpha_p | '_') >> *(cl::alnum_p | '_')
481                     ;
482             }
483 
484             cl::rule<Scanner>
485                             program, macro, comment, special, string_, string_prefix,
486                             short_string, long_string, number, identifier, keyword,
487                             escape, string_char;
488 
489             quickbook_grammar& g;
490 
491             cl::rule<Scanner> const&
startquickbook::python_highlight::definition492             start() const { return program; }
493         };
494 
495         syntax_highlight_actions& actions;
496     };
497 
498     // Grammar for plain text (no actual highlighting)
499     struct teletype_highlight : public cl::grammar<teletype_highlight>
500     {
teletype_highlightquickbook::teletype_highlight501         teletype_highlight(syntax_highlight_actions& actions)
502             : actions(actions) {}
503 
504         template <typename Scanner>
505         struct definition
506         {
definitionquickbook::teletype_highlight::definition507             definition(teletype_highlight const& self)
508                 : g(self.actions.state.grammar())
509             {
510                 member_action<syntax_highlight_actions>
511                     plain_char(self.actions, &syntax_highlight_actions::plain_char),
512                     pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
513                     post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back);
514                 member_action_value<syntax_highlight_actions, std::string const&>
515                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
516                 error_action error(self.actions.state);
517 
518                 program
519                     =
520                     *(  macro
521                     |   escape
522                     |   u8_codepoint_p                  [plain_char]
523                     )
524                     ;
525 
526                 macro =
527                     // must not be followed by alpha or underscore
528                     cl::eps_p(self.actions.state.macro
529                         >> (cl::eps_p - (cl::alpha_p | '_')))
530                     >> self.actions.state.macro
531                                                         [do_macro]
532                     ;
533 
534                 escape =
535                     cl::str_p("``")                     [pre_escape_back]
536                     >>
537                     (
538                         (
539                             (
540                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
541                                 & g.phrase_start
542                             )
543                             >>  cl::str_p("``")
544                         )
545                         |
546                         (
547                             cl::eps_p                   [error]
548                             >> *cl::anychar_p
549                         )
550                     )                                   [post_escape_back]
551                     ;
552             }
553 
554             cl::rule<Scanner> program, macro, escape;
555 
556             quickbook_grammar& g;
557 
558             cl::rule<Scanner> const&
startquickbook::teletype_highlight::definition559             start() const { return program; }
560         };
561 
562         syntax_highlight_actions& actions;
563     };
564 
syntax_highlight(parse_iterator first,parse_iterator last,quickbook::state & state,source_mode_type source_mode,bool is_block)565     void syntax_highlight(
566         parse_iterator first,
567         parse_iterator last,
568         quickbook::state& state,
569         source_mode_type source_mode,
570         bool is_block)
571     {
572         syntax_highlight_actions syn_actions(state, is_block);
573 
574         // print the code with syntax coloring
575         switch(source_mode)
576         {
577             case source_mode_tags::cpp: {
578                 cpp_highlight cpp_p(syn_actions);
579                 boost::spirit::classic::parse(first, last, cpp_p);
580                 break;
581             }
582             case source_mode_tags::python: {
583                 python_highlight python_p(syn_actions);
584                 boost::spirit::classic::parse(first, last, python_p);
585                 break;
586             }
587             case source_mode_tags::teletype: {
588                 teletype_highlight teletype_p(syn_actions);
589                 boost::spirit::classic::parse(first, last, teletype_p);
590                 break;
591             }
592             default:
593                 BOOST_ASSERT(0);
594         }
595     }
596 }
597