1 /*=============================================================================
2     Boost.Wave: A Standard compliant C++ preprocessor library
3 
4     Detect the need to insert a whitespace token into the output stream
5 
6     http://www.boost.org/
7 
8     Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
9     Software License, Version 1.0. (See accompanying file
10     LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11 =============================================================================*/
12 #if !defined(INSERT_WHITESPACE_DETECTION_HPP_765EF77B_0513_4967_BDD6_6A38148C4C96_INCLUDED)
13 #define INSERT_WHITESPACE_DETECTION_HPP_765EF77B_0513_4967_BDD6_6A38148C4C96_INCLUDED
14 
15 #include <boost/wave/wave_config.hpp>
16 #include <boost/wave/token_ids.hpp>
17 
18 // this must occur after all of the includes and before any code appears
19 #ifdef BOOST_HAS_ABI_HEADERS
20 #include BOOST_ABI_PREFIX
21 #endif
22 
23 ///////////////////////////////////////////////////////////////////////////////
24 namespace boost {
25 namespace wave {
26 namespace util {
27 
28 namespace impl {
29 
30 // T_IDENTIFIER
31     template <typename StringT>
32     inline bool
would_form_universal_char(StringT const & value)33     would_form_universal_char (StringT const &value)
34     {
35         if ('u' != value[0] && 'U' != value[0])
36             return false;
37         if ('u' == value[0] && value.size() < 5)
38             return false;
39         if ('U' == value[0] && value.size() < 9)
40             return false;
41 
42     typename StringT::size_type pos =
43         value.find_first_not_of("0123456789abcdefABCDEF", 1);
44 
45         if (StringT::npos == pos ||
46             ('u' == value[0] && pos > 5) ||
47             ('U' == value[0] && pos > 9))
48         {
49             return true;        // would form an universal char
50         }
51         return false;
52     }
53     template <typename StringT>
54     inline bool
handle_identifier(boost::wave::token_id prev,boost::wave::token_id before,StringT const & value)55     handle_identifier(boost::wave::token_id prev,
56         boost::wave::token_id before, StringT const &value)
57     {
58         using namespace boost::wave;
59         switch (static_cast<unsigned int>(prev)) {
60         case T_IDENTIFIER:
61         case T_NONREPLACABLE_IDENTIFIER:
62         case T_COMPL_ALT:
63         case T_OR_ALT:
64         case T_AND_ALT:
65         case T_NOT_ALT:
66         case T_XOR_ALT:
67         case T_ANDASSIGN_ALT:
68         case T_ORASSIGN_ALT:
69         case T_XORASSIGN_ALT:
70         case T_NOTEQUAL_ALT:
71         case T_FIXEDPOINTLIT:
72             return true;
73 
74         case T_FLOATLIT:
75         case T_INTLIT:
76         case T_PP_NUMBER:
77             return (value.size() > 1 || (value[0] != 'e' && value[0] != 'E'));
78 
79          // avoid constructing universal characters (\u1234)
80         case TOKEN_FROM_ID('\\', UnknownTokenType):
81             return would_form_universal_char(value);
82         }
83         return false;
84     }
85 // T_INTLIT
86     inline bool
handle_intlit(boost::wave::token_id prev,boost::wave::token_id)87     handle_intlit(boost::wave::token_id prev, boost::wave::token_id /*before*/)
88     {
89         using namespace boost::wave;
90         switch (static_cast<unsigned int>(prev)) {
91         case T_IDENTIFIER:
92         case T_NONREPLACABLE_IDENTIFIER:
93         case T_INTLIT:
94         case T_FLOATLIT:
95         case T_FIXEDPOINTLIT:
96         case T_PP_NUMBER:
97             return true;
98         }
99         return false;
100     }
101 // T_FLOATLIT
102     inline bool
handle_floatlit(boost::wave::token_id prev,boost::wave::token_id)103     handle_floatlit(boost::wave::token_id prev,
104         boost::wave::token_id /*before*/)
105     {
106         using namespace boost::wave;
107         switch (static_cast<unsigned int>(prev)) {
108         case T_IDENTIFIER:
109         case T_NONREPLACABLE_IDENTIFIER:
110         case T_INTLIT:
111         case T_FLOATLIT:
112         case T_FIXEDPOINTLIT:
113         case T_PP_NUMBER:
114             return true;
115         }
116         return false;
117     }
118 // <% T_LEFTBRACE
119     inline bool
handle_alt_leftbrace(boost::wave::token_id prev,boost::wave::token_id)120     handle_alt_leftbrace(boost::wave::token_id prev,
121         boost::wave::token_id /*before*/)
122     {
123         using namespace boost::wave;
124         switch (static_cast<unsigned int>(prev)) {
125         case T_LESS:        // <<%
126         case T_SHIFTLEFT:   // <<<%
127             return true;
128         }
129         return false;
130     }
131 // <: T_LEFTBRACKET
132     inline bool
handle_alt_leftbracket(boost::wave::token_id prev,boost::wave::token_id)133     handle_alt_leftbracket(boost::wave::token_id prev,
134         boost::wave::token_id /*before*/)
135     {
136         using namespace boost::wave;
137         switch (static_cast<unsigned int>(prev)) {
138         case T_LESS:        // <<:
139         case T_SHIFTLEFT:   // <<<:
140             return true;
141         }
142         return false;
143     }
144 // T_FIXEDPOINTLIT
145     inline bool
handle_fixedpointlit(boost::wave::token_id prev,boost::wave::token_id)146     handle_fixedpointlit(boost::wave::token_id prev,
147         boost::wave::token_id /*before*/)
148     {
149         using namespace boost::wave;
150         switch (static_cast<unsigned int>(prev)) {
151         case T_IDENTIFIER:
152         case T_NONREPLACABLE_IDENTIFIER:
153         case T_INTLIT:
154         case T_FLOATLIT:
155         case T_FIXEDPOINTLIT:
156         case T_PP_NUMBER:
157             return true;
158         }
159         return false;
160     }
161 // T_DOT
162     inline bool
handle_dot(boost::wave::token_id prev,boost::wave::token_id before)163     handle_dot(boost::wave::token_id prev, boost::wave::token_id before)
164     {
165         using namespace boost::wave;
166         switch (static_cast<unsigned int>(prev)) {
167         case T_DOT:
168             if (T_DOT == before)
169                 return true;    // ...
170             break;
171         }
172         return false;
173     }
174 // T_QUESTION_MARK
175     inline bool
handle_questionmark(boost::wave::token_id prev,boost::wave::token_id)176     handle_questionmark(boost::wave::token_id prev,
177         boost::wave::token_id /*before*/)
178     {
179         using namespace boost::wave;
180         switch(static_cast<unsigned int>(prev)) {
181         case TOKEN_FROM_ID('\\', UnknownTokenType):     // \?
182         case T_QUESTION_MARK:   // ??
183             return true;
184         }
185         return false;
186     }
187 // T_NEWLINE
188     inline bool
handle_newline(boost::wave::token_id prev,boost::wave::token_id before)189     handle_newline(boost::wave::token_id prev,
190         boost::wave::token_id before)
191     {
192         using namespace boost::wave;
193         switch(static_cast<unsigned int>(prev)) {
194         case TOKEN_FROM_ID('\\', UnknownTokenType): // \ \n
195         case T_DIVIDE:
196             if (T_QUESTION_MARK == before)
197                 return true;    // ?/\n     // may be \\n
198             break;
199         }
200         return false;
201     }
202 
203     inline bool
handle_parens(boost::wave::token_id prev)204     handle_parens(boost::wave::token_id prev)
205     {
206         switch (static_cast<unsigned int>(prev)) {
207         case T_LEFTPAREN:
208         case T_RIGHTPAREN:
209         case T_LEFTBRACKET:
210         case T_RIGHTBRACKET:
211         case T_LEFTBRACE:
212         case T_RIGHTBRACE:
213         case T_SEMICOLON:
214         case T_COMMA:
215         case T_COLON:
216             // no insertion between parens/brackets/braces and operators
217             return false;
218 
219         default:
220             break;
221         }
222         return true;
223     }
224 
225 }   // namespace impl
226 
227 class insert_whitespace_detection
228 {
229 public:
insert_whitespace_detection(bool insert_whitespace_=true)230     insert_whitespace_detection(bool insert_whitespace_ = true)
231     :   insert_whitespace(insert_whitespace_),
232         prev(boost::wave::T_EOF), beforeprev(boost::wave::T_EOF)
233     {}
234 
235     template <typename StringT>
must_insert(boost::wave::token_id current,StringT const & value)236     bool must_insert(boost::wave::token_id current, StringT const &value)
237     {
238         if (!insert_whitespace)
239             return false;       // skip whitespace insertion alltogether
240 
241         using namespace boost::wave;
242         switch (static_cast<unsigned int>(current)) {
243         case T_NONREPLACABLE_IDENTIFIER:
244         case T_IDENTIFIER:
245             return impl::handle_identifier(prev, beforeprev, value);
246         case T_PP_NUMBER:
247         case T_INTLIT:
248             return impl::handle_intlit(prev, beforeprev);
249         case T_FLOATLIT:
250             return impl::handle_floatlit(prev, beforeprev);
251         case T_STRINGLIT:
252             if (TOKEN_FROM_ID('L', IdentifierTokenType) == prev)       // 'L'
253                 return true;
254             break;
255         case T_LEFTBRACE_ALT:
256             return impl::handle_alt_leftbrace(prev, beforeprev);
257         case T_LEFTBRACKET_ALT:
258             return impl::handle_alt_leftbracket(prev, beforeprev);
259         case T_FIXEDPOINTLIT:
260             return impl::handle_fixedpointlit(prev, beforeprev);
261         case T_DOT:
262             return impl::handle_dot(prev, beforeprev);
263         case T_QUESTION_MARK:
264             return impl::handle_questionmark(prev, beforeprev);
265         case T_NEWLINE:
266             return impl::handle_newline(prev, beforeprev);
267 
268         case T_LEFTPAREN:
269         case T_RIGHTPAREN:
270         case T_LEFTBRACKET:
271         case T_RIGHTBRACKET:
272         case T_SEMICOLON:
273         case T_COMMA:
274         case T_COLON:
275             switch (static_cast<unsigned int>(prev)) {
276             case T_LEFTPAREN:
277             case T_RIGHTPAREN:
278             case T_LEFTBRACKET:
279             case T_RIGHTBRACKET:
280             case T_LEFTBRACE:
281             case T_RIGHTBRACE:
282                 return false;   // no insertion between parens/brackets/braces
283 
284             default:
285                 if (IS_CATEGORY(prev, OperatorTokenType))
286                     return false;
287                 break;
288             }
289             break;
290 
291         case T_LEFTBRACE:
292         case T_RIGHTBRACE:
293             switch (static_cast<unsigned int>(prev)) {
294             case T_LEFTPAREN:
295             case T_RIGHTPAREN:
296             case T_LEFTBRACKET:
297             case T_RIGHTBRACKET:
298             case T_LEFTBRACE:
299             case T_RIGHTBRACE:
300             case T_SEMICOLON:
301             case T_COMMA:
302             case T_COLON:
303                 return false;   // no insertion between parens/brackets/braces
304 
305             case T_QUESTION_MARK:
306                 if (T_QUESTION_MARK == beforeprev)
307                     return true;
308                 if (IS_CATEGORY(prev, OperatorTokenType))
309                     return false;
310                 break;
311 
312             default:
313                 break;
314             }
315             break;
316 
317         case T_MINUS:
318         case T_MINUSMINUS:
319         case T_MINUSASSIGN:
320             if (T_MINUS == prev || T_MINUSMINUS == prev)
321                 return true;
322             if (!impl::handle_parens(prev))
323                 return false;
324             if (T_QUESTION_MARK == prev && T_QUESTION_MARK == beforeprev)
325                 return true;
326             break;
327 
328         case T_PLUS:
329         case T_PLUSPLUS:
330         case T_PLUSASSIGN:
331             if (T_PLUS == prev || T_PLUSPLUS == prev)
332                 return true;
333             if (!impl::handle_parens(prev))
334                 return false;
335             if (T_QUESTION_MARK == prev && T_QUESTION_MARK == beforeprev)
336                 return true;
337             break;
338 
339         case T_DIVIDE:
340         case T_DIVIDEASSIGN:
341             if (T_DIVIDE == prev)
342                 return true;
343             if (!impl::handle_parens(prev))
344                 return false;
345             if (T_QUESTION_MARK == prev && T_QUESTION_MARK == beforeprev)
346                 return true;
347             break;
348 
349         case T_EQUAL:
350         case T_ASSIGN:
351             switch (static_cast<unsigned int>(prev)) {
352             case T_PLUSASSIGN:
353             case T_MINUSASSIGN:
354             case T_DIVIDEASSIGN:
355             case T_STARASSIGN:
356             case T_SHIFTRIGHTASSIGN:
357             case T_SHIFTLEFTASSIGN:
358             case T_EQUAL:
359             case T_NOTEQUAL:
360             case T_LESSEQUAL:
361             case T_GREATEREQUAL:
362             case T_LESS:
363             case T_GREATER:
364             case T_PLUS:
365             case T_MINUS:
366             case T_STAR:
367             case T_DIVIDE:
368             case T_ORASSIGN:
369             case T_ANDASSIGN:
370             case T_XORASSIGN:
371             case T_OR:
372             case T_AND:
373             case T_XOR:
374             case T_OROR:
375             case T_ANDAND:
376                 return true;
377 
378             case T_QUESTION_MARK:
379                 if (T_QUESTION_MARK == beforeprev)
380                     return true;
381                 break;
382 
383             default:
384                 if (!impl::handle_parens(prev))
385                     return false;
386                 break;
387             }
388             break;
389 
390         case T_GREATER:
391             if (T_MINUS == prev || T_GREATER == prev)
392                 return true;    // prevent -> or >>
393             if (!impl::handle_parens(prev))
394                 return false;
395             if (T_QUESTION_MARK == prev && T_QUESTION_MARK == beforeprev)
396                 return true;
397             break;
398 
399         case T_LESS:
400             if (T_LESS == prev)
401                 return true;    // prevent <<
402             // fall through
403         case T_CHARLIT:
404         case T_NOT:
405         case T_NOTEQUAL:
406             if (!impl::handle_parens(prev))
407                 return false;
408             if (T_QUESTION_MARK == prev && T_QUESTION_MARK == beforeprev)
409                 return true;
410             break;
411 
412         case T_AND:
413         case T_ANDAND:
414             if (!impl::handle_parens(prev))
415                 return false;
416             if (T_AND == prev || T_ANDAND == prev)
417                 return true;
418             break;
419 
420         case T_OR:
421             if (!impl::handle_parens(prev))
422                 return false;
423             if (T_OR == prev)
424                 return true;
425             break;
426 
427         case T_XOR:
428             if (!impl::handle_parens(prev))
429                 return false;
430             if (T_XOR == prev)
431                 return true;
432             break;
433 
434         case T_COMPL_ALT:
435         case T_OR_ALT:
436         case T_AND_ALT:
437         case T_NOT_ALT:
438         case T_XOR_ALT:
439         case T_ANDASSIGN_ALT:
440         case T_ORASSIGN_ALT:
441         case T_XORASSIGN_ALT:
442         case T_NOTEQUAL_ALT:
443             switch (static_cast<unsigned int>(prev)) {
444             case T_LEFTPAREN:
445             case T_RIGHTPAREN:
446             case T_LEFTBRACKET:
447             case T_RIGHTBRACKET:
448             case T_LEFTBRACE:
449             case T_RIGHTBRACE:
450             case T_SEMICOLON:
451             case T_COMMA:
452             case T_COLON:
453                 // no insertion between parens/brackets/braces and operators
454                 return false;
455 
456             case T_IDENTIFIER:
457                 if (T_NONREPLACABLE_IDENTIFIER == prev ||
458                     IS_CATEGORY(prev, KeywordTokenType))
459                 {
460                     return true;
461                 }
462                 break;
463 
464             default:
465                 break;
466             }
467             break;
468 
469         case T_STAR:
470             if (T_STAR == prev)
471                 return false;     // '*****' do not need to be separated
472             if (T_GREATER== prev &&
473                 (T_MINUS == beforeprev || T_MINUSMINUS == beforeprev)
474                )
475             {
476                 return true;    // prevent ->*
477             }
478             break;
479 
480         case T_POUND:
481             if (T_POUND == prev)
482                 return true;
483             break;
484         }
485 
486     // FIXME: else, handle operators separately (will catch to many cases)
487 //         if (IS_CATEGORY(current, OperatorTokenType) &&
488 //             IS_CATEGORY(prev, OperatorTokenType))
489 //         {
490 //             return true;    // operators must be delimited always
491 //         }
492         return false;
493     }
shift_tokens(boost::wave::token_id next_id)494     void shift_tokens (boost::wave::token_id next_id)
495     {
496         if (insert_whitespace) {
497             beforeprev = prev;
498             prev = next_id;
499         }
500     }
501 
502 private:
503     bool insert_whitespace;            // enable this component
504     boost::wave::token_id prev;        // the previous analyzed token
505     boost::wave::token_id beforeprev;  // the token before the previous
506 };
507 
508 ///////////////////////////////////////////////////////////////////////////////
509 }   //  namespace util
510 }   //  namespace wave
511 }   //  namespace boost
512 
513 // the suffix header occurs after all of the code
514 #ifdef BOOST_HAS_ABI_HEADERS
515 #include BOOST_ABI_SUFFIX
516 #endif
517 
518 #endif // !defined(INSERT_WHITESPACE_DETECTION_HPP_765EF77B_0513_4967_BDD6_6A38148C4C96_INCLUDED)
519