1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; either version 2 of the License, or
6    (at your option) any later version.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17 
18 #include "udm_config.h"
19 
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sys/types.h>
24 
25 #include "udm_common.h"
26 #include "udm_utils.h"
27 #include "udm_lex.h"
28 #include "udm_ctype.h"
29 
30 
/*
  Return a printable name for a lexeme type.
  The result is always a string literal (never NULL), so the caller
  must not modify or free it. Any value without its own case label
  yields "UNKNOWN".
*/
const char *UdmLex2str(udm_lex_t lex)
{
  switch (lex)
  {
    case UDM_LEX_EOF:       return "EOF";
    case UDM_LEX_STRING :   return "STRING";
    case UDM_LEX_ESCAPED_STRING :   return "ESTRING";
    case UDM_LEX_CHAR_LITERAL:      return "CHAR";
    case UDM_LEX_IDENT:     return "IDENT";
    case UDM_LEX_EQ:        return "'='";
    case UDM_LEX_LT:        return "'<'";
    case UDM_LEX_GT:        return "'>'";
    case UDM_LEX_EQ_EQ:     return "'=='";
    case UDM_LEX_NOT_EQ:    return "'!='";
    case UDM_LEX_LE:        return "'<='";
    case UDM_LEX_GE:        return "'>='";
    case UDM_LEX_LB:        return "'['";
    case UDM_LEX_RB:        return "']'";
    case UDM_LEX_SLASH:     return "'/'";
    case UDM_LEX_TEXT:      return "TEXT";
    case UDM_LEX_UINT:      return "UINT";
    case UDM_LEX_QUESTION:  return "'?'";
    case UDM_LEX_EXCLAM:    return "'!'";
    case UDM_LEX_LP:        return "'('";
    case UDM_LEX_RP:        return "')'";
    case UDM_LEX_PLUS:      return "'+'";
    case UDM_LEX_MINUS:     return "'-'";
    case UDM_LEX_ASTERISK:  return "'*'";
    case UDM_LEX_HASH:      return "'#'";
    case UDM_LEX_DOLLAR:    return "'$'";
    case UDM_LEX_PERCENT:   return "'%'";
    case UDM_LEX_AMPERSAND: return "'&'";   /* was "'%'" (copy-paste slip) */
    case UDM_LEX_COMMA:     return "','";
    case UDM_LEX_DOT:       return "'.'";
    case UDM_LEX_COLON:     return "':'";
    case UDM_LEX_SEMICOLON: return "';'";
    case UDM_LEX_AT:        return "'@'";
    case UDM_LEX_CARET:     return "'^'";
    case UDM_LEX_LCB:       return "'{'";
    case UDM_LEX_RCB:       return "'}'";
    case UDM_LEX_VBAR:      return "'|'";
    case UDM_LEX_TILDE:     return "'~'";
    case UDM_LEX_INC:       return "++";
    case UDM_LEX_DEC:       return "--";
    case UDM_LEX_MUL_EQ:    return "*=";
    case UDM_LEX_DIV_EQ:    return "/=";
    case UDM_LEX_REM_EQ:    return "%=";
    case UDM_LEX_INC_EQ:    return "+=";
    case UDM_LEX_DEC_EQ:    return "-=";    /* was "+=" */
    case UDM_LEX_AND_EQ:    return "&=";
    case UDM_LEX_OR_EQ:     return "|=";
    case UDM_LEX_XOR_EQ:    return "^=";    /* was "&=" */
    case UDM_LEX_BOOL_OR:   return "||";
    case UDM_LEX_BOOL_AND:  return "&&";    /* was "||" */
    case UDM_LEX_LSHIFT:    return "<<";
    case UDM_LEX_RSHIFT:    return ">>";
    case UDM_LEX_LSHIFT_EQ: return "<<=";
    case UDM_LEX_RSHIFT_EQ: return ">>=";
    case UDM_LEX_IF:        return "if";
    case UDM_LEX_ELSE:      return "else";
    case UDM_LEX_WHILE:     return "while";
    case UDM_LEX_DO:        return "do";
    case UDM_LEX_FOR:       return "for";
    case UDM_LEX_BREAK:     return "break";
    case UDM_LEX_CONTINUE:  return "continue";
    case UDM_LEX_SIZEOF:    return "sizeof";

    case UDM_LEX_AUTO:      return "auto";
    case UDM_LEX_CASE:      return "case";
    case UDM_LEX_CONST:     return "const";
    case UDM_LEX_DEFAULT:   return "default";
    case UDM_LEX_ENUM:      return "enum";
    case UDM_LEX_EXTERN:    return "extern";
    case UDM_LEX_GOTO:      return "goto";
    case UDM_LEX_REGISTER:  return "register";
    case UDM_LEX_RETURN:    return "return";
    case UDM_LEX_STATIC:    return "static";
    case UDM_LEX_STRUCT:    return "struct";
    case UDM_LEX_SWITCH:    return "switch";
    case UDM_LEX_TYPEDEF:   return "typedef";
    case UDM_LEX_UNION:     return "union";
    case UDM_LEX_VOLATILE:  return "volatile";

    case UDM_LEX_CHAR:      return "char";
    case UDM_LEX_DOUBLE:    return "double";
    case UDM_LEX_FLOAT:     return "float";
    case UDM_LEX_INT:       return "int";
    case UDM_LEX_LONG:      return "long";
    case UDM_LEX_SHORT:     return "short";
    case UDM_LEX_SIGNED:    return "signed";
    case UDM_LEX_UNSIGNED:  return "unsigned";

    case UDM_LEX_OPERATOR:  return "operator";

    case UDM_LEX_COUT:      return "cout";
    case UDM_LEX_UNKNOWN:   return "UNKNOWN";
    case UDM_LEX_COMMENT:   return "COMMENT";
  }
  /* Reached only for values that have no case label above. */
  return "UNKNOWN";
}
131 
132 
/*
  Attach a scanner to an input buffer.
  The buffer description (str, length) is stored in s->content via
  UdmConstTokenSet() and the read cursor is placed on str.
*/
void UdmLexScannerInit(UDM_LEX_SCANNER *s, const char *str, size_t length)
{
  UdmConstTokenSet(&s->content, str, length);
  s->cur= str;
}
138 
139 
140 void
UdmLexScannerSkipSpaces(UDM_LEX_SCANNER * s)141 UdmLexScannerSkipSpaces(UDM_LEX_SCANNER *s)
142 {
143   for ( ; !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
144   {
145     if (!UdmLexScannerIsSpace(UdmLexScannerCur(s)[0]))
146       break;
147   }
148 }
149 
150 
151 static inline udm_bool_t
UdmIsIdentStart(int x)152 UdmIsIdentStart(int x)
153 {
154   return ((x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') || x == '_');
155 }
156 
157 
158 static inline udm_bool_t
UdmIsIdentBody(int x)159 UdmIsIdentBody(int x)
160 {
161   return UdmIsIdentStart(x) || (x >= '0' && x <= '9');
162 }
163 
164 
165 
166 static udm_bool_t
UdmLexScannerScanKeywordOrOperator(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a,const char * str,size_t length,udm_lex_t type,udm_bool_t is_keyword)167 UdmLexScannerScanKeywordOrOperator(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a,
168                                    const char *str, size_t length,
169                                    udm_lex_t type,
170                                    udm_bool_t is_keyword)
171 {
172   if (UdmLexScannerCur(s) + length + 1 > UdmLexScannerEnd(s) ||
173       memcmp(UdmLexScannerCur(s), str, length) ||
174       (is_keyword && UdmIsIdentBody(UdmLexScannerCur(s)[length])))
175     return UDM_TRUE;
176   for ( ; length; length--)
177     UdmLexScannerShift(s);
178   a->token.end= UdmLexScannerCur(s);
179   a->type= type;
180   return UDM_FALSE;
181 }
182 
183 
184 static udm_bool_t
UdmLexScannerScanOperator2(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a,const char * str,size_t length,udm_lex_t type)185 UdmLexScannerScanOperator2(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a,
186                            const char *str, size_t length,
187                            udm_lex_t type)
188 {
189   return UdmLexScannerScanKeywordOrOperator(s, a, str, length, type, UDM_FALSE);
190 }
191 
192 
193 static udm_bool_t
UdmLexScannerScanOneKeyword(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a,const char * str,size_t length,udm_lex_t type)194 UdmLexScannerScanOneKeyword(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a,
195                          const char *str, size_t length,
196                          udm_lex_t type)
197 {
198   return UdmLexScannerScanKeywordOrOperator(s, a, str, length, type, UDM_TRUE);
199 }
200 
201 
202 /*
203   Used keywords:
204   --------------
205   break
206   continue
207   do
208   else
209   for
210   if
211   sizeof
212   while
213 
214   C remaining keywords:
215   --------------------
216   char
217   double
218   float
219   int
220   long
221   short
222   signed
223   unsigned
224   void
225 
226   Unused keywords
227   ---------------
228   auto
229   case
230   const
231   default
232   enum
233   extern
234   goto
235   register
236   return
237   static
238   struct
239   switch
240   typedef
241   union
242   volatile
243 
244   C++ used keywords:
245   ------------------
246   operator
247 
248   C++ remaining keywords:
249   -----------------------
250 
251   alignas (since C++11)
252   alignof (since C++11)
253   and
254   and_eq
255   asm
256   auto(1)
257   bitand
258   bitor
259   bool
260   case
261   catch
262   char
263   char16_t (since C++11)
264   char32_t (since C++11)
265   class
266   compl
267   concept (concepts TS)
268   const
269   constexpr (since C++11)
270   const_cast
271   decltype (since C++11)
272   default(1)
273   delete(1)
274   double
275   dynamic_cast
276   enum
277   explicit
278   export(1)
279   extern
280   false
281   float
282   friend
283   goto
284   inline
285   int
286   long
287   mutable
288   namespace
289   new
290   noexcept (since C++11)
291   not
292   not_eq
293   nullptr (since C++11)
294   or
295   or_eq
296   private
297   protected
298   public
299   register
300   reinterpret_cast
301   requires (concepts TS)
302   return
303   short
304   signed
305   static
306   static_assert (since C++11)
307   static_cast
308   struct
309   switch
310   template
311   this
312   thread_local (since C++11)
313   throw
314   true
315   try
316   typedef
317   typeid
318   typename
319   union
320   unsigned
321   using(1)
322   virtual
323   void
324   volatile
325   wchar_t
326   xor
327   xor_eq
328 */
329 static udm_bool_t
UdmLexScannerScanKeyword(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)330 UdmLexScannerScanKeyword(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
331 {
332   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("if"), UDM_LEX_IF))
333     return UDM_FALSE;
334   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("else"), UDM_LEX_ELSE))
335     return UDM_FALSE;
336   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("while"), UDM_LEX_WHILE))
337     return UDM_FALSE;
338   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("do"), UDM_LEX_DO))
339     return UDM_FALSE;
340   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("for"), UDM_LEX_FOR))
341     return UDM_FALSE;
342   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("break"), UDM_LEX_BREAK))
343     return UDM_FALSE;
344   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("sizeof"), UDM_LEX_SIZEOF))
345     return UDM_FALSE;
346   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("continue"), UDM_LEX_CONTINUE))
347     return UDM_FALSE;
348   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("cout"), UDM_LEX_COUT))
349     return UDM_FALSE;
350 
351   /* Unused keywords */
352   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("auto"), UDM_LEX_AUTO))
353     return UDM_FALSE;
354   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("case"), UDM_LEX_CASE))
355     return UDM_FALSE;
356   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("const"), UDM_LEX_CONST))
357     return UDM_FALSE;
358   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("default"), UDM_LEX_DEFAULT))
359     return UDM_FALSE;
360   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("enum"), UDM_LEX_ENUM))
361     return UDM_FALSE;
362   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("extern"), UDM_LEX_EXTERN))
363     return UDM_FALSE;
364   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("goto"), UDM_LEX_GOTO))
365     return UDM_FALSE;
366   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("register"), UDM_LEX_REGISTER))
367     return UDM_FALSE;
368   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("return"), UDM_LEX_RETURN))
369     return UDM_FALSE;
370   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("static"), UDM_LEX_STATIC))
371     return UDM_FALSE;
372   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("struct"), UDM_LEX_STRUCT))
373     return UDM_FALSE;
374   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("switch"), UDM_LEX_SWITCH))
375     return UDM_FALSE;
376   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("typedef"), UDM_LEX_TYPEDEF))
377     return UDM_FALSE;
378   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("union"), UDM_LEX_UNION))
379     return UDM_FALSE;
380   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("volatile"), UDM_LEX_VOLATILE))
381     return UDM_FALSE;
382 
383   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("char"), UDM_LEX_CHAR))
384     return UDM_FALSE;
385   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("double"), UDM_LEX_DOUBLE))
386     return UDM_FALSE;
387   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("float"), UDM_LEX_FLOAT))
388     return UDM_FALSE;
389   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("int"), UDM_LEX_INT))
390     return UDM_FALSE;
391   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("long"), UDM_LEX_LONG))
392     return UDM_FALSE;
393   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("short"), UDM_LEX_SHORT))
394     return UDM_FALSE;
395   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("signed"), UDM_LEX_SIGNED))
396     return UDM_FALSE;
397   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("unsigned"), UDM_LEX_UNSIGNED))
398     return UDM_FALSE;
399 
400   if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("operator"), UDM_LEX_OPERATOR))
401     return UDM_FALSE;
402 
403   return UDM_TRUE;
404 }
405 
406 
407 udm_bool_t
UdmLexScannerScanCOperator(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)408 UdmLexScannerScanCOperator(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
409 {
410   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("=="), UDM_LEX_EQ_EQ))
411     return UDM_FALSE;
412   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("!="), UDM_LEX_NOT_EQ))
413     return UDM_FALSE;
414   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("++"), UDM_LEX_INC))
415     return UDM_FALSE;
416   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("--"), UDM_LEX_DEC))
417     return UDM_FALSE;
418   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("*="), UDM_LEX_MUL_EQ))
419     return UDM_FALSE;
420   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("/="), UDM_LEX_DIV_EQ))
421     return UDM_FALSE;
422   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("%="), UDM_LEX_REM_EQ))
423     return UDM_FALSE;
424   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("+="), UDM_LEX_INC_EQ))
425     return UDM_FALSE;
426   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("-="), UDM_LEX_DEC_EQ))
427     return UDM_FALSE;
428   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("&="), UDM_LEX_AND_EQ))
429     return UDM_FALSE;
430   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("|="), UDM_LEX_OR_EQ))
431     return UDM_FALSE;
432   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("||"), UDM_LEX_BOOL_OR))
433     return UDM_FALSE;
434   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("&&"), UDM_LEX_BOOL_AND))
435     return UDM_FALSE;
436   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("^="), UDM_LEX_XOR_EQ))
437     return UDM_FALSE;
438   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("<<="), UDM_LEX_LSHIFT_EQ))
439     return UDM_FALSE;
440   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("<<"), UDM_LEX_LSHIFT))
441     return UDM_FALSE;
442   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN(">>="), UDM_LEX_RSHIFT_EQ))
443     return UDM_FALSE;
444   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN(">>"), UDM_LEX_RSHIFT))
445     return UDM_FALSE;
446   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("<="), UDM_LEX_LE))
447     return UDM_FALSE;
448   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN(">="), UDM_LEX_GE))
449     return UDM_FALSE;
450   if (!UdmLexScannerScanKeyword(s, a))
451     return UDM_FALSE;
452   return UDM_TRUE;
453 }
454 
455 
456 /*
457   Simple tokens, consisting on one character.
458 */
459 udm_bool_t
UdmLexScannerScanPunctuation(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)460 UdmLexScannerScanPunctuation(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
461 {
462   switch (UdmLexScannerCur(s)[0])
463   {
464   case UDM_LEX_EXCLAM:        /*  '!'   0x21 */
465   case UDM_LEX_HASH:          /*  '#'   0x23 */
466   case UDM_LEX_DOLLAR:        /*  '$'   0x24 */
467   case UDM_LEX_PERCENT:       /*  '%'   0x25 */
468   case UDM_LEX_AMPERSAND:     /*  '&'   0x26 */
469   case UDM_LEX_LP:            /*  '('   0x28 */
470   case UDM_LEX_RP:            /*  ')'   0x29 */
471   case UDM_LEX_ASTERISK:      /*  '*'   0x2A */
472   case UDM_LEX_PLUS:          /*  '+'   0x2B */
473   case UDM_LEX_COMMA:         /*  ','   0x2C */
474   case UDM_LEX_MINUS:         /*  '-'   0x2D */
475   case UDM_LEX_DOT:           /*  '.'   0x2E */
476   case UDM_LEX_SLASH:         /*  '/'   0x2F */
477   case UDM_LEX_COLON:         /*  ':'   0x3A */
478   case UDM_LEX_SEMICOLON:     /*  ';'   0x3B */
479   case UDM_LEX_LT:            /*  '<'   0x3C */
480   case UDM_LEX_EQ:            /*  '='   0x3D */
481   case UDM_LEX_GT:            /*  '>'   0x3E */
482   case UDM_LEX_QUESTION:      /*  '?'   0x3F */
483   case UDM_LEX_AT:            /*  '@'   0x40 */
484   case UDM_LEX_LB:            /*  '['   0x5B */
485   case UDM_LEX_RB:            /*  ']'   0x5D */
486   case UDM_LEX_CARET:         /*  '^'   0x5E */
487   case UDM_LEX_LCB:           /*  '{'   0x7B */
488   case UDM_LEX_VBAR:          /*  '|'   0x7C */
489   case UDM_LEX_RCB:           /*  '}'   0x7D */
490   case UDM_LEX_TILDE:         /*  '~'   0x7E */
491     UdmLexScannerShift(s);
492     a->token.end= UdmLexScannerCur(s);
493     a->type= (udm_lex_t) a->token.str[0];
494     return UDM_FALSE;
495   default:
496     break;
497   }
498   return UDM_TRUE;
499 }
500 
501 
502 static udm_bool_t
UdmLexScannerScanStringInternal(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a,udm_bool_t escaped)503 UdmLexScannerScanStringInternal(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a,
504                                 udm_bool_t escaped)
505 {
506   UDM_ASSERT(!UdmLexScannerEOF(s));
507   if (UdmLexScannerCur(s)[0] != '"' /*&& UdmLexScannerCur(s)[0] != '\''*/)
508     return UDM_TRUE;
509 
510   a->type= UDM_LEX_STRING;
511   UdmLexScannerShift(s);
512   for( ; !UdmLexScannerEOF(s); UdmLexScannerShift(s))
513   {
514     if (escaped && UdmLexScannerCur(s)[0] == '\\')
515     {
516       UdmLexScannerShift(s);
517       a->type= UDM_LEX_ESCAPED_STRING;
518       continue;
519     }
520     if (UdmLexScannerCur(s)[0] == a->token.str[0])
521       break;
522   }
523   a->token.end= UdmLexScannerCur(s);
524   if (a->token.str[0] == UdmLexScannerCur(s)[0])
525     UdmLexScannerShift(s);
526   a->token.str++;
527   return UDM_FALSE;
528 }
529 
530 
531 udm_bool_t
UdmLexScannerScanString(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)532 UdmLexScannerScanString(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
533 {
534   return UdmLexScannerScanStringInternal(s, a, UDM_FALSE);
535 }
536 
537 
538 udm_bool_t
UdmLexScannerScanEscapedString(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)539 UdmLexScannerScanEscapedString(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
540 {
541   return UdmLexScannerScanStringInternal(s, a, UDM_TRUE);
542 }
543 
544 udm_bool_t
UdmLexScannerScanChar(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)545 UdmLexScannerScanChar(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
546 {
547   UDM_ASSERT(!UdmLexScannerEOF(s));
548   if (UdmLexScannerCur(s)[0] != '\'')
549     return UDM_TRUE;
550 
551   UdmLexScannerShift(s);
552   for( ; !UdmLexScannerEOF(s); UdmLexScannerShift(s))
553   {
554     if (UdmLexScannerCur(s)[0] == '\\')
555     {
556       UdmLexScannerShift(s);
557       continue;
558     }
559     if (UdmLexScannerCur(s)[0] == a->token.str[0])
560       break;
561   }
562   a->token.end= UdmLexScannerCur(s);
563   if (a->token.str[0] == UdmLexScannerCur(s)[0])
564     UdmLexScannerShift(s);
565   a->token.str++;
566   a->type= UDM_LEX_CHAR_LITERAL;
567   return UDM_FALSE;
568 }
569 
570 
571 udm_bool_t
UdmLexScannerScanIdentifier(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)572 UdmLexScannerScanIdentifier(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
573 {
574   UDM_ASSERT(!UdmLexScannerEOF(s));
575 
576   if (!UdmIsIdentStart(UdmLexScannerCur(s)[0]))
577     return UDM_TRUE;
578 
579   for (UdmLexScannerShift(s) ; !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
580   {
581     if (!UdmIsIdentBody(UdmLexScannerCur(s)[0]))
582       break;
583   }
584   a->token.end= UdmLexScannerCur(s);
585   a->type= UDM_LEX_IDENT;
586   return UDM_FALSE;
587 }
588 
589 
590 udm_bool_t
UdmLexScannerScanUnsignedNumber(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)591 UdmLexScannerScanUnsignedNumber(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
592 {
593   UDM_ASSERT(!UdmLexScannerEOF(s));
594 
595   if (!udm_isdigit(UdmLexScannerCur(s)[0]))
596     return UDM_TRUE;
597 
598   for (UdmLexScannerShift(s) ; !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
599   {
600     if (!udm_isdigit(UdmLexScannerCur(s)[0]))
601       break;
602   }
603   a->token.end= UdmLexScannerCur(s);
604   a->type= UDM_LEX_UINT; /* TODO34: add ulonglong */
605   return UDM_FALSE;
606 }
607 
608 
609 udm_bool_t
UdmLexScannerScanCComment(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)610 UdmLexScannerScanCComment(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
611 {
612   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("//"), UDM_LEX_COMMENT))
613   {
614     for ( ; !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
615     {
616       if (UdmLexScannerCur(s)[0] == '\n')
617         break;
618     }
619     a->token.end= UdmLexScannerCur(s);
620     return UDM_FALSE;
621   }
622   if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("/*"), UDM_LEX_COMMENT))
623   {
624     for ( ;  !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
625     {
626       if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("*/"), UDM_LEX_COMMENT))
627         break;
628     }
629     a->token.end= UdmLexScannerCur(s);
630     return UDM_FALSE;
631   }
632   return UDM_TRUE;
633 }
634 
635 
636 /*** XML *******************************************************************/
637 
638 /*
639  NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
640 */
641 static inline udm_bool_t
UdmIsXMLNameBody(int x)642 UdmIsXMLNameBody(int x)
643 {
644   return ((x >= 'a' && x <= 'z') ||
645           (x >= 'A' && x <= 'Z') ||
646           (x >= '0' && x <= '9') ||
647            x == '.' ||
648            x == '-' ||
649            x == '_' ||
650            x == ':' ||
651            x < 0);
652 }
653 
654 
655 static inline udm_bool_t
UdmIsXMLNameStart(int x)656 UdmIsXMLNameStart(int x)
657 {
658   return ((x >= 'a' && x <= 'z') ||
659           (x >= 'A' && x <= 'Z') ||
660            x == '_' ||
661            x == ':' ||
662            x < 0);
663 }
664 
665 
666 /*
667   Name ::= (Letter | '_' | ':') (NameChar)*
668 */
669 udm_bool_t
UdmLexScannerScanXMLIdentifier(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)670 UdmLexScannerScanXMLIdentifier(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
671 {
672   UDM_ASSERT(!UdmLexScannerEOF(s));
673 
674   if (!UdmIsXMLNameStart(UdmLexScannerCur(s)[0]))
675     return UDM_TRUE;
676 
677   for (UdmLexScannerShift(s) ; !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
678   {
679     if (!UdmIsXMLNameBody(UdmLexScannerCur(s)[0]))
680       break;
681   }
682   a->token.end= UdmLexScannerCur(s);
683   a->type= UDM_LEX_IDENT;
684   return UDM_FALSE;
685 }
686 
687 
688 /*
689   Scan until "str" is found in the content.
690   Returns UDM_FALSE if the searched string was not found.
691   scanner->token.type is set to "on_error" (usually UDM_LEX_EOF).
692   Returns UDM_TRUE if the searched string was found.
693   scanner->token.type is set to "on_success" (usually UDM_LEX_TEXT).
694 */
695 udm_bool_t
UdmLexScannerScanUntil(UDM_LEX_SCANNER * scanner,const UDM_CONST_STR * str,udm_lex_t on_success,udm_lex_t on_error)696 UdmLexScannerScanUntil(UDM_LEX_SCANNER *scanner,
697                        const UDM_CONST_STR *str,
698                        udm_lex_t on_success,
699                        udm_lex_t on_error)
700 {
701   scanner->token.token.str= scanner->cur;
702   for ( ; scanner->cur < scanner->content.end; scanner->cur++)
703   {
704     size_t length= scanner->content.end - scanner->cur;
705     if (length >= str->length && !memcmp(scanner->cur, str->str, str->length))
706     {
707       scanner->token.type= on_success;
708       scanner->token.token.end= scanner->cur;
709       scanner->cur+= str->length;
710       return UDM_TRUE;
711     }
712   }
713   scanner->token.type= on_error;
714   scanner->token.token.end= scanner->cur;
715   return UDM_FALSE;
716 }
717