1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 #include "udm_config.h"
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sys/types.h>
24
25 #include "udm_common.h"
26 #include "udm_utils.h"
27 #include "udm_lex.h"
28 #include "udm_ctype.h"
29
30
/*
  Return a printable, statically allocated name for a lexeme type.
  Single-character punctuation is rendered in single quotes,
  multi-character operators and keywords are rendered verbatim.
  Values not listed in the switch yield "UNKNOWN".
*/
const char *UdmLex2str(udm_lex_t lex)
{
  switch (lex)
  {
  case UDM_LEX_EOF:            return "EOF";
  case UDM_LEX_STRING:         return "STRING";
  case UDM_LEX_ESCAPED_STRING: return "ESTRING";
  case UDM_LEX_CHAR_LITERAL:   return "CHAR";
  case UDM_LEX_IDENT:          return "IDENT";
  case UDM_LEX_EQ:             return "'='";
  case UDM_LEX_LT:             return "'<'";
  case UDM_LEX_GT:             return "'>'";
  case UDM_LEX_EQ_EQ:          return "'=='";
  case UDM_LEX_NOT_EQ:         return "'!='";
  case UDM_LEX_LE:             return "'<='";
  case UDM_LEX_GE:             return "'>='";
  case UDM_LEX_LB:             return "'['";
  case UDM_LEX_RB:             return "']'";
  case UDM_LEX_SLASH:          return "'/'";
  case UDM_LEX_TEXT:           return "TEXT";
  case UDM_LEX_UINT:           return "UINT";
  case UDM_LEX_QUESTION:       return "'?'";
  case UDM_LEX_EXCLAM:         return "'!'";
  case UDM_LEX_LP:             return "'('";
  case UDM_LEX_RP:             return "')'";
  case UDM_LEX_PLUS:           return "'+'";
  case UDM_LEX_MINUS:          return "'-'";
  case UDM_LEX_ASTERISK:       return "'*'";
  case UDM_LEX_HASH:           return "'#'";
  case UDM_LEX_DOLLAR:         return "'$'";
  case UDM_LEX_PERCENT:        return "'%'";
  case UDM_LEX_AMPERSAND:      return "'&'";   /* was "'%'" (copy-paste slip) */
  case UDM_LEX_COMMA:          return "','";
  case UDM_LEX_DOT:            return "'.'";
  case UDM_LEX_COLON:          return "':'";
  case UDM_LEX_SEMICOLON:      return "';'";
  case UDM_LEX_AT:             return "'@'";
  case UDM_LEX_CARET:          return "'^'";
  case UDM_LEX_LCB:            return "'{'";
  case UDM_LEX_RCB:            return "'}'";
  case UDM_LEX_VBAR:           return "'|'";
  case UDM_LEX_TILDE:          return "'~'";
  case UDM_LEX_INC:            return "++";
  case UDM_LEX_DEC:            return "--";
  case UDM_LEX_MUL_EQ:         return "*=";
  case UDM_LEX_DIV_EQ:         return "/=";
  case UDM_LEX_REM_EQ:         return "%=";
  case UDM_LEX_INC_EQ:         return "+=";
  case UDM_LEX_DEC_EQ:         return "-=";    /* was "+=" */
  case UDM_LEX_AND_EQ:         return "&=";
  case UDM_LEX_OR_EQ:          return "|=";
  case UDM_LEX_XOR_EQ:         return "^=";    /* was "&=" */
  case UDM_LEX_BOOL_OR:        return "||";
  case UDM_LEX_BOOL_AND:       return "&&";    /* was "||" */
  case UDM_LEX_LSHIFT:         return "<<";
  case UDM_LEX_RSHIFT:         return ">>";
  case UDM_LEX_LSHIFT_EQ:      return "<<=";
  case UDM_LEX_RSHIFT_EQ:      return ">>=";
  case UDM_LEX_IF:             return "if";
  case UDM_LEX_ELSE:           return "else";
  case UDM_LEX_WHILE:          return "while";
  case UDM_LEX_DO:             return "do";
  case UDM_LEX_FOR:            return "for";
  case UDM_LEX_BREAK:          return "break";
  case UDM_LEX_CONTINUE:       return "continue";
  case UDM_LEX_SIZEOF:         return "sizeof";

  case UDM_LEX_AUTO:           return "auto";
  case UDM_LEX_CASE:           return "case";
  case UDM_LEX_CONST:          return "const";
  case UDM_LEX_DEFAULT:        return "default";
  case UDM_LEX_ENUM:           return "enum";
  case UDM_LEX_EXTERN:         return "extern";
  case UDM_LEX_GOTO:           return "goto";
  case UDM_LEX_REGISTER:       return "register";
  case UDM_LEX_RETURN:         return "return";
  case UDM_LEX_STATIC:         return "static";
  case UDM_LEX_STRUCT:         return "struct";
  case UDM_LEX_SWITCH:         return "switch";
  case UDM_LEX_TYPEDEF:        return "typedef";
  case UDM_LEX_UNION:          return "union";
  case UDM_LEX_VOLATILE:       return "volatile";

  case UDM_LEX_CHAR:           return "char";
  case UDM_LEX_DOUBLE:         return "double";
  case UDM_LEX_FLOAT:          return "float";
  case UDM_LEX_INT:            return "int";
  case UDM_LEX_LONG:           return "long";
  case UDM_LEX_SHORT:          return "short";
  case UDM_LEX_SIGNED:         return "signed";
  case UDM_LEX_UNSIGNED:       return "unsigned";

  case UDM_LEX_OPERATOR:       return "operator";

  case UDM_LEX_COUT:           return "cout";
  case UDM_LEX_UNKNOWN:        return "UNKNOWN";
  case UDM_LEX_COMMENT:        return "COMMENT";
  }
  /* Reached only for a value that has no case label above. */
  return "UNKNOWN";
}
131
132
/*
  Prepare a scanner for reading "length" bytes starting at "str".
  The buffer description is handed to UdmConstTokenSet() to be recorded
  in s->content, and the read position s->cur is placed at "str".
*/
void UdmLexScannerInit(UDM_LEX_SCANNER *s, const char *str, size_t length)
{
  UdmConstTokenSet(&s->content, str, length);
  s->cur= str;
}
138
139
140 void
UdmLexScannerSkipSpaces(UDM_LEX_SCANNER * s)141 UdmLexScannerSkipSpaces(UDM_LEX_SCANNER *s)
142 {
143 for ( ; !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
144 {
145 if (!UdmLexScannerIsSpace(UdmLexScannerCur(s)[0]))
146 break;
147 }
148 }
149
150
151 static inline udm_bool_t
UdmIsIdentStart(int x)152 UdmIsIdentStart(int x)
153 {
154 return ((x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') || x == '_');
155 }
156
157
158 static inline udm_bool_t
UdmIsIdentBody(int x)159 UdmIsIdentBody(int x)
160 {
161 return UdmIsIdentStart(x) || (x >= '0' && x <= '9');
162 }
163
164
165
166 static udm_bool_t
UdmLexScannerScanKeywordOrOperator(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a,const char * str,size_t length,udm_lex_t type,udm_bool_t is_keyword)167 UdmLexScannerScanKeywordOrOperator(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a,
168 const char *str, size_t length,
169 udm_lex_t type,
170 udm_bool_t is_keyword)
171 {
172 if (UdmLexScannerCur(s) + length + 1 > UdmLexScannerEnd(s) ||
173 memcmp(UdmLexScannerCur(s), str, length) ||
174 (is_keyword && UdmIsIdentBody(UdmLexScannerCur(s)[length])))
175 return UDM_TRUE;
176 for ( ; length; length--)
177 UdmLexScannerShift(s);
178 a->token.end= UdmLexScannerCur(s);
179 a->type= type;
180 return UDM_FALSE;
181 }
182
183
184 static udm_bool_t
UdmLexScannerScanOperator2(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a,const char * str,size_t length,udm_lex_t type)185 UdmLexScannerScanOperator2(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a,
186 const char *str, size_t length,
187 udm_lex_t type)
188 {
189 return UdmLexScannerScanKeywordOrOperator(s, a, str, length, type, UDM_FALSE);
190 }
191
192
193 static udm_bool_t
UdmLexScannerScanOneKeyword(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a,const char * str,size_t length,udm_lex_t type)194 UdmLexScannerScanOneKeyword(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a,
195 const char *str, size_t length,
196 udm_lex_t type)
197 {
198 return UdmLexScannerScanKeywordOrOperator(s, a, str, length, type, UDM_TRUE);
199 }
200
201
202 /*
203 Used keywords:
204 --------------
205 break
206 continue
207 do
208 else
209 for
210 if
211 sizeof
212 while
213
214 C remaining keywords:
215 --------------------
216 char
217 double
218 float
219 int
220 long
221 short
222 signed
223 unsigned
224 void
225
226 Unused keywords
227 ---------------
228 auto
229 case
230 const
231 default
232 enum
233 extern
234 goto
235 register
236 return
237 static
238 struct
239 switch
240 typedef
241 union
242 volatile
243
244 C++ used keywords:
245 ------------------
246 operator
247
248 C++ remaining keywords:
249 -----------------------
250
251 alignas (since C++11)
252 alignof (since C++11)
253 and
254 and_eq
255 asm
256 auto(1)
257 bitand
258 bitor
259 bool
260 case
261 catch
262 char
263 char16_t (since C++11)
264 char32_t (since C++11)
265 class
266 compl
267 concept (concepts TS)
268 const
269 constexpr (since C++11)
270 const_cast
271 decltype (since C++11)
272 default(1)
273 delete(1)
274 double
275 dynamic_cast
276 enum
277 explicit
278 export(1)
279 extern
280 false
281 float
282 friend
283 goto
284 inline
285 int
286 long
287 mutable
288 namespace
289 new
290 noexcept (since C++11)
291 not
292 not_eq
293 nullptr (since C++11)
294 or
295 or_eq
296 private
297 protected
298 public
299 register
300 reinterpret_cast
301 requires (concepts TS)
302 return
303 short
304 signed
305 static
306 static_assert (since C++11)
307 static_cast
308 struct
309 switch
310 template
311 this
312 thread_local (since C++11)
313 throw
314 true
315 try
316 typedef
317 typeid
318 typename
319 union
320 unsigned
321 using(1)
322 virtual
323 void
324 volatile
325 wchar_t
326 xor
327 xor_eq
328 */
329 static udm_bool_t
UdmLexScannerScanKeyword(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)330 UdmLexScannerScanKeyword(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
331 {
332 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("if"), UDM_LEX_IF))
333 return UDM_FALSE;
334 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("else"), UDM_LEX_ELSE))
335 return UDM_FALSE;
336 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("while"), UDM_LEX_WHILE))
337 return UDM_FALSE;
338 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("do"), UDM_LEX_DO))
339 return UDM_FALSE;
340 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("for"), UDM_LEX_FOR))
341 return UDM_FALSE;
342 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("break"), UDM_LEX_BREAK))
343 return UDM_FALSE;
344 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("sizeof"), UDM_LEX_SIZEOF))
345 return UDM_FALSE;
346 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("continue"), UDM_LEX_CONTINUE))
347 return UDM_FALSE;
348 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("cout"), UDM_LEX_COUT))
349 return UDM_FALSE;
350
351 /* Unused keywords */
352 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("auto"), UDM_LEX_AUTO))
353 return UDM_FALSE;
354 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("case"), UDM_LEX_CASE))
355 return UDM_FALSE;
356 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("const"), UDM_LEX_CONST))
357 return UDM_FALSE;
358 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("default"), UDM_LEX_DEFAULT))
359 return UDM_FALSE;
360 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("enum"), UDM_LEX_ENUM))
361 return UDM_FALSE;
362 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("extern"), UDM_LEX_EXTERN))
363 return UDM_FALSE;
364 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("goto"), UDM_LEX_GOTO))
365 return UDM_FALSE;
366 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("register"), UDM_LEX_REGISTER))
367 return UDM_FALSE;
368 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("return"), UDM_LEX_RETURN))
369 return UDM_FALSE;
370 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("static"), UDM_LEX_STATIC))
371 return UDM_FALSE;
372 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("struct"), UDM_LEX_STRUCT))
373 return UDM_FALSE;
374 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("switch"), UDM_LEX_SWITCH))
375 return UDM_FALSE;
376 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("typedef"), UDM_LEX_TYPEDEF))
377 return UDM_FALSE;
378 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("union"), UDM_LEX_UNION))
379 return UDM_FALSE;
380 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("volatile"), UDM_LEX_VOLATILE))
381 return UDM_FALSE;
382
383 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("char"), UDM_LEX_CHAR))
384 return UDM_FALSE;
385 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("double"), UDM_LEX_DOUBLE))
386 return UDM_FALSE;
387 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("float"), UDM_LEX_FLOAT))
388 return UDM_FALSE;
389 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("int"), UDM_LEX_INT))
390 return UDM_FALSE;
391 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("long"), UDM_LEX_LONG))
392 return UDM_FALSE;
393 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("short"), UDM_LEX_SHORT))
394 return UDM_FALSE;
395 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("signed"), UDM_LEX_SIGNED))
396 return UDM_FALSE;
397 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("unsigned"), UDM_LEX_UNSIGNED))
398 return UDM_FALSE;
399
400 if (!UdmLexScannerScanOneKeyword(s, a, UDM_CSTR_WITH_LEN("operator"), UDM_LEX_OPERATOR))
401 return UDM_FALSE;
402
403 return UDM_TRUE;
404 }
405
406
407 udm_bool_t
UdmLexScannerScanCOperator(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)408 UdmLexScannerScanCOperator(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
409 {
410 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("=="), UDM_LEX_EQ_EQ))
411 return UDM_FALSE;
412 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("!="), UDM_LEX_NOT_EQ))
413 return UDM_FALSE;
414 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("++"), UDM_LEX_INC))
415 return UDM_FALSE;
416 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("--"), UDM_LEX_DEC))
417 return UDM_FALSE;
418 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("*="), UDM_LEX_MUL_EQ))
419 return UDM_FALSE;
420 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("/="), UDM_LEX_DIV_EQ))
421 return UDM_FALSE;
422 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("%="), UDM_LEX_REM_EQ))
423 return UDM_FALSE;
424 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("+="), UDM_LEX_INC_EQ))
425 return UDM_FALSE;
426 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("-="), UDM_LEX_DEC_EQ))
427 return UDM_FALSE;
428 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("&="), UDM_LEX_AND_EQ))
429 return UDM_FALSE;
430 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("|="), UDM_LEX_OR_EQ))
431 return UDM_FALSE;
432 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("||"), UDM_LEX_BOOL_OR))
433 return UDM_FALSE;
434 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("&&"), UDM_LEX_BOOL_AND))
435 return UDM_FALSE;
436 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("^="), UDM_LEX_XOR_EQ))
437 return UDM_FALSE;
438 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("<<="), UDM_LEX_LSHIFT_EQ))
439 return UDM_FALSE;
440 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("<<"), UDM_LEX_LSHIFT))
441 return UDM_FALSE;
442 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN(">>="), UDM_LEX_RSHIFT_EQ))
443 return UDM_FALSE;
444 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN(">>"), UDM_LEX_RSHIFT))
445 return UDM_FALSE;
446 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("<="), UDM_LEX_LE))
447 return UDM_FALSE;
448 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN(">="), UDM_LEX_GE))
449 return UDM_FALSE;
450 if (!UdmLexScannerScanKeyword(s, a))
451 return UDM_FALSE;
452 return UDM_TRUE;
453 }
454
455
456 /*
457 Simple tokens, consisting on one character.
458 */
459 udm_bool_t
UdmLexScannerScanPunctuation(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)460 UdmLexScannerScanPunctuation(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
461 {
462 switch (UdmLexScannerCur(s)[0])
463 {
464 case UDM_LEX_EXCLAM: /* '!' 0x21 */
465 case UDM_LEX_HASH: /* '#' 0x23 */
466 case UDM_LEX_DOLLAR: /* '$' 0x24 */
467 case UDM_LEX_PERCENT: /* '%' 0x25 */
468 case UDM_LEX_AMPERSAND: /* '&' 0x26 */
469 case UDM_LEX_LP: /* '(' 0x28 */
470 case UDM_LEX_RP: /* ')' 0x29 */
471 case UDM_LEX_ASTERISK: /* '*' 0x2A */
472 case UDM_LEX_PLUS: /* '+' 0x2B */
473 case UDM_LEX_COMMA: /* ',' 0x2C */
474 case UDM_LEX_MINUS: /* '-' 0x2D */
475 case UDM_LEX_DOT: /* '.' 0x2E */
476 case UDM_LEX_SLASH: /* '/' 0x2F */
477 case UDM_LEX_COLON: /* ':' 0x3A */
478 case UDM_LEX_SEMICOLON: /* ';' 0x3B */
479 case UDM_LEX_LT: /* '<' 0x3C */
480 case UDM_LEX_EQ: /* '=' 0x3D */
481 case UDM_LEX_GT: /* '>' 0x3E */
482 case UDM_LEX_QUESTION: /* '?' 0x3F */
483 case UDM_LEX_AT: /* '@' 0x40 */
484 case UDM_LEX_LB: /* '[' 0x5B */
485 case UDM_LEX_RB: /* ']' 0x5D */
486 case UDM_LEX_CARET: /* '^' 0x5E */
487 case UDM_LEX_LCB: /* '{' 0x7B */
488 case UDM_LEX_VBAR: /* '|' 0x7C */
489 case UDM_LEX_RCB: /* '}' 0x7D */
490 case UDM_LEX_TILDE: /* '~' 0x7E */
491 UdmLexScannerShift(s);
492 a->token.end= UdmLexScannerCur(s);
493 a->type= (udm_lex_t) a->token.str[0];
494 return UDM_FALSE;
495 default:
496 break;
497 }
498 return UDM_TRUE;
499 }
500
501
502 static udm_bool_t
UdmLexScannerScanStringInternal(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a,udm_bool_t escaped)503 UdmLexScannerScanStringInternal(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a,
504 udm_bool_t escaped)
505 {
506 UDM_ASSERT(!UdmLexScannerEOF(s));
507 if (UdmLexScannerCur(s)[0] != '"' /*&& UdmLexScannerCur(s)[0] != '\''*/)
508 return UDM_TRUE;
509
510 a->type= UDM_LEX_STRING;
511 UdmLexScannerShift(s);
512 for( ; !UdmLexScannerEOF(s); UdmLexScannerShift(s))
513 {
514 if (escaped && UdmLexScannerCur(s)[0] == '\\')
515 {
516 UdmLexScannerShift(s);
517 a->type= UDM_LEX_ESCAPED_STRING;
518 continue;
519 }
520 if (UdmLexScannerCur(s)[0] == a->token.str[0])
521 break;
522 }
523 a->token.end= UdmLexScannerCur(s);
524 if (a->token.str[0] == UdmLexScannerCur(s)[0])
525 UdmLexScannerShift(s);
526 a->token.str++;
527 return UDM_FALSE;
528 }
529
530
531 udm_bool_t
UdmLexScannerScanString(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)532 UdmLexScannerScanString(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
533 {
534 return UdmLexScannerScanStringInternal(s, a, UDM_FALSE);
535 }
536
537
538 udm_bool_t
UdmLexScannerScanEscapedString(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)539 UdmLexScannerScanEscapedString(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
540 {
541 return UdmLexScannerScanStringInternal(s, a, UDM_TRUE);
542 }
543
544 udm_bool_t
UdmLexScannerScanChar(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)545 UdmLexScannerScanChar(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
546 {
547 UDM_ASSERT(!UdmLexScannerEOF(s));
548 if (UdmLexScannerCur(s)[0] != '\'')
549 return UDM_TRUE;
550
551 UdmLexScannerShift(s);
552 for( ; !UdmLexScannerEOF(s); UdmLexScannerShift(s))
553 {
554 if (UdmLexScannerCur(s)[0] == '\\')
555 {
556 UdmLexScannerShift(s);
557 continue;
558 }
559 if (UdmLexScannerCur(s)[0] == a->token.str[0])
560 break;
561 }
562 a->token.end= UdmLexScannerCur(s);
563 if (a->token.str[0] == UdmLexScannerCur(s)[0])
564 UdmLexScannerShift(s);
565 a->token.str++;
566 a->type= UDM_LEX_CHAR_LITERAL;
567 return UDM_FALSE;
568 }
569
570
571 udm_bool_t
UdmLexScannerScanIdentifier(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)572 UdmLexScannerScanIdentifier(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
573 {
574 UDM_ASSERT(!UdmLexScannerEOF(s));
575
576 if (!UdmIsIdentStart(UdmLexScannerCur(s)[0]))
577 return UDM_TRUE;
578
579 for (UdmLexScannerShift(s) ; !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
580 {
581 if (!UdmIsIdentBody(UdmLexScannerCur(s)[0]))
582 break;
583 }
584 a->token.end= UdmLexScannerCur(s);
585 a->type= UDM_LEX_IDENT;
586 return UDM_FALSE;
587 }
588
589
590 udm_bool_t
UdmLexScannerScanUnsignedNumber(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)591 UdmLexScannerScanUnsignedNumber(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
592 {
593 UDM_ASSERT(!UdmLexScannerEOF(s));
594
595 if (!udm_isdigit(UdmLexScannerCur(s)[0]))
596 return UDM_TRUE;
597
598 for (UdmLexScannerShift(s) ; !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
599 {
600 if (!udm_isdigit(UdmLexScannerCur(s)[0]))
601 break;
602 }
603 a->token.end= UdmLexScannerCur(s);
604 a->type= UDM_LEX_UINT; /* TODO34: add ulonglong */
605 return UDM_FALSE;
606 }
607
608
609 udm_bool_t
UdmLexScannerScanCComment(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)610 UdmLexScannerScanCComment(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
611 {
612 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("//"), UDM_LEX_COMMENT))
613 {
614 for ( ; !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
615 {
616 if (UdmLexScannerCur(s)[0] == '\n')
617 break;
618 }
619 a->token.end= UdmLexScannerCur(s);
620 return UDM_FALSE;
621 }
622 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("/*"), UDM_LEX_COMMENT))
623 {
624 for ( ; !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
625 {
626 if (!UdmLexScannerScanOperator2(s, a, UDM_CSTR_WITH_LEN("*/"), UDM_LEX_COMMENT))
627 break;
628 }
629 a->token.end= UdmLexScannerCur(s);
630 return UDM_FALSE;
631 }
632 return UDM_TRUE;
633 }
634
635
636 /*** XML *******************************************************************/
637
638 /*
639 NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
640 */
641 static inline udm_bool_t
UdmIsXMLNameBody(int x)642 UdmIsXMLNameBody(int x)
643 {
644 return ((x >= 'a' && x <= 'z') ||
645 (x >= 'A' && x <= 'Z') ||
646 (x >= '0' && x <= '9') ||
647 x == '.' ||
648 x == '-' ||
649 x == '_' ||
650 x == ':' ||
651 x < 0);
652 }
653
654
655 static inline udm_bool_t
UdmIsXMLNameStart(int x)656 UdmIsXMLNameStart(int x)
657 {
658 return ((x >= 'a' && x <= 'z') ||
659 (x >= 'A' && x <= 'Z') ||
660 x == '_' ||
661 x == ':' ||
662 x < 0);
663 }
664
665
666 /*
667 Name ::= (Letter | '_' | ':') (NameChar)*
668 */
669 udm_bool_t
UdmLexScannerScanXMLIdentifier(UDM_LEX_SCANNER * s,UDM_LEX_TOKEN * a)670 UdmLexScannerScanXMLIdentifier(UDM_LEX_SCANNER *s, UDM_LEX_TOKEN *a)
671 {
672 UDM_ASSERT(!UdmLexScannerEOF(s));
673
674 if (!UdmIsXMLNameStart(UdmLexScannerCur(s)[0]))
675 return UDM_TRUE;
676
677 for (UdmLexScannerShift(s) ; !UdmLexScannerEOF(s) ; UdmLexScannerShift(s))
678 {
679 if (!UdmIsXMLNameBody(UdmLexScannerCur(s)[0]))
680 break;
681 }
682 a->token.end= UdmLexScannerCur(s);
683 a->type= UDM_LEX_IDENT;
684 return UDM_FALSE;
685 }
686
687
688 /*
689 Scan until "str" is found in the content.
690 Returns UDM_FALSE if the searched string was not found.
691 scanner->token.type is set to "on_error" (usually UDM_LEX_EOF).
692 Returns UDM_TRUE if the searched string was found.
693 scanner->token.type is set to "on_success" (usually UDM_LEX_TEXT).
694 */
695 udm_bool_t
UdmLexScannerScanUntil(UDM_LEX_SCANNER * scanner,const UDM_CONST_STR * str,udm_lex_t on_success,udm_lex_t on_error)696 UdmLexScannerScanUntil(UDM_LEX_SCANNER *scanner,
697 const UDM_CONST_STR *str,
698 udm_lex_t on_success,
699 udm_lex_t on_error)
700 {
701 scanner->token.token.str= scanner->cur;
702 for ( ; scanner->cur < scanner->content.end; scanner->cur++)
703 {
704 size_t length= scanner->content.end - scanner->cur;
705 if (length >= str->length && !memcmp(scanner->cur, str->str, str->length))
706 {
707 scanner->token.type= on_success;
708 scanner->token.token.end= scanner->cur;
709 scanner->cur+= str->length;
710 return UDM_TRUE;
711 }
712 }
713 scanner->token.type= on_error;
714 scanner->token.token.end= scanner->cur;
715 return UDM_FALSE;
716 }
717