1 /*
2 Copyright (c) 2003, Steve Dekorte
3 All rights reserved. See _BSDLicense.txt.
4
5 Aug 2004 - removed {} from op chars
6 - changed identifier to stop after 1 colon
7 */
8
9 #include "IoLexer.h"
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include <stddef.h>
14
15 //#define LEXER_DEBUG
16 //#define LEXER_DEBUG_TOKENS
17 //#define TEST_INLINE inline
18 #define TEST_INLINE
19
IoLexer_currentToken(IoLexer * self)20 static IoToken *IoLexer_currentToken(IoLexer *self)
21 {
22 return List_top(self->tokenStream);
23 }
24
IoLexer_new(void)25 IoLexer *IoLexer_new(void)
26 {
27 IoLexer *self = (IoLexer *)io_calloc(1, sizeof(IoLexer));
28 self->s = (char *)io_calloc(1, 1);
29 self->s[0] = 0;
30 self->posStack = Stack_new();
31 self->tokenStack = Stack_new();
32 self->tokenStream = List_new();
33 self->charLineIndex = List_new();
34 return self;
35 }
36
// Releases the lexer: frees the tokens via IoLexer_clear(), then the
// source buffer, both stacks, both lists, the error description (if one
// was ever allocated) and finally the lexer itself.
void IoLexer_free(IoLexer *self)
{
	// must run while the stacks and lists still exist
	IoLexer_clear(self);

	io_free(self->s);

	Stack_free(self->posStack);
	Stack_free(self->tokenStack);

	List_free(self->tokenStream);
	List_free(self->charLineIndex);

	if (self->errorDescription != NULL)
	{
		io_free(self->errorDescription);
	}

	io_free(self);
}
48
// Returns a human readable description of the current error token.
//
// The description buffer is allocated lazily (once) and owned by the
// lexer; it is released in IoLexer_free(). When there is no error token
// the buffer is returned unchanged (empty right after allocation).
// The text is written with snprintf so that a long token error message
// is truncated instead of overflowing the fixed-size buffer.
char *IoLexer_errorDescription(IoLexer *self)
{
	enum { DESCRIPTION_CAPACITY = 1024 };
	IoToken *et = IoLexer_errorToken(self);

	if (!self->errorDescription)
	{
		self->errorDescription = io_calloc(1, DESCRIPTION_CAPACITY);
		self->errorDescription[0] = 0;
	}

	if (et)
	{
		snprintf(self->errorDescription,
				DESCRIPTION_CAPACITY,
				"\"%s\" on line %i character %i",
				et->error,
				IoToken_lineNumber(et),
				IoToken_charNumber(et));
	}

	return self->errorDescription;
}
70
71
// Rebuilds charLineIndex from the source string. The list receives, in
// order: the start of the string, the address of every '\n' character,
// and the address of the terminating NUL. The line hint used by
// IoLexer_currentLineNumber() is reset to 0.
void IoLexer_buildLineIndex(IoLexer *self)
{
	List *lineIndex = self->charLineIndex;
	char *cursor;

	List_removeAll(lineIndex);
	List_append_(lineIndex, self->s);

	for (cursor = self->s; *cursor != 0; cursor++)
	{
		if ('\n' == *cursor)
		{
			List_append_(lineIndex, cursor);
		}
	}

	// cursor now rests on the terminator
	List_append_(lineIndex, cursor);
	self->lineHint = 0;
}
92
93 // next/prev character ------------------------
94
// Number of bytes announced by the first byte (c) of a UTF-8 sequence.
// Bytes 0xfe and 0xff map to 1; note that bytes 0x80..0xbf also map to 2.
#define UTF8_SEQLEN(c) \
	(((c) < 0x80) ? 1 : \
	 ((c) < 0xe0) ? 2 : \
	 ((c) < 0xf0) ? 3 : \
	 ((c) < 0xf8) ? 4 : \
	 ((c) < 0xfc) ? 5 : \
	 ((c) < 0xfe) ? 6 : 1)

// Value returned by _IoLexer_DecodeUTF8() for an undecodable sequence.
#define INVALID_CHAR 0xfffe
104
_IoLexer_DecodeUTF8(const unsigned char * s)105 static uchar_t _IoLexer_DecodeUTF8(const unsigned char *s)
106 {
107 if (*s < 0x80)
108 return *s;
109 else if (*s < 0xc2)
110 return INVALID_CHAR;
111 else if (*s < 0xe0)
112 {
113 if (!((s[1] ^ 0x80) < 0x40))
114 return INVALID_CHAR;
115 return ((uchar_t)(s[0] & 0x1f) << 6) | (uchar_t)(s[1] ^ 0x80);
116 }
117 else if (*s < 0xf0)
118 {
119 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[0] >= 0xe1 || s[1] >= 0xa0)))
120 return INVALID_CHAR;
121 return ((uchar_t)(s[0] & 0x0f) << 12) | ((uchar_t)(s[1] ^ 0x80) << 6) | (uchar_t)(s[2] ^ 0x80);
122 }
123 else if (*s < 0xf8)
124 {
125 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[0] >= 0xf1 || s[1] >= 0x90)))
126 return INVALID_CHAR;
127 return ((uchar_t)(s[0] & 0x07) << 18) | ((uchar_t)(s[1] ^ 0x80) << 12) | ((uchar_t)(s[2] ^ 0x80) << 6) | (uchar_t)(s[3] ^ 0x80);
128 }
129 else if (*s < 0xfc)
130 {
131 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 && (s[0] >= 0xf9 || s[1] >= 0x88)))
132 return INVALID_CHAR;
133 return ((uchar_t)(s[0] & 0x03) << 24) | ((uchar_t)(s[1] ^ 0x80) << 18) | ((uchar_t)(s[2] ^ 0x80) << 12) | ((uchar_t)(s[3] ^ 0x80) << 6) | (uchar_t)(s[4] ^ 0x80);
134 }
135 else if (*s < 0xfe)
136 {
137 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 && (s[5] ^ 0x80) < 0x40 && (s[0] >= 0xfd || s[1] >= 0x84)))
138 return INVALID_CHAR;
139 return ((uchar_t)(s[0] & 0x01) << 30) | ((uchar_t)(s[1] ^ 0x80) << 24) | ((uchar_t)(s[2] ^ 0x80) << 18) | ((uchar_t)(s[3] ^ 0x80) << 12) | ((uchar_t)(s[4] ^ 0x80) << 6) | (uchar_t)(s[5] ^ 0x80);
140 }
141 else
142 return INVALID_CHAR;
143 }
144
IoLexer_nextChar(IoLexer * self)145 TEST_INLINE uchar_t IoLexer_nextChar(IoLexer *self)
146 {
147 unsigned char c = (unsigned char) * (self->current);
148 int seqlen, i;
149 uchar_t uch;
150
151 if (c == 0)
152 {
153 return 0;
154 }
155 else if (c < 0x80)
156 {
157 self->current++;
158 return c;
159 }
160
161 seqlen = UTF8_SEQLEN(c);
162
163 for (i = 0; i < seqlen; i++)
164 {
165 if (self->current[i] == 0)
166 {
167 // XXX: invalid or incomplete sequence
168 return 0;
169 }
170 }
171
172 uch = _IoLexer_DecodeUTF8((unsigned char*)self->current);
173
174 if (uch == INVALID_CHAR)
175 {
176 return 0;
177 }
178
179 self->current += seqlen;
180 return uch;
181 }
182
IoLexer_prevChar(IoLexer * self)183 TEST_INLINE uchar_t IoLexer_prevChar(IoLexer *self)
184 {
185 uchar_t uch;
186 int len;
187
188 for (len = 1; len <= 6 && self->current - len > self->s; len++)
189 {
190 unsigned char c = *(unsigned char *)(self->current - len);
191 if (c < 0x80 || c >= 0xc2)
192 break;
193 }
194
195 self->current -= len;
196 uch = _IoLexer_DecodeUTF8((unsigned char*)self->current);
197 if (uch == INVALID_CHAR)
198 return 0;
199
200 return uch;
201 }
202
IoLexer_current(IoLexer * self)203 TEST_INLINE char *IoLexer_current(IoLexer *self)
204 {
205 return self->current;
206 }
207
IoLexer_onNULL(IoLexer * self)208 TEST_INLINE int IoLexer_onNULL(IoLexer *self)
209 {
210 return (*(self->current) == 0);
211 }
212
213 // ------------------------------------------
214
// Linear-scan line number of the cursor: 1 plus the number of '\n'
// characters located before the cursor.
size_t IoLexer_currentLineNumberOld(IoLexer *self)
{
	size_t newlines = 0;
	const char *scan;

	for (scan = self->s; scan < self->current; scan++)
	{
		if ('\n' == *scan)
		{
			newlines++;
		}
	}

	return newlines + 1;
}
232
IoLexer_currentLineNumber(IoLexer * self)233 TEST_INLINE size_t IoLexer_currentLineNumber(IoLexer *self)
234 {
235 // this should be even faster than a binary search
236 // since almost all results are very close to the last
237
238 List *index = self->charLineIndex;
239 size_t line = self->lineHint;
240 size_t numLines = List_size(index);
241 void *current = (void *)self->current;
242
243 if (current < List_at_(index, line))
244 {
245 // walk down lines until char is bigger than one
246 while (line > 0 && !(current > List_at_(index, line)))
247 {
248 line --;
249 }
250 line ++;
251 }
252 else
253 {
254 // walk up lines until char is less than or equal to one
255 while (line < numLines && !(current <= List_at_(index, line)))
256 {
257 line ++;
258 }
259 }
260
261
262 self->lineHint = line;
263
264 /*
265 {
266 size_t realLine = IoLexer_currentLineNumberOld(self);
267
268 if (line != realLine)
269 {
270 printf("mismatch on currentLine %i != %i\n", (int)line, (int)realLine);
271 }
272 }
273 */
274 return line;
275 }
276
// Resets the lexing state: frees every token in the stream, empties the
// stream and both stacks, rewinds the cursor to the start of the source
// and zeroes the result index, max char offset and error token.
// The source string and the line index are kept.
void IoLexer_clear(IoLexer *self)
{
	LIST_FOREACH(self->tokenStream, i, t, IoToken_free((IoToken *)t) );
	List_removeAll(self->tokenStream);

	Stack_clear(self->tokenStack);
	Stack_clear(self->posStack);

	self->errorToken = NULL;
	self->maxChar = 0;
	self->resultIndex = 0;
	self->current = self->s;
}
290
IoLexer_errorToken(IoLexer * self)291 IoToken *IoLexer_errorToken(IoLexer *self)
292 {
293 return self->errorToken;
294 }
295
296 // lexing -------------------------------------
297
// Replaces the source text with a private copy of `string`, rewinds the
// cursor to its start and rebuilds the line index.
//
// A NULL `string` is treated as the empty string. If the buffer cannot
// be resized the lexer keeps its previous source untouched (the old
// pointer is not overwritten with a failed realloc result).
void IoLexer_string_(IoLexer *self, const char *string)
{
	const char *text = string ? string : "";
	size_t size = strlen(text) + 1; // includes the terminator
	char *buffer = (char *)io_realloc(self->s, size);

	if (!buffer)
	{
		return;
	}

	// memmove: harmless if the caller handed back our own buffer
	memmove(buffer, text, size);

	self->s = buffer;
	self->current = self->s;
	IoLexer_buildLineIndex(self);
}
304
// Writes to stdout at most `max` characters of the source, starting at
// offset maxChar and stopping early at the string terminator.
void IoLexer_printLast_(IoLexer *self, int max)
{
	const char *from = self->s + self->maxChar;
	int shown = 0;

	while (shown < max && from[shown])
	{
		putchar(from[shown]);
		shown++;
	}
}
315
316 // --- token and character position stacks ---
317
// Top entry of the position stack: the cursor value saved by the most
// recent IoLexer_pushPos() that has not been popped yet.
char *IoLexer_lastPos(IoLexer *self)
{
	char *saved = Stack_top(self->posStack);

	return saved;
}
322
IoLexer_pushPos(IoLexer * self)323 TEST_INLINE void IoLexer_pushPos(IoLexer *self)
324 {
325 intptr_t index = self->current - self->s;
326
327 if (index > (intptr_t)self->maxChar)
328 {
329 self->maxChar = index;
330 }
331
332 Stack_push_(self->tokenStack, (void *)(intptr_t)(List_size(self->tokenStream) - 1));
333 Stack_push_(self->posStack, self->current);
334
335 #ifdef LEXER_DEBUG
336 printf("push: ");
337 IoLexer_print(self);
338 #endif
339 }
340
IoLexer_popPos(IoLexer * self)341 TEST_INLINE void IoLexer_popPos(IoLexer *self)
342 {
343 Stack_pop(self->tokenStack);
344 Stack_pop(self->posStack);
345 #ifdef LEXER_DEBUG
346 printf("pop: ");
347 IoLexer_print(self);
348 #endif
349 }
350
IoLexer_popPosBack(IoLexer * self)351 TEST_INLINE void IoLexer_popPosBack(IoLexer *self)
352 {
353 intptr_t i = (intptr_t)Stack_pop(self->tokenStack);
354 intptr_t topIndex = (intptr_t)Stack_top(self->tokenStack);
355
356 if (i > -1)
357 {
358 List_setSize_(self->tokenStream, i + 1);
359
360 if (i != topIndex) // ok to io_free token
361 {
362 IoToken *parent = IoLexer_currentToken(self);
363
364 if (parent)
365 {
366 IoToken_nextToken_(parent, NULL);
367 }
368 }
369 }
370
371 self->current = Stack_pop(self->posStack);
372 #ifdef LEXER_DEBUG
373 printf("back: "); IoLexer_print(self);
374 #endif
375 }
376
377 // ------------------------------------------
378
// Lexes the whole source string into the token stream.
//
// Returns 0 when all input was consumed, -1 when unread input remains.
// In the -1 case, if no reader flagged an error token already, the last
// token of the stream (or, when the stream is empty, a new NO_TOKEN
// holding an excerpt of the unread input) becomes the error token with
// the message "Syntax error near this location".
//
// The excerpt is at most 30 bytes and never longer than the remaining
// input, so the length handed to the token never extends past the
// string terminator.
int IoLexer_lex(IoLexer *self)
{
	enum { ERROR_EXCERPT_LENGTH = 30 };

	IoLexer_clear(self);
	IoLexer_pushPos(self);

	IoLexer_messageChain(self);

	if (*(self->current))
	{
		if (!self->errorToken)
		{
			if (List_size(self->tokenStream))
			{
				self->errorToken = IoLexer_currentToken(self);
			}
			else
			{
				size_t remaining = strlen(self->current);
				size_t excerpt = remaining < (size_t)ERROR_EXCERPT_LENGTH
					? remaining
					: (size_t)ERROR_EXCERPT_LENGTH;

				self->errorToken = IoLexer_addTokenString_length_type_(self, self->current, excerpt, NO_TOKEN);
			}

			IoToken_error_(self->errorToken, "Syntax error near this location");
		}
		return -1;
	}
	return 0;
}
409
410 // getting results --------------------------------
411
IoLexer_top(IoLexer * self)412 IoToken *IoLexer_top(IoLexer *self)
413 {
414 return List_at_(self->tokenStream, self->resultIndex);
415 }
416
IoLexer_topType(IoLexer * self)417 IoTokenType IoLexer_topType(IoLexer *self)
418 {
419 if (!IoLexer_top(self))
420 {
421 return 0;
422 }
423
424 return IoLexer_top(self)->type;
425 }
426
IoLexer_pop(IoLexer * self)427 IoToken *IoLexer_pop(IoLexer *self)
428 {
429 IoToken *t = IoLexer_top(self);
430 self->resultIndex ++;
431 return t;
432 }
433
434 // stack management --------------------------------
435
// Debug helper: prints the first token of the stream with
// IoToken_print() when there is one, then a newline.
void IoLexer_print(IoLexer *self)
{
	IoToken *head = List_first(self->tokenStream);

	if (head != NULL)
	{
		IoToken_print(head);
	}

	printf("\n");
}
447
// Debug helper: prints every token of the stream as 'name' followed by
// its type name, entries separated by ", ", then a newline.
void IoLexer_printTokens(IoLexer *self)
{
	int i;

	for (i = 0; i < List_size(self->tokenStream); i ++)
	{
		IoToken *tok = List_at_(self->tokenStream, i);

		// separator goes between entries, never after the last one
		if (i > 0)
		{
			printf(", ");
		}

		printf("'%s'", tok->name);
		printf(" %s ", IoToken_typeName(tok));
	}

	printf("\n");
}
467
468 // grabbing ---------------------------------------------
469
// Number of bytes between the most recently saved position and the
// cursor.
int IoLexer_grabLength(IoLexer *self)
{
	char *from = IoLexer_lastPos(self);
	char *to = IoLexer_current(self);
	int span = (int)(to - from);

	return span;
}
477
// Adds a token of the given type whose text is the source between the
// most recently saved position and the cursor. An empty span is treated
// as a fatal lexer error: a message is printed and the process exits.
void IoLexer_grabTokenType_(IoLexer *self, IoTokenType type)
{
	char *start = IoLexer_lastPos(self);
	char *end = IoLexer_current(self);
	size_t length = (end - start);

	if (length == 0)
	{
		printf("IoLexer fatal error: empty token\n");
		exit(1);
	}

	IoLexer_addTokenString_length_type_(self, start, length, type);
}
492
IoLexer_addTokenString_length_type_(IoLexer * self,const char * s1,size_t len,IoTokenType type)493 IoToken *IoLexer_addTokenString_length_type_(IoLexer *self, const char *s1, size_t len, IoTokenType type)
494 {
495 IoToken *top = IoLexer_currentToken(self);
496 IoToken *t = IoToken_new();
497
498 t->lineNumber = (int)IoLexer_currentLineNumber(self);
499 //t->charNumber = (int)(s1 - self->s);
500 t->charNumber = (int)(self->current - self->s);
501
502 if (t->charNumber < 0)
503 {
504 printf("bad t->charNumber = %i\n", t->charNumber);
505 }
506
507 IoToken_name_length_(t, s1, len);
508 IoToken_type_(t, type);
509
510 if (top)
511 {
512 IoToken_nextToken_(top, t);
513 }
514
515 List_push_(self->tokenStream, t);
516 #ifdef LEXER_DEBUG_TOKENS
517 printf("token '%s' %s\n", t->name, IoToken_typeName(t));
518 #endif
519
520 return t;
521 }
522
523 // reading ------------------------------------
524
// Reads a chain of messages: skips any run of terminators, separators
// and comments, reads one message, and repeats until no message can be
// read.
void IoLexer_messageChain(IoLexer *self)
{
	for (;;)
	{
		while (IoLexer_readTerminator(self) ||
			   IoLexer_readSeparator(self) ||
			   IoLexer_readComment(self))
		{
		}

		if (!IoLexer_readMessage(self))
		{
			break;
		}
	}
}
535
536 // message -------------------------------
537
// Backtracks to the last saved position, then marks the token that is
// now last in the stream as the error token with message `name`.
static void IoLexer_readMessage_error(IoLexer *self, const char *name)
{
	IoToken *culprit;

	IoLexer_popPosBack(self);

	culprit = IoLexer_currentToken(self);
	self->errorToken = culprit;
	IoToken_error_(culprit, name);
}
544
// Tries each character of `chars` in order as a one-character token of
// the given type. Returns 1 as soon as one is read, 0 if none matched.
int IoLexer_readTokenChars_type_(IoLexer *self, const char *chars, IoTokenType type)
{
	const char *candidate;

	for (candidate = chars; *candidate; candidate++)
	{
		if (IoLexer_readTokenChar_type_(self, *candidate, type))
		{
			return 1;
		}
	}

	return 0;
}
555
// Message name implied by an opening group character: "" for '(',
// "squareBrackets" for '[' and "curlyBrackets" for '{'. Any other
// character is a fatal error: a message is printed and the process
// exits.
const char *IoLexer_nameForGroupChar_(IoLexer *self, char groupChar)
{
	if (groupChar == '(') return "";
	if (groupChar == '[') return "squareBrackets";
	if (groupChar == '{') return "curlyBrackets";

	printf("IoLexer: fatal error - invalid group char %c\n", groupChar);
	exit(1);
}
568
569 //static char *specialChars = ":._";
// Non-alphanumeric ASCII characters accepted inside identifiers (see
// IoLexer_readSpecialChar). Points at a string literal, hence const.
static const char *specialChars = "._";
571
// Reads one message: optional padding, an optional symbol, and an
// optional argument list delimited by (), [] or {}.
//
// When a group opens with '[' or '{', or with '(' and no symbol was
// read, an identifier token named by IoLexer_nameForGroupChar_() is
// inserted before the open token. Arguments are message chains
// separated by commas.
//
// Returns 1 when a symbol and/or a complete argument list was read.
// Returns 0 otherwise; on "missing argument" and unmatched group errors
// the error token is set through IoLexer_readMessage_error().
int IoLexer_readMessage(IoLexer *self)
{
	char foundSymbol;
	char groupChar;

	IoLexer_pushPos(self);
	IoLexer_readPadding(self);

	foundSymbol = IoLexer_readSymbol(self);

	while (IoLexer_readSeparator(self) || IoLexer_readComment(self))
	{
	}

	groupChar = *IoLexer_current(self);

	if (groupChar && (strchr("[{", groupChar) || (!foundSymbol && groupChar == '(')))
	{
		char *groupName = (char *)IoLexer_nameForGroupChar_(self, groupChar);
		IoLexer_addTokenString_length_type_(self, groupName, strlen(groupName), IDENTIFIER_TOKEN);
	}

	if (!IoLexer_readTokenChars_type_(self, "([{", OPENPAREN_TOKEN))
	{
		// no argument list: the message is just the symbol, if any
		if (foundSymbol)
		{
			IoLexer_popPos(self);
			return 1;
		}

		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_readPadding(self);

	do
	{
		// OPENPAREN on the first pass, COMMA on later ones
		IoTokenType type = IoLexer_currentToken(self)->type;

		IoLexer_readPadding(self);

		// Empty argument: (... ,)
		if (COMMA_TOKEN == type)
		{
			char c = *IoLexer_current(self);

			if (',' == c || strchr(")]}", c))
			{
				IoLexer_readMessage_error(self, "missing argument in argument list");
				return 0;
			}
		}

		IoLexer_messageChain(self);
		IoLexer_readPadding(self);

	} while (IoLexer_readTokenChar_type_(self, ',', COMMA_TOKEN));

	if (IoLexer_readTokenChars_type_(self, ")]}", CLOSEPAREN_TOKEN))
	{
		IoLexer_popPos(self);
		return 1;
	}

	// the group was never closed: report against the opening character
	switch (groupChar)
	{
		case '(':
			IoLexer_readMessage_error(self, "unmatched ()s");
			break;
		case '[':
			IoLexer_readMessage_error(self, "unmatched []s");
			break;
		case '{':
			IoLexer_readMessage_error(self, "unmatched {}s");
			break;
	}

	return 0;
}
664
// Skips any mix of whitespace runs and comments. Returns 1 when at
// least one was skipped, 0 otherwise.
int IoLexer_readPadding(IoLexer *self)
{
	int skipped = 0;

	for (;;)
	{
		if (!(IoLexer_readWhitespace(self) || IoLexer_readComment(self)))
		{
			break;
		}

		skipped = 1;
	}

	return skipped;
}
676
677 // symbols ------------------------------------------
678
// Reads one symbol, trying in order: number, operator, identifier,
// quoted string. Returns 1 on the first success, 0 if none applies.
int IoLexer_readSymbol(IoLexer *self)
{
	return IoLexer_readNumber(self)
		|| IoLexer_readOperator(self)
		|| IoLexer_readIdentifier(self)
		|| IoLexer_readQuote(self);
}
687
// Reads the longest run of letters, digits and special characters and
// emits it as an IDENTIFIER_TOKEN. Returns 1 on success; returns 0 with
// the cursor restored when the run is empty.
int IoLexer_readIdentifier(IoLexer *self)
{
	IoLexer_pushPos(self);

	while (IoLexer_readLetter(self) ||
		   IoLexer_readDigit(self) ||
		   IoLexer_readSpecialChar(self))
	{
	}

	if (!IoLexer_grabLength(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
720
// Reads the longest run of operator characters and emits it as an
// IDENTIFIER_TOKEN. Returns 1 on success; returns 0 with the cursor
// restored when no operator character is present or when the next
// character cannot be read at all.
int IoLexer_readOperator(IoLexer *self)
{
	uchar_t probe;

	IoLexer_pushPos(self);

	// peek: read one character, then step back over it
	probe = IoLexer_nextChar(self);

	if (probe == 0)
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_prevChar(self);

	while (IoLexer_readOpChar(self))
	{
	}

	if (!IoLexer_grabLength(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
758
759 // comments ------------------------------------------
760
// Reads one comment of any supported form, tried in order: slash-star,
// double slash, pound. Returns 1 if one was read, else 0.
int IoLexer_readComment(IoLexer *self)
{
	if (IoLexer_readSlashStarComment(self)) return 1;
	if (IoLexer_readSlashSlashComment(self)) return 1;
	if (IoLexer_readPoundComment(self)) return 1;
	return 0;
}
767
// Reads a slash-star comment, tracking nested open/close markers.
// Returns 1 when the comment was read. Returns 0 with the cursor
// restored when the input does not start with the open marker, or when
// the input ends (or a character cannot be read) before the comment is
// closed; in that second case an error token is flagged with
// "unterminated comment".
int IoLexer_readSlashStarComment(IoLexer *self)
{
	unsigned int depth;

	IoLexer_pushPos(self);

	if (!IoLexer_readString_(self, "/*"))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	for (depth = 1; depth > 0; )
	{
		if (IoLexer_readString_(self, "/*"))
		{
			// one further character is consumed after a nested opener
			IoLexer_nextChar(self);
			depth++;
			continue;
		}

		if (IoLexer_readString_(self, "*/"))
		{
			// otherwise we end up trimming the last char
			if (depth > 1) IoLexer_nextChar(self);
			depth--;
			continue;
		}

		if (IoLexer_nextChar(self) == 0)
		{
			self->errorToken = IoLexer_currentToken(self);

			if (!self->errorToken)
			{
				// no token yet: make one out of the comment text read so far
				IoLexer_grabTokenType_(self, NO_TOKEN);
				self->errorToken = IoLexer_currentToken(self);
			}

			if (self->errorToken)
			{
				IoToken_error_(self->errorToken, "unterminated comment");
			}

			IoLexer_popPosBack(self);
			return 0;
		}
	}

	IoLexer_popPos(self);
	return 1;
}
819
// Reads a comment that starts with two slashes and runs up to (not
// including) the next newline. Returns 1 if read, else 0 with the
// cursor restored.
int IoLexer_readSlashSlashComment(IoLexer *self)
{
	IoLexer_pushPos(self);

	// the second character is only read when the first was a slash
	if (IoLexer_nextChar(self) == '/' && IoLexer_nextChar(self) == '/')
	{
		while (IoLexer_readNonReturn(self))
		{
		}

		IoLexer_popPos(self);
		return 1;
	}

	IoLexer_popPosBack(self);
	return 0;
}
838
// Reads a comment that starts with '#' and runs up to (not including)
// the next newline. Returns 1 if read, else 0 with the cursor restored.
int IoLexer_readPoundComment(IoLexer *self)
{
	IoLexer_pushPos(self);

	if (IoLexer_nextChar(self) != '#')
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	while (IoLexer_readNonReturn(self))
	{
	}

	IoLexer_popPos(self);
	return 1;
}
856
857 // quotes -----------------------------------------
858
// Reads a quoted string; the triple-quote form is tried before the
// single-quote form. Returns 1 if one was read, else 0.
int IoLexer_readQuote(IoLexer *self)
{
	if (IoLexer_readTriQuote(self))
	{
		return 1;
	}

	return IoLexer_readMonoQuote(self) ? 1 : 0;
}
863
// Reads a string delimited by double quotes and emits it (delimiters
// included) as a MONOQUOTE_TOKEN. A backslash makes the following
// character part of the string, so an escaped quote does not close it.
// Returns 1 on success. Returns 0 with the cursor restored when the
// input does not start with a quote, or when the closing quote is never
// found; in that second case the current last token, if any, is flagged
// with "unterminated quote".
int IoLexer_readMonoQuote(IoLexer *self)
{
	int closed = 0;

	IoLexer_pushPos(self);

	if (IoLexer_nextChar(self) != '"')
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	while (!closed)
	{
		uchar_t c = IoLexer_nextChar(self);

		if (c == '"')
		{
			closed = 1;
		}
		else if (c == '\\')
		{
			// skip whatever is escaped
			IoLexer_nextChar(self);
		}
		else if (c == 0)
		{
			self->errorToken = IoLexer_currentToken(self);

			if (self->errorToken)
			{
				IoToken_error_(self->errorToken, "unterminated quote");
			}

			IoLexer_popPosBack(self);
			return 0;
		}
	}

	IoLexer_grabTokenType_(self, MONOQUOTE_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
907
// Reads a string delimited by three double quotes on each side and
// emits it (delimiters included) as a TRIQUOTE_TOKEN. Returns 1 on
// success; returns 0 with the cursor restored when the opener is absent
// or the closer is never found (no error token is set here).
int IoLexer_readTriQuote(IoLexer *self)
{
	IoLexer_pushPos(self);

	if (!IoLexer_readString_(self, "\"\"\""))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	for (;;)
	{
		if (IoLexer_readString_(self, "\"\"\""))
		{
			break;
		}

		if (IoLexer_nextChar(self) == 0)
		{
			IoLexer_popPosBack(self);
			return 0;
		}
	}

	IoLexer_grabTokenType_(self, TRIQUOTE_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
933
934 // helpers ----------------------------
935
// Reads character `c` at the cursor and emits it as a token of the
// given type. Returns 1 on success, 0 (cursor restored) otherwise.
int IoLexer_readTokenChar_type_(IoLexer *self, char c, IoTokenType type)
{
	IoLexer_pushPos(self);

	if (!IoLexer_readChar_(self, c))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_grabTokenType_(self, type);
	IoLexer_popPos(self);
	return 1;
}
950
// Reads the literal string `s` at the cursor and emits it as an
// IDENTIFIER_TOKEN. Returns 1 on success, 0 (cursor restored) otherwise.
int IoLexer_readTokenString_(IoLexer *self, const char *s)
{
	IoLexer_pushPos(self);

	if (!IoLexer_readString_(self, s))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
965
966
// If the source at the cursor starts with `s`, moves the cursor past it
// and returns 1. Returns 0 (cursor unchanged) otherwise, and always
// returns 0 when the cursor is on the terminator.
int IoLexer_readString_(IoLexer *self, const char *s)
{
	size_t wanted = strlen(s);

	if (IoLexer_onNULL(self))
	{
		return 0;
	}

	if (strncmp(self->current, s, wanted) != 0)
	{
		return 0;
	}

	self->current += wanted;
	return 1;
}
984
IoLexer_readCharIn_(IoLexer * self,const char * s)985 TEST_INLINE int IoLexer_readCharIn_(IoLexer *self, const char *s)
986 {
987 if (!IoLexer_onNULL(self))
988 {
989 uchar_t c = IoLexer_nextChar(self);
990
991 if (c < 0x80 && strchr(s, c))
992 {
993 return 1;
994 }
995
996 IoLexer_prevChar(self);
997 }
998 return 0;
999 }
1000
IoLexer_readCharInRange_(IoLexer * self,uchar_t first,uchar_t last)1001 TEST_INLINE int IoLexer_readCharInRange_(IoLexer *self, uchar_t first, uchar_t last)
1002 {
1003 if (!IoLexer_onNULL(self))
1004 {
1005 uchar_t c = IoLexer_nextChar(self);
1006
1007 if (c >= first && c <= last)
1008 {
1009 return 1;
1010 }
1011
1012 IoLexer_prevChar(self);
1013 }
1014 return 0;
1015 }
1016
// Consumes the next character if it equals `c`. Returns 1 when
// consumed, 0 (cursor unchanged) otherwise.
//
// On failure the cursor is put back to where it was on entry.
// IoLexer_nextChar() does not move when the bytes at the cursor do not
// decode, so stepping back one character in that case would rewind the
// cursor over input that was already accepted.
int IoLexer_readChar_(IoLexer *self, char c)
{
	char *start = self->current;
	uchar_t nc;

	if (IoLexer_onNULL(self))
	{
		return 0;
	}

	nc = IoLexer_nextChar(self);

	if (nc && nc == c)
	{
		return 1;
	}

	self->current = start;
	return 0;
}
1032
// Consumes the next character if it equals `c` ignoring ASCII case.
// Returns 1 when consumed, 0 (cursor unchanged) otherwise.
//
// Case folding is only applied to ASCII input: tolower() is undefined
// for values that do not fit in an unsigned char, and decoded
// characters can be arbitrary code points. A non-ASCII character never
// matches. On failure the cursor is put back to where it was on entry,
// since IoLexer_nextChar() may not have moved it.
int IoLexer_readCharAnyCase_(IoLexer *self, char c)
{
	char *start = self->current;
	uchar_t nc;

	if (IoLexer_onNULL(self))
	{
		return 0;
	}

	nc = IoLexer_nextChar(self);

	if (nc && nc < 0x80 && tolower((int)nc) == tolower((unsigned char)c))
	{
		return 1;
	}

	self->current = start;
	return 0;
}
1048
// Consumes the next character if its code point is 0x80 or above.
// Returns 1 when consumed, 0 (cursor unchanged) otherwise.
//
// On failure the cursor is put back to where it was on entry.
// IoLexer_nextChar() does not move when the bytes at the cursor do not
// decode, so stepping back one character in that case would rewind the
// cursor over input that was already accepted.
int IoLexer_readNonASCIIChar_(IoLexer *self)
{
	char *start = self->current;
	uchar_t nc;

	if (IoLexer_onNULL(self))
	{
		return 0;
	}

	nc = IoLexer_nextChar(self);

	if (nc >= 0x80)
	{
		return 1;
	}

	self->current = start;
	return 0;
}
1062
// Consumes the next character unless it is a newline. Returns 1 when a
// character was consumed, 0 (cursor unchanged) otherwise.
//
// A character that cannot be decoded (IoLexer_nextChar() returns 0 and
// does not move) counts as a failure. Reporting success there would
// make callers that loop on this function spin forever on the same
// byte.
int IoLexer_readNonReturn(IoLexer *self)
{
	char *start = self->current;
	uchar_t c;

	if (IoLexer_onNULL(self))
	{
		return 0;
	}

	c = IoLexer_nextChar(self);

	if (c != 0 && c != '\n')
	{
		return 1;
	}

	self->current = start;
	return 0;
}
1070
// Consumes the next character unless it is a double quote. Returns 1
// when a character was consumed, 0 (cursor unchanged) otherwise.
//
// A character that cannot be decoded (IoLexer_nextChar() returns 0 and
// does not move) counts as a failure. Reporting success there would
// make callers that loop on this function spin forever on the same
// byte.
int IoLexer_readNonQuote(IoLexer *self)
{
	char *start = self->current;
	uchar_t c;

	if (IoLexer_onNULL(self))
	{
		return 0;
	}

	c = IoLexer_nextChar(self);

	if (c != 0 && c != '"')
	{
		return 1;
	}

	self->current = start;
	return 0;
}
1078
1079 // character definitions ----------------------------
1080
// Consumes a run of characters accepted by IoLexer_readCharacter().
// Returns 1 when at least one was consumed, else 0.
int IoLexer_readCharacters(IoLexer *self)
{
	if (!IoLexer_readCharacter(self))
	{
		return 0;
	}

	while (IoLexer_readCharacter(self))
	{
	}

	return 1;
}
1092
// Consumes one character that is a letter, a digit, a special
// character or an operator character (tried in that order).
// Evaluates to non-zero when one was consumed.
int IoLexer_readCharacter(IoLexer *self)
{
	int consumed = IoLexer_readLetter(self)
		|| IoLexer_readDigit(self)
		|| IoLexer_readSpecialChar(self)
		|| IoLexer_readOpChar(self);

	return consumed;
}
1102
// Consumes one operator character. The accepted set is listed in the
// string below; braces are not part of it.
int IoLexer_readOpChar(IoLexer *self)
{
	static const char operatorChars[] = ":'~!@$%^&*-+=|\\<>?/";

	return IoLexer_readCharIn_(self, operatorChars);
}
1108
// Consumes one of the characters listed in the file-level specialChars
// string. Returns what IoLexer_readCharIn_() returns.
int IoLexer_readSpecialChar(IoLexer *self)
{
	int consumed = IoLexer_readCharIn_(self, specialChars);

	return consumed;
}
1113
// Consumes one ASCII decimal digit. Returns what
// IoLexer_readCharInRange_() returns.
int IoLexer_readDigit(IoLexer *self)
{
	int consumed = IoLexer_readCharInRange_(self, '0', '9');

	return consumed;
}
1118
// Consumes one "letter": an ASCII upper or lower case letter, a colon,
// or any character whose code point is 0x80 or above. Evaluates to
// non-zero when one was consumed.
int IoLexer_readLetter(IoLexer *self)
{
	if (IoLexer_readCharInRange_(self, 'A', 'Z')) return 1;
	if (IoLexer_readCharInRange_(self, 'a', 'z')) return 1;
	if (IoLexer_readCharIn_(self, ":")) return 1;

	return IoLexer_readNonASCIIChar_(self) ? 1 : 0;
}
1126
1127 // terminator -------------------------------
1128
// Reads a run of terminator characters (';' or newline), with
// separators allowed before, between and after them, and represents the
// whole run by a single ";" TERMINATOR_TOKEN. No token is added when
// the last token of the stream already is a terminator.
//
// Returns 1 when at least one terminator character was read, 0 (cursor
// restored) otherwise.
//
// Every successful path pops the position saved on entry. The "already
// terminated" path used to return without popping, leaving a stale
// entry on the position and token stacks for the callers' later
// pop/backtrack calls to trip over.
int IoLexer_readTerminator(IoLexer *self)
{
	int terminated = 0;
	IoToken *top;

	IoLexer_pushPos(self);
	IoLexer_readSeparator(self);

	while (IoLexer_readTerminatorChar(self))
	{
		terminated = 1;
		IoLexer_readSeparator(self);
	}

	if (!terminated)
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	top = IoLexer_currentToken(self);

	// avoid double terminators
	if (!(top && IoToken_type(top) == TERMINATOR_TOKEN))
	{
		IoLexer_addTokenString_length_type_(self, ";", 1, TERMINATOR_TOKEN);
	}

	IoLexer_popPos(self);
	return 1;
}
1159
// Consumes one terminator character: ';' or newline.
int IoLexer_readTerminatorChar(IoLexer *self)
{
	int consumed = IoLexer_readCharIn_(self, ";\n");

	return consumed;
}
1164
1165 // separator --------------------------------
1166
// Skips a run of separator characters (see IoLexer_readSeparatorChar).
// No token is produced. Returns 1 when anything was skipped, else 0.
int IoLexer_readSeparator(IoLexer *self)
{
	IoLexer_pushPos(self);

	while (IoLexer_readSeparatorChar(self))
	{
	}

	if (!IoLexer_grabLength(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_popPos(self);
	return 1;
}
1185
// Consumes one separator: either a single blank character (space, form
// feed, carriage return, tab, vertical tab), or a line continuation
// made of a backslash, optional blanks and a newline. Returns 1 when
// consumed; returns 0 with the cursor restored otherwise.
int IoLexer_readSeparatorChar(IoLexer *self)
{
	static const char blanks[] = " \f\r\t\v";

	if (IoLexer_readCharIn_(self, blanks))
	{
		return 1;
	}

	IoLexer_pushPos(self);

	if (IoLexer_readCharIn_(self, "\\"))
	{
		while (IoLexer_readCharIn_(self, blanks))
		{
		}

		if (IoLexer_readCharIn_(self, "\n"))
		{
			IoLexer_popPos(self);
			return 1;
		}
	}

	// a backslash not followed by a newline is not a separator
	IoLexer_popPosBack(self);
	return 0;
}
1211
1212 // whitespace -----------------------------------
1213
// Skips a run of whitespace characters (see
// IoLexer_readWhitespaceChar). No token is produced. Returns 1 when
// anything was skipped, else 0.
int IoLexer_readWhitespace(IoLexer *self)
{
	IoLexer_pushPos(self);

	while (IoLexer_readWhitespaceChar(self))
	{
	}

	if (!IoLexer_grabLength(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_popPos(self);
	return 1;
}
1232
// Consumes one whitespace character: space, form feed, carriage return,
// tab, vertical tab or newline.
int IoLexer_readWhitespaceChar(IoLexer *self)
{
	int consumed = IoLexer_readCharIn_(self, " \f\r\t\v\n");

	return consumed;
}
1237
// Consumes a run of decimal digits. Returns 1 when at least one digit
// was consumed, else 0 with the cursor restored.
int IoLexer_readDigits(IoLexer *self)
{
	IoLexer_pushPos(self);

	if (!IoLexer_readDigit(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	while (IoLexer_readDigit(self))
	{
	}

	IoLexer_popPos(self);
	return 1;
}
1258
// Reads a number token; the hexadecimal form is tried before the
// decimal form. Returns 1 if one was read, else 0.
int IoLexer_readNumber(IoLexer *self)
{
	if (IoLexer_readHexNumber(self))
	{
		return 1;
	}

	return IoLexer_readDecimal(self) ? 1 : 0;
}
1263
// Reads an exponent: 'e' or 'E', an optional sign, then digits.
// Returns 0 when there is no 'e', 1 when a full exponent was read, and
// -1 when the 'e' (and optional sign) is not followed by digits; in the
// -1 case the cursor is left after what was consumed.
int IoLexer_readExponent(IoLexer *self)
{
	if (!IoLexer_readCharAnyCase_(self, 'e'))
	{
		return 0;
	}

	// at most one sign; '+' is only tried when '-' is absent
	if (!IoLexer_readChar_(self, '-'))
	{
		IoLexer_readChar_(self, '+');
	}

	return IoLexer_readDigits(self) ? 1 : -1;
}
1282
// Reads a fractional part: '.' followed by digits. Returns 0 when there
// is no '.', 1 when a full fractional part was read, and -1 when the
// '.' is not followed by digits; in the -1 case the cursor is left
// after the '.'.
int IoLexer_readDecimalPlaces(IoLexer *self)
{
	if (!IoLexer_readChar_(self, '.'))
	{
		return 0;
	}

	return IoLexer_readDigits(self) ? 1 : -1;
}
1296
// Reads a decimal number and emits it as a NUMBER_TOKEN. Accepted
// shapes: digits, digits with a fractional part, or a bare fractional
// part, each optionally followed by an exponent. A '.' or exponent
// marker not followed by digits makes the whole read fail.
// Returns 1 on success, 0 with the cursor restored otherwise.
int IoLexer_readDecimal(IoLexer *self)
{
	int mantissaOk;

	IoLexer_pushPos(self);

	if (IoLexer_readDigits(self))
	{
		// fraction is optional, but must be complete if started
		mantissaOk = (IoLexer_readDecimalPlaces(self) != -1);
	}
	else
	{
		// no integer part: a complete fraction is mandatory
		mantissaOk = (IoLexer_readDecimalPlaces(self) == 1);
	}

	if (mantissaOk &&
		IoLexer_readExponent(self) != -1 &&
		IoLexer_grabLength(self))
	{
		IoLexer_grabTokenType_(self, NUMBER_TOKEN);
		IoLexer_popPos(self);
		return 1;
	}

	IoLexer_popPosBack(self);
	return 0;
}
1331
// Reads a number starting with "0x" or "0X" followed by at least one
// run of digits or other characters accepted by
// IoLexer_readCharacters(), and emits it as a HEXNUMBER_TOKEN.
// Returns 1 on success, 0 with the cursor restored otherwise.
int IoLexer_readHexNumber(IoLexer *self)
{
	int runs = 0;

	IoLexer_pushPos(self);

	if (IoLexer_readChar_(self, '0') && IoLexer_readCharAnyCase_(self, 'x'))
	{
		while (IoLexer_readDigits(self) || IoLexer_readCharacters(self))
		{
			runs++;
		}
	}

	if (runs == 0 || !IoLexer_grabLength(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_grabTokenType_(self, HEXNUMBER_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
1356