1 /*
2 Copyright (c) 2003, Steve Dekorte
3 All rights reserved. See _BSDLicense.txt.
4 
5 Aug 2004 - removed {} from op chars
6 - changed identifier to stop after 1 colon
7 */
8 
9 #include "IoLexer.h"
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include <stddef.h>
14 
15 //#define LEXER_DEBUG
16 //#define LEXER_DEBUG_TOKENS
17 //#define TEST_INLINE inline
18 #define TEST_INLINE
19 
IoLexer_currentToken(IoLexer * self)20 static IoToken *IoLexer_currentToken(IoLexer *self)
21 {
22 	return List_top(self->tokenStream);
23 }
24 
IoLexer_new(void)25 IoLexer *IoLexer_new(void)
26 {
27 	IoLexer *self = (IoLexer *)io_calloc(1, sizeof(IoLexer));
28 	self->s = (char *)io_calloc(1, 1);
29 	self->s[0] = 0;
30 	self->posStack      = Stack_new();
31 	self->tokenStack    = Stack_new();
32 	self->tokenStream   = List_new();
33 	self->charLineIndex = List_new();
34 	return self;
35 }
36 
// Releases the lexer: frees all tokens (via IoLexer_clear), the source
// buffer, both stacks, both lists, the cached error description (if one
// was ever built) and finally the lexer itself.
void IoLexer_free(IoLexer *self)
{
	IoLexer_clear(self);

	io_free(self->s);
	Stack_free(self->posStack);
	Stack_free(self->tokenStack);
	List_free(self->tokenStream);
	List_free(self->charLineIndex);

	if (self->errorDescription != NULL)
	{
		io_free(self->errorDescription);
	}

	io_free(self);
}
48 
// Returns a human readable description of the current error token, in
// the form: "<error>" on line <n> character <n>.
//
// The text lives in a buffer owned by the lexer (allocated lazily on the
// first call and released in IoLexer_free), so callers must not free it.
// When there is no error token the buffer is returned unchanged (empty
// on first use).
char *IoLexer_errorDescription(IoLexer *self)
{
	enum { ERROR_DESCRIPTION_SIZE = 1024 };
	IoToken *et = IoLexer_errorToken(self);

	if (!self->errorDescription)
	{
		self->errorDescription = io_calloc(1, ERROR_DESCRIPTION_SIZE);
		self->errorDescription[0] = 0;
	}

	if (et)
	{
		// bounded write: the token's error text has no fixed length, so an
		// unbounded sprintf could overflow the fixed-size buffer
		snprintf(self->errorDescription,
			ERROR_DESCRIPTION_SIZE,
			"\"%s\" on line %i character %i",
			et->error,
			IoToken_lineNumber(et),
			IoToken_charNumber(et));
	}

	return self->errorDescription;
}
70 
71 
// Rebuilds charLineIndex for the current source buffer. The index holds,
// in order: the start of the buffer, a pointer to every '\n' character,
// and a pointer to the terminating NUL. Also resets the line hint.
void IoLexer_buildLineIndex(IoLexer *self)
{
	char *cursor;

	List_removeAll(self->charLineIndex);
	List_append_(self->charLineIndex, self->s);

	for (cursor = self->s; *cursor; cursor++)
	{
		if (*cursor == '\n')
		{
			List_append_(self->charLineIndex, cursor);
		}
	}

	// cursor now points at the terminating NUL
	List_append_(self->charLineIndex, cursor);
	self->lineHint = 0;
}
92 
93 // next/prev character ------------------------
94 
// UTF8_SEQLEN(c): number of bytes in the UTF-8 sequence announced by the
// lead byte c (1..6, following the original 6-byte UTF-8 layout).
// Note: bytes 0x80..0xdf all map to 2 and 0xfe/0xff map to 1; the macro
// does not validate the byte, and it evaluates its argument several
// times, so pass a side-effect-free expression.
#define UTF8_SEQLEN(c) (    \
	(c) < 0x80 ? 1 :        \
	(c) < 0xe0 ? 2 :        \
	(c) < 0xf0 ? 3 :        \
	(c) < 0xf8 ? 4 :        \
	(c) < 0xfc ? 5 :        \
	(c) < 0xfe ? 6 : 1      \
	)
// Sentinel returned by _IoLexer_DecodeUTF8 for a malformed sequence.
#define INVALID_CHAR 0xfffe
104 
_IoLexer_DecodeUTF8(const unsigned char * s)105 static uchar_t _IoLexer_DecodeUTF8(const unsigned char *s)
106 {
107 	if (*s < 0x80)
108 		return *s;
109 	else if (*s < 0xc2)
110 		return INVALID_CHAR;
111 	else if (*s < 0xe0)
112 	{
113 		if (!((s[1] ^ 0x80) < 0x40))
114 			return INVALID_CHAR;
115 		return ((uchar_t)(s[0] & 0x1f) << 6) | (uchar_t)(s[1] ^ 0x80);
116 	}
117 	else if (*s < 0xf0)
118 	{
119 		if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[0] >= 0xe1 || s[1] >= 0xa0)))
120 			return INVALID_CHAR;
121 		return ((uchar_t)(s[0] & 0x0f) << 12) | ((uchar_t)(s[1] ^ 0x80) << 6) | (uchar_t)(s[2] ^ 0x80);
122 	}
123 	else if (*s < 0xf8)
124 	{
125 		if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[0] >= 0xf1 || s[1] >= 0x90)))
126 			return INVALID_CHAR;
127 		return ((uchar_t)(s[0] & 0x07) << 18) | ((uchar_t)(s[1] ^ 0x80) << 12) | ((uchar_t)(s[2] ^ 0x80) << 6) | (uchar_t)(s[3] ^ 0x80);
128 	}
129 	else if (*s < 0xfc)
130 	{
131 		if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 && (s[0] >= 0xf9 || s[1] >= 0x88)))
132 			return INVALID_CHAR;
133 		return ((uchar_t)(s[0] & 0x03) << 24) | ((uchar_t)(s[1] ^ 0x80) << 18) | ((uchar_t)(s[2] ^ 0x80) << 12) | ((uchar_t)(s[3] ^ 0x80) << 6) | (uchar_t)(s[4] ^ 0x80);
134 	}
135 	else if (*s < 0xfe)
136 	{
137 		if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 && (s[5] ^ 0x80) < 0x40 && (s[0] >= 0xfd || s[1] >= 0x84)))
138 			return INVALID_CHAR;
139 		return ((uchar_t)(s[0] & 0x01) << 30) | ((uchar_t)(s[1] ^ 0x80) << 24) | ((uchar_t)(s[2] ^ 0x80) << 18) | ((uchar_t)(s[3] ^ 0x80) << 12) | ((uchar_t)(s[4] ^ 0x80) << 6) | (uchar_t)(s[5] ^ 0x80);
140 	}
141 	else
142 		return INVALID_CHAR;
143 }
144 
IoLexer_nextChar(IoLexer * self)145 TEST_INLINE uchar_t IoLexer_nextChar(IoLexer *self)
146 {
147 	unsigned char c = (unsigned char) * (self->current);
148 	int seqlen, i;
149 	uchar_t uch;
150 
151 	if (c == 0)
152 	{
153 		return 0;
154 	}
155 	else if (c < 0x80)
156 	{
157 		self->current++;
158 		return c;
159 	}
160 
161 	seqlen = UTF8_SEQLEN(c);
162 
163 	for (i = 0; i < seqlen; i++)
164 	{
165 		if (self->current[i] == 0)
166 		{
167 			// XXX: invalid or incomplete sequence
168 			return 0;
169 		}
170 	}
171 
172 	uch = _IoLexer_DecodeUTF8((unsigned char*)self->current);
173 
174 	if (uch == INVALID_CHAR)
175 	{
176 		return 0;
177 	}
178 
179 	self->current += seqlen;
180 	return uch;
181 }
182 
IoLexer_prevChar(IoLexer * self)183 TEST_INLINE uchar_t IoLexer_prevChar(IoLexer *self)
184 {
185 	uchar_t uch;
186 	int len;
187 
188 	for (len = 1; len <= 6 && self->current - len > self->s; len++)
189 	{
190 		unsigned char c = *(unsigned char *)(self->current - len);
191 		if (c < 0x80 || c >= 0xc2)
192 			break;
193 	}
194 
195 	self->current -= len;
196 	uch = _IoLexer_DecodeUTF8((unsigned char*)self->current);
197 	if (uch == INVALID_CHAR)
198 		return 0;
199 
200 	return uch;
201 }
202 
IoLexer_current(IoLexer * self)203 TEST_INLINE char *IoLexer_current(IoLexer *self)
204 {
205 	return self->current;
206 }
207 
IoLexer_onNULL(IoLexer * self)208 TEST_INLINE int IoLexer_onNULL(IoLexer *self)
209 {
210 	return (*(self->current) == 0);
211 }
212 
213 // ------------------------------------------
214 
// Reference implementation of the line number lookup: 1 plus the number
// of '\n' characters between the start of the buffer and the cursor.
size_t IoLexer_currentLineNumberOld(IoLexer *self)
{
	size_t newlines = 0;
	char *p;

	for (p = self->s; p < self->current; p++)
	{
		if (*p == '\n')
		{
			newlines++;
		}
	}

	return newlines + 1;
}
232 
IoLexer_currentLineNumber(IoLexer * self)233 TEST_INLINE size_t IoLexer_currentLineNumber(IoLexer *self)
234 {
235 	// this should be even faster than a binary search
236 	// since almost all results are very close to the last
237 
238 	List *index = self->charLineIndex;
239 	size_t line = self->lineHint;
240 	size_t numLines = List_size(index);
241 	void *current = (void *)self->current;
242 
243 	if (current < List_at_(index, line))
244 	{
245 		// walk down lines until char is bigger than one
246 		while (line > 0 && !(current > List_at_(index, line)))
247 		{
248 			line --;
249 		}
250 		line ++;
251 	}
252 	else
253 	{
254 		// walk up lines until char is less than or equal to one
255 		while (line < numLines && !(current <= List_at_(index, line)))
256 		{
257 			line ++;
258 		}
259 	}
260 
261 
262 	self->lineHint = line;
263 
264 	/*
265 	{
266 		size_t realLine = IoLexer_currentLineNumberOld(self);
267 
268 		if (line != realLine)
269 		{
270 			printf("mismatch on currentLine %i != %i\n", (int)line, (int)realLine);
271 		}
272 	}
273 	*/
274 	return line;
275 }
276 
// Resets the lexer for a new pass over the same source: frees every
// token in the stream, empties the stream and both stacks, rewinds the
// cursor to the start of the buffer and clears the result index,
// max-char mark and error token.
void IoLexer_clear(IoLexer *self)
{
	// free the tokens first, then drop the (now dangling) pointers
	LIST_FOREACH(self->tokenStream, i, t, IoToken_free((IoToken *)t) );
	List_removeAll(self->tokenStream);

	Stack_clear(self->posStack);
	Stack_clear(self->tokenStack);

	self->errorToken = NULL;
	self->maxChar = 0;
	self->resultIndex = 0;
	self->current = self->s;
}
290 
IoLexer_errorToken(IoLexer * self)291 IoToken *IoLexer_errorToken(IoLexer *self)
292 {
293 	return self->errorToken;
294 }
295 
296 // lexing -------------------------------------
297 
// Replaces the lexer's source text with a private copy of `string`,
// rewinds the cursor to its start and rebuilds the line index.
void IoLexer_string_(IoLexer *self, const char *string)
{
	size_t bytesNeeded = strlen(string) + 1; // include the terminating NUL
	char *buffer = (char *)io_realloc(self->s, bytesNeeded);

	self->s = strcpy(buffer, string);
	self->current = self->s;
	IoLexer_buildLineIndex(self);
}
304 
// Prints up to `max` bytes of the source to stdout, starting at the
// maxChar offset and stopping early at the end of the text.
void IoLexer_printLast_(IoLexer *self, int max)
{
	const char *from = self->s + self->maxChar;
	int printed = 0;

	while (printed < max && from[printed])
	{
		putchar(from[printed]);
		printed++;
	}
}
315 
316 // --- token and character position stacks ---
317 
// Returns the cursor position saved by the most recent IoLexer_pushPos
// that has not been popped yet (the top of posStack).
char *IoLexer_lastPos(IoLexer *self)
{
	char *saved = Stack_top(self->posStack);

	return saved;
}
322 
IoLexer_pushPos(IoLexer * self)323 TEST_INLINE void IoLexer_pushPos(IoLexer *self)
324 {
325 	intptr_t index = self->current - self->s;
326 
327 	if (index > (intptr_t)self->maxChar)
328 	{
329 		self->maxChar = index;
330 	}
331 
332 	Stack_push_(self->tokenStack, (void *)(intptr_t)(List_size(self->tokenStream) - 1));
333 	Stack_push_(self->posStack, self->current);
334 
335 #ifdef LEXER_DEBUG
336 	printf("push: ");
337 	IoLexer_print(self);
338 #endif
339 }
340 
IoLexer_popPos(IoLexer * self)341 TEST_INLINE void IoLexer_popPos(IoLexer *self)
342 {
343 	Stack_pop(self->tokenStack);
344 	Stack_pop(self->posStack);
345 #ifdef LEXER_DEBUG
346 	printf("pop:	");
347 	IoLexer_print(self);
348 #endif
349 }
350 
IoLexer_popPosBack(IoLexer * self)351 TEST_INLINE void IoLexer_popPosBack(IoLexer *self)
352 {
353 	intptr_t i = (intptr_t)Stack_pop(self->tokenStack);
354 	intptr_t topIndex = (intptr_t)Stack_top(self->tokenStack);
355 
356 	if (i > -1)
357 	{
358 		List_setSize_(self->tokenStream, i + 1);
359 
360 		if (i != topIndex) // ok to io_free token
361 		{
362 			IoToken *parent = IoLexer_currentToken(self);
363 
364 			if (parent)
365 			{
366 				IoToken_nextToken_(parent, NULL);
367 			}
368 		}
369 	}
370 
371 	self->current = Stack_pop(self->posStack);
372 #ifdef LEXER_DEBUG
373 	printf("back: "); IoLexer_print(self);
374 #endif
375 }
376 
377 // ------------------------------------------
378 
// Tokenizes the whole source buffer.
//
// Returns 0 when the message chain consumed the entire input, -1 when
// unconsumed text remains. On failure errorToken is guaranteed to be
// set: if no reader recorded a specific error, the last token (or, with
// an empty token stream, a NO_TOKEN preview of the remaining text) is
// flagged with a generic syntax error.
int IoLexer_lex(IoLexer *self)
{
	enum { ERROR_PREVIEW_LENGTH = 30 };

	IoLexer_clear(self);
	IoLexer_pushPos(self);

	IoLexer_messageChain(self);

	if (*(self->current) == 0)
	{
		return 0;
	}

	if (!self->errorToken)
	{
		if (List_size(self->tokenStream))
		{
			self->errorToken = IoLexer_currentToken(self);
		}
		else
		{
			// never claim more bytes than are left in the buffer: the
			// preview length is handed on as the token's name length
			size_t remaining = strlen(self->current);
			size_t previewLength = remaining < ERROR_PREVIEW_LENGTH ? remaining : ERROR_PREVIEW_LENGTH;

			self->errorToken = IoLexer_addTokenString_length_type_(self, self->current, previewLength, NO_TOKEN);
		}

		IoToken_error_(self->errorToken, "Syntax error near this location");
	}

	return -1;
}
409 
410 // getting results --------------------------------
411 
IoLexer_top(IoLexer * self)412 IoToken *IoLexer_top(IoLexer *self)
413 {
414 	return List_at_(self->tokenStream, self->resultIndex);
415 }
416 
IoLexer_topType(IoLexer * self)417 IoTokenType IoLexer_topType(IoLexer *self)
418 {
419 	if (!IoLexer_top(self))
420 	{
421 		return 0;
422 	}
423 
424 	return IoLexer_top(self)->type;
425 }
426 
IoLexer_pop(IoLexer * self)427 IoToken *IoLexer_pop(IoLexer *self)
428 {
429 	IoToken *t = IoLexer_top(self);
430 	self->resultIndex ++;
431 	return t;
432 }
433 
434 // stack management --------------------------------
435 
// Debug helper: prints the first token of the stream via IoToken_print
// (if there is one), followed by a newline.
void IoLexer_print(IoLexer *self)
{
	IoToken *head = List_first(self->tokenStream);

	if (head != NULL)
	{
		IoToken_print(head);
	}

	printf("\n");
}
447 
// Debug helper: prints every token of the stream on one line as
// 'name' typeName, separated by commas, followed by a newline.
void IoLexer_printTokens(IoLexer *self)
{
	int i = 0;

	while (i < List_size(self->tokenStream))
	{
		IoToken *token = List_at_(self->tokenStream, i);

		printf("'%s'", token->name);
		printf(" %s ", IoToken_typeName(token));

		// separator after every token except the last one
		if (i < List_size(self->tokenStream) - 1)
		{
			printf(", ");
		}

		i++;
	}

	printf("\n");
}
467 
468 // grabbing ---------------------------------------------
469 
// Returns the number of bytes between the most recently pushed position
// and the cursor, i.e. how much the current reader has consumed.
int IoLexer_grabLength(IoLexer *self)
{
	char *from = IoLexer_lastPos(self);
	char *to = IoLexer_current(self);

	return (int)(to - from);
}
477 
// Emits a token of the given type covering the text between the most
// recently pushed position and the cursor. An empty span is treated as
// an internal error: a message is printed and the process exits.
void IoLexer_grabTokenType_(IoLexer *self, IoTokenType type)
{
	char *start = IoLexer_lastPos(self);
	char *end = IoLexer_current(self);
	size_t length = (end - start);

	if (length == 0)
	{
		printf("IoLexer fatal error: empty token\n");
		exit(1);
	}

	IoLexer_addTokenString_length_type_(self, start, length, type);
}
492 
IoLexer_addTokenString_length_type_(IoLexer * self,const char * s1,size_t len,IoTokenType type)493 IoToken *IoLexer_addTokenString_length_type_(IoLexer *self, const char *s1, size_t len, IoTokenType type)
494 {
495 	IoToken *top = IoLexer_currentToken(self);
496 	IoToken *t = IoToken_new();
497 
498 	t->lineNumber = (int)IoLexer_currentLineNumber(self);
499 	//t->charNumber = (int)(s1 - self->s);
500 	t->charNumber = (int)(self->current - self->s);
501 
502 	if (t->charNumber < 0)
503 	{
504 		printf("bad t->charNumber = %i\n", t->charNumber);
505 	}
506 
507 	IoToken_name_length_(t, s1, len);
508 	IoToken_type_(t, type);
509 
510 	if (top)
511 	{
512 		IoToken_nextToken_(top, t);
513 	}
514 
515 	List_push_(self->tokenStream, t);
516 #ifdef LEXER_DEBUG_TOKENS
517 	printf("token '%s' %s\n", t->name, IoToken_typeName(t));
518 #endif
519 
520 	return t;
521 }
522 
523 // reading ------------------------------------
524 
// Reads a chain of messages: repeatedly skips any run of terminators,
// separators and comments, then reads one message, until no further
// message can be read.
void IoLexer_messageChain(IoLexer *self)
{
	for (;;)
	{
		while (IoLexer_readTerminator(self) ||
			   IoLexer_readSeparator(self) ||
			   IoLexer_readComment(self))
		{
		}

		if (!IoLexer_readMessage(self))
		{
			break;
		}
	}
}
535 
536 // message -------------------------------
537 
// Error exit for IoLexer_readMessage: backtracks to the last pushed
// position, then marks the token that is current after backtracking as
// the error token and attaches the message `name` to it.
static void IoLexer_readMessage_error(IoLexer *self, const char *name)
{
	IoToken *culprit;

	IoLexer_popPosBack(self);

	culprit = IoLexer_currentToken(self);
	self->errorToken = culprit;
	IoToken_error_(culprit, name);
}
544 
// Tries each character of `chars` in order; the first one found at the
// cursor is emitted as a single-character token of `type`.
// Returns 1 if a token was emitted, 0 if none of the characters matched.
int IoLexer_readTokenChars_type_(IoLexer *self, const char *chars, IoTokenType type)
{
	const char *candidate;

	for (candidate = chars; *candidate; candidate++)
	{
		if (IoLexer_readTokenChar_type_(self, *candidate, type))
		{
			return 1;
		}
	}

	return 0;
}
555 
// Maps an opening group character to the name of the message it implies:
// '(' -> "", '[' -> "squareBrackets", '{' -> "curlyBrackets".
// Any other character is an internal error: a message is printed and the
// process exits.
const char *IoLexer_nameForGroupChar_(IoLexer *self, char groupChar)
{
	if (groupChar == '(')
	{
		return "";
	}

	if (groupChar == '[')
	{
		return "squareBrackets";
	}

	if (groupChar == '{')
	{
		return "curlyBrackets";
	}

	printf("IoLexer: fatal error - invalid group char %c\n", groupChar);
	exit(1);
}
568 
// Characters (besides letters and digits) accepted inside identifiers.
// ':' used to be part of this set; it is now handled by IoLexer_readLetter.
// Points at a string literal, hence const.
static const char *specialChars = "._";
571 
// Reads one message: an optional symbol followed by an optional
// bracketed, comma separated argument list.
//
// - A '[' or '{' group, or a '(' group without a preceding symbol, gets
//   an implicit identifier token first (see IoLexer_nameForGroupChar_).
// - Each argument is a message chain; an empty argument after a comma is
//   reported as "missing argument in argument list".
// - A group without a closing character is reported as unmatched.
//
// Returns 1 if a message was read, 0 otherwise. On the error paths the
// error token is set by IoLexer_readMessage_error.
int IoLexer_readMessage(IoLexer *self)
{
	int hasSymbol;
	char opener;

	IoLexer_pushPos(self);
	IoLexer_readPadding(self);

	hasSymbol = IoLexer_readSymbol(self);

	while (IoLexer_readSeparator(self) || IoLexer_readComment(self))
	{
	}

	opener = *IoLexer_current(self);

	if (opener && (strchr("[{", opener) || (!hasSymbol && opener == '(')))
	{
		char *implicitName = (char *)IoLexer_nameForGroupChar_(self, opener);
		IoLexer_addTokenString_length_type_(self, implicitName, strlen(implicitName), IDENTIFIER_TOKEN);
	}

	if (!IoLexer_readTokenChars_type_(self, "([{", OPENPAREN_TOKEN))
	{
		// no argument list: the message is just the symbol, if any
		if (hasSymbol)
		{
			IoLexer_popPos(self);
			return 1;
		}

		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_readPadding(self);

	do
	{
		IoTokenType previousType = IoLexer_currentToken(self)->type;

		IoLexer_readPadding(self);

		// Empty argument: (... ,)
		if (COMMA_TOKEN == previousType)
		{
			char next = *IoLexer_current(self);

			if (',' == next || strchr(")]}", next))
			{
				IoLexer_readMessage_error(self, "missing argument in argument list");
				return 0;
			}
		}

		IoLexer_messageChain(self);
		IoLexer_readPadding(self);

	} while (IoLexer_readTokenChar_type_(self, ',', COMMA_TOKEN));

	if (!IoLexer_readTokenChars_type_(self, ")]}", CLOSEPAREN_TOKEN))
	{
		switch (opener)
		{
			case '(':
				IoLexer_readMessage_error(self, "unmatched ()s");
				break;
			case '[':
				IoLexer_readMessage_error(self, "unmatched []s");
				break;
			case '{':
				IoLexer_readMessage_error(self, "unmatched {}s");
				break;
			default:
				break;
		}

		return 0;
	}

	IoLexer_popPos(self);
	return 1;
}
664 
// Skips any mix of whitespace runs and comments.
// Returns 1 if anything was skipped, 0 otherwise.
int IoLexer_readPadding(IoLexer *self)
{
	int skipped = 0;

	for (;;)
	{
		if (!(IoLexer_readWhitespace(self) || IoLexer_readComment(self)))
		{
			break;
		}

		skipped = 1;
	}

	return skipped;
}
676 
677 // symbols ------------------------------------------
678 
// Reads one symbol, trying in order: number, operator, identifier,
// quoted string. Returns 1 if one was read, 0 otherwise.
int IoLexer_readSymbol(IoLexer *self)
{
	if (IoLexer_readNumber(self)) return 1;
	if (IoLexer_readOperator(self)) return 1;
	if (IoLexer_readIdentifier(self)) return 1;
	if (IoLexer_readQuote(self)) return 1;

	return 0;
}
687 
// Reads a run of letters, digits and special characters and emits it as
// an IDENTIFIER_TOKEN. Returns 1 on success; if nothing was consumed the
// position is restored and 0 is returned.
int IoLexer_readIdentifier(IoLexer *self)
{
	IoLexer_pushPos(self);

	while (IoLexer_readLetter(self) ||
		   IoLexer_readDigit(self) ||
		   IoLexer_readSpecialChar(self))
	{
	}

	if (!IoLexer_grabLength(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
720 
// Reads a run of operator characters and emits it as an
// IDENTIFIER_TOKEN. Returns 1 on success; 0 (with the position restored)
// if no character can be decoded at the cursor or no operator character
// is found there.
int IoLexer_readOperator(IoLexer *self)
{
	uchar_t first;

	IoLexer_pushPos(self);

	// peek: make sure a character can be decoded here, then step back
	first = IoLexer_nextChar(self);

	if (first == 0)
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_prevChar(self);

	while (IoLexer_readOpChar(self))
	{
	}

	if (!IoLexer_grabLength(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
758 
759 // comments ------------------------------------------
760 
// Reads one comment of any supported flavour, trying slash-star, then
// double-slash, then pound. Returns 1 if a comment was read, 0 otherwise.
int IoLexer_readComment(IoLexer *self)
{
	if (IoLexer_readSlashStarComment(self)) return 1;
	if (IoLexer_readSlashSlashComment(self)) return 1;

	return IoLexer_readPoundComment(self) ? 1 : 0;
}
767 
// Reads a slash-star comment, tracking nested openers and closers with a
// depth counter. After a nested opener, and after a closer that does not
// end the outermost comment, one extra character is consumed.
//
// Returns 1 when the comment was read completely. If no character can be
// read before the comment is closed (end of input), the current token --
// or, when there is none, a NO_TOKEN made from the consumed text -- is
// flagged with "unterminated comment", the position is restored and 0 is
// returned. Also returns 0 (position restored) when the cursor is not on
// a comment opener.
int IoLexer_readSlashStarComment(IoLexer *self)
{
	unsigned int depth;

	IoLexer_pushPos(self);

	if (!IoLexer_readString_(self, "/*"))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	depth = 1;

	while (depth > 0)
	{
		if (IoLexer_readString_(self, "/*"))
		{
			IoLexer_nextChar(self);
			depth++;
			continue;
		}

		if (IoLexer_readString_(self, "*/"))
		{
			// otherwise we end up trimming the last char
			if (depth > 1) IoLexer_nextChar(self);
			depth--;
			continue;
		}

		if (IoLexer_nextChar(self) != 0)
		{
			continue;
		}

		// nothing more can be read: the comment was never closed
		self->errorToken = IoLexer_currentToken(self);

		if (!self->errorToken)
		{
			IoLexer_grabTokenType_(self, NO_TOKEN);
			self->errorToken = IoLexer_currentToken(self);
		}

		if (self->errorToken)
		{
			IoToken_error_(self->errorToken, "unterminated comment");
		}

		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_popPos(self);
	return 1;
}
819 
// Reads a double-slash comment: two '/' characters followed by
// everything up to (not including) the next newline. Returns 1 if a
// comment was read; otherwise restores the position and returns 0.
int IoLexer_readSlashSlashComment(IoLexer *self)
{
	IoLexer_pushPos(self);

	// && short-circuits, so the second character is only read after a '/'
	if (IoLexer_nextChar(self) == '/' && IoLexer_nextChar(self) == '/')
	{
		while (IoLexer_readNonReturn(self))
		{
		}

		IoLexer_popPos(self);
		return 1;
	}

	IoLexer_popPosBack(self);
	return 0;
}
838 
// Reads a pound comment: '#' followed by everything up to (not
// including) the next newline. Returns 1 if a comment was read;
// otherwise restores the position and returns 0.
int IoLexer_readPoundComment(IoLexer *self)
{
	IoLexer_pushPos(self);

	if (IoLexer_nextChar(self) != '#')
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	while (IoLexer_readNonReturn(self))
	{
	}

	IoLexer_popPos(self);
	return 1;
}
856 
857 // quotes -----------------------------------------
858 
// Reads a quoted string, trying the triple-quoted form before the
// single-quoted one. Returns 1 if a string was read, 0 otherwise.
int IoLexer_readQuote(IoLexer *self)
{
	if (IoLexer_readTriQuote(self)) return 1;

	return IoLexer_readMonoQuote(self) ? 1 : 0;
}
863 
// Reads a double-quoted string (quotes included) and emits it as a
// MONOQUOTE_TOKEN. A backslash makes the following character part of the
// string, so an escaped quote does not end it.
//
// Returns 1 on success. If no character can be read before the closing
// quote, the current token (if any) is flagged with "unterminated
// quote", the position is restored and 0 is returned. Also returns 0
// (position restored) when the cursor is not on a double quote.
int IoLexer_readMonoQuote(IoLexer *self)
{
	uchar_t c;

	IoLexer_pushPos(self);

	if (IoLexer_nextChar(self) != '"')
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	c = IoLexer_nextChar(self);

	while (c != '"')
	{
		if (c == '\\')
		{
			// skip the escaped character, whatever it is
			IoLexer_nextChar(self);
		}
		else if (c == 0)
		{
			self->errorToken = IoLexer_currentToken(self);

			if (self->errorToken)
			{
				IoToken_error_(self->errorToken, "unterminated quote");
			}

			IoLexer_popPosBack(self);
			return 0;
		}

		c = IoLexer_nextChar(self);
	}

	IoLexer_grabTokenType_(self, MONOQUOTE_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
907 
// Reads a triple-quoted string (delimiters included) and emits it as a
// TRIQUOTE_TOKEN. Returns 1 on success. Returns 0 with the position
// restored when the cursor is not on a triple quote, or when no
// character can be read before the closing delimiter (no error token is
// set in that case).
int IoLexer_readTriQuote(IoLexer *self)
{
	IoLexer_pushPos(self);

	if (!IoLexer_readString_(self, "\"\"\""))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	while (!IoLexer_readString_(self, "\"\"\""))
	{
		if (IoLexer_nextChar(self) == 0)
		{
			IoLexer_popPosBack(self);
			return 0;
		}
	}

	IoLexer_grabTokenType_(self, TRIQUOTE_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
933 
934 // helpers ----------------------------
935 
// If the character at the cursor is `c`, consumes it and emits it as a
// token of `type` (returns 1). Otherwise restores the position and
// returns 0.
int IoLexer_readTokenChar_type_(IoLexer *self, char c, IoTokenType type)
{
	IoLexer_pushPos(self);

	if (!IoLexer_readChar_(self, c))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_grabTokenType_(self, type);
	IoLexer_popPos(self);
	return 1;
}
950 
// If the text at the cursor starts with `s`, consumes it and emits it as
// an IDENTIFIER_TOKEN (returns 1). Otherwise restores the position and
// returns 0.
int IoLexer_readTokenString_(IoLexer *self, const char *s)
{
	IoLexer_pushPos(self);

	if (!IoLexer_readString_(self, s))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_grabTokenType_(self, IDENTIFIER_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
965 
966 
// If the text at the cursor starts with `s`, advances the cursor past it
// and returns 1. Returns 0 (cursor unchanged) otherwise, and always at
// the end of input. No token is emitted.
int IoLexer_readString_(IoLexer *self, const char *s)
{
	size_t length = strlen(s);

	if (IoLexer_onNULL(self) || strncmp(self->current, s, length) != 0)
	{
		return 0;
	}

	self->current += length;
	return 1;
}
984 
IoLexer_readCharIn_(IoLexer * self,const char * s)985 TEST_INLINE int IoLexer_readCharIn_(IoLexer *self, const char *s)
986 {
987 	if (!IoLexer_onNULL(self))
988 	{
989 		uchar_t c = IoLexer_nextChar(self);
990 
991 		if (c < 0x80 && strchr(s, c))
992 		{
993 			return 1;
994 		}
995 
996 		IoLexer_prevChar(self);
997 	}
998 	return 0;
999 }
1000 
IoLexer_readCharInRange_(IoLexer * self,uchar_t first,uchar_t last)1001 TEST_INLINE int IoLexer_readCharInRange_(IoLexer *self, uchar_t first, uchar_t last)
1002 {
1003 	if (!IoLexer_onNULL(self))
1004 	{
1005 		uchar_t c = IoLexer_nextChar(self);
1006 
1007 		if (c >= first && c <= last)
1008 		{
1009 			return 1;
1010 		}
1011 
1012 		IoLexer_prevChar(self);
1013 	}
1014 	return 0;
1015 }
1016 
// If the character at the cursor equals `c`, consumes it and returns 1.
// Otherwise leaves the cursor where it was and returns 0.
//
// The cursor is restored from a saved copy rather than by calling
// IoLexer_prevChar: when IoLexer_nextChar fails (invalid UTF-8) it does
// not advance, and stepping back then moved the cursor before its
// starting point.
int IoLexer_readChar_(IoLexer *self, char c)
{
	char *start = self->current;
	uchar_t nc;

	if (IoLexer_onNULL(self))
	{
		return 0;
	}

	nc = IoLexer_nextChar(self);

	if (nc && nc == c)
	{
		return 1;
	}

	self->current = start;
	return 0;
}
1032 
// If the character at the cursor equals `c` ignoring ASCII case,
// consumes it and returns 1. Otherwise leaves the cursor where it was
// and returns 0.
//
// Two fixes over the straightforward version:
// - tolower() is only defined for values representable as unsigned char
//   (or EOF), so decoded non-ASCII code points are rejected up front and
//   `c` is converted through unsigned char.
// - the cursor is restored from a saved copy; IoLexer_prevChar must not
//   be used after a failed IoLexer_nextChar because nothing was consumed.
int IoLexer_readCharAnyCase_(IoLexer *self, char c)
{
	char *start = self->current;
	uchar_t nc;

	if (IoLexer_onNULL(self))
	{
		return 0;
	}

	nc = IoLexer_nextChar(self);

	if (nc && nc < 0x80 && tolower((int)nc) == tolower((unsigned char)c))
	{
		return 1;
	}

	self->current = start;
	return 0;
}
1048 
// If the character at the cursor is a valid non-ASCII character (code
// point >= 0x80), consumes it and returns 1. Otherwise leaves the cursor
// where it was and returns 0.
//
// The cursor is restored from a saved copy: IoLexer_nextChar does not
// advance on invalid UTF-8, so stepping back with IoLexer_prevChar moved
// the cursor before its starting point.
int IoLexer_readNonASCIIChar_(IoLexer *self)
{
	char *start = self->current;
	uchar_t nc;

	if (IoLexer_onNULL(self))
	{
		return 0;
	}

	nc = IoLexer_nextChar(self);

	if (nc >= 0x80)
	{
		return 1;
	}

	self->current = start;
	return 0;
}
1062 
// Consumes one character if it is anything but a newline and returns 1.
// Returns 0, with the cursor unchanged, at the end of input, on a
// newline, or when the bytes at the cursor are not valid UTF-8.
//
// The invalid-UTF-8 case matters: IoLexer_nextChar returns 0 without
// advancing there, and reporting that as success made the line-comment
// readers spin forever on the same byte.
int IoLexer_readNonReturn(IoLexer *self)
{
	char *start = self->current;
	uchar_t c;

	if (IoLexer_onNULL(self))
	{
		return 0;
	}

	c = IoLexer_nextChar(self);

	if (c != 0 && c != '\n')
	{
		return 1;
	}

	self->current = start;
	return 0;
}
1070 
// Consumes one character if it is anything but a double quote and
// returns 1. Returns 0, with the cursor unchanged, at the end of input,
// on a double quote, or when the bytes at the cursor are not valid UTF-8.
//
// IoLexer_nextChar returns 0 without advancing on invalid UTF-8;
// reporting that as success would let a caller loop forever on the same
// byte.
int IoLexer_readNonQuote(IoLexer *self)
{
	char *start = self->current;
	uchar_t c;

	if (IoLexer_onNULL(self))
	{
		return 0;
	}

	c = IoLexer_nextChar(self);

	if (c != 0 && c != '"')
	{
		return 1;
	}

	self->current = start;
	return 0;
}
1078 
1079 // character definitions ----------------------------
1080 
// Consumes a run of one or more characters accepted by
// IoLexer_readCharacter. Returns 1 if at least one was consumed, else 0.
int IoLexer_readCharacters(IoLexer *self)
{
	if (!IoLexer_readCharacter(self))
	{
		return 0;
	}

	while (IoLexer_readCharacter(self))
	{
	}

	return 1;
}
1092 
// Consumes one character if it is a letter, digit, special character or
// operator character (tried in that order). Returns 1 if so, else 0.
int IoLexer_readCharacter(IoLexer *self)
{
	if (IoLexer_readLetter(self)) return 1;
	if (IoLexer_readDigit(self)) return 1;
	if (IoLexer_readSpecialChar(self)) return 1;

	return IoLexer_readOpChar(self) ? 1 : 0;
}
1102 
// Consumes one operator character if present. Returns 1 if so, else 0.
int IoLexer_readOpChar(IoLexer *self)
{
	// the full set of characters that may appear in an operator
	static const char operatorChars[] = ":'~!@$%^&*-+=|\\<>?/";

	return IoLexer_readCharIn_(self, operatorChars);
}
1108 
// Consumes one character from the specialChars set if present.
// Returns 1 if so, else 0.
int IoLexer_readSpecialChar(IoLexer *self)
{
	int matched = IoLexer_readCharIn_(self, specialChars);

	return matched;
}
1113 
// Consumes one decimal digit ('0'..'9') if present.
// Returns 1 if so, else 0.
int IoLexer_readDigit(IoLexer *self)
{
	int matched = IoLexer_readCharInRange_(self, '0', '9');

	return matched;
}
1118 
// Consumes one "letter" if present: 'A'..'Z', 'a'..'z', a colon, or any
// valid non-ASCII character (tried in that order).
// Returns 1 if so, else 0.
int IoLexer_readLetter(IoLexer *self)
{
	if (IoLexer_readCharInRange_(self, 'A', 'Z')) return 1;
	if (IoLexer_readCharInRange_(self, 'a', 'z')) return 1;
	if (IoLexer_readCharIn_(self, ":")) return 1;

	return IoLexer_readNonASCIIChar_(self) ? 1 : 0;
}
1126 
1127 // terminator -------------------------------
1128 
// Reads a run of terminator characters (';' or newline), with separators
// allowed before, between and after them, and emits a single ";"
// TERMINATOR_TOKEN for the whole run -- unless the previous token already
// is a terminator, in which case the run is consumed without emitting
// another one.
//
// Returns 1 if at least one terminator character was consumed; otherwise
// restores the position and returns 0.
int IoLexer_readTerminator(IoLexer *self)
{
	int terminated = 0;
	IoLexer_pushPos(self);
	IoLexer_readSeparator(self);

	while (IoLexer_readTerminatorChar(self))
	{
		terminated = 1;
		IoLexer_readSeparator(self);
	}

	if (terminated)
	{
		IoToken *top = IoLexer_currentToken(self);

		// avoid double terminators
		if (top && IoToken_type(top) == TERMINATOR_TOKEN)
		{
			// every pushPos needs a matching pop: returning without it
			// left a stale entry on both stacks, which a later pop in an
			// enclosing reader would pick up instead of its own
			IoLexer_popPos(self);
			return 1;
		}

		IoLexer_addTokenString_length_type_(self, ";", 1, TERMINATOR_TOKEN);
		IoLexer_popPos(self);
		return 1;
	}

	IoLexer_popPosBack(self);
	return 0;
}
1159 
// Consumes one terminator character (';' or newline) if present.
// Returns 1 if so, else 0.
int IoLexer_readTerminatorChar(IoLexer *self)
{
	int matched = IoLexer_readCharIn_(self, ";\n");

	return matched;
}
1164 
1165 // separator --------------------------------
1166 
// Skips a run of separator characters (see IoLexer_readSeparatorChar).
// No token is emitted. Returns 1 if anything was skipped; otherwise
// restores the position and returns 0.
int IoLexer_readSeparator(IoLexer *self)
{
	IoLexer_pushPos(self);

	while (IoLexer_readSeparatorChar(self))
	{
	}

	if (!IoLexer_grabLength(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_popPos(self);
	return 1;
}
1185 
// Consumes one separator: either a single blank (space, form feed,
// carriage return, tab or vertical tab), or a line continuation -- a
// backslash, optional blanks, then a newline.
// Returns 1 if one was consumed; otherwise the position is unchanged and
// 0 is returned.
int IoLexer_readSeparatorChar(IoLexer *self)
{
	if (IoLexer_readCharIn_(self, " \f\r\t\v"))
	{
		return 1;
	}

	// not a blank: try a backslash line continuation
	IoLexer_pushPos(self);

	if (IoLexer_readCharIn_(self, "\\"))
	{
		while (IoLexer_readCharIn_(self, " \f\r\t\v"))
		{
		}

		if (IoLexer_readCharIn_(self, "\n"))
		{
			IoLexer_popPos(self);
			return 1;
		}
	}

	IoLexer_popPosBack(self);
	return 0;
}
1211 
1212 // whitespace -----------------------------------
1213 
// Skips a run of whitespace characters (blanks and newlines). No token
// is emitted. Returns 1 if anything was skipped; otherwise restores the
// position and returns 0.
int IoLexer_readWhitespace(IoLexer *self)
{
	IoLexer_pushPos(self);

	while (IoLexer_readWhitespaceChar(self))
	{
	}

	if (!IoLexer_grabLength(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_popPos(self);
	return 1;
}
1232 
// Consumes one whitespace character (space, form feed, carriage return,
// tab, vertical tab or newline) if present. Returns 1 if so, else 0.
int IoLexer_readWhitespaceChar(IoLexer *self)
{
	int matched = IoLexer_readCharIn_(self, " \f\r\t\v\n");

	return matched;
}
1237 
// Consumes a run of one or more decimal digits. No token is emitted.
// Returns 1 if at least one digit was consumed; otherwise restores the
// position and returns 0.
int IoLexer_readDigits(IoLexer *self)
{
	IoLexer_pushPos(self);

	if (!IoLexer_readDigit(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	while (IoLexer_readDigit(self))
	{
	}

	IoLexer_popPos(self);
	return 1;
}
1258 
// Reads a number token, trying the hexadecimal form before the decimal
// one. Returns 1 if a number was read, 0 otherwise.
int IoLexer_readNumber(IoLexer *self)
{
	if (IoLexer_readHexNumber(self)) return 1;

	return IoLexer_readDecimal(self) ? 1 : 0;
}
1263 
// Reads an optional exponent: 'e' or 'E', an optional sign, then digits.
// Returns 0 if there is no exponent marker at the cursor, 1 if a
// complete exponent was read, and -1 if the marker is not followed by
// digits (the marker and sign stay consumed; the caller backtracks).
int IoLexer_readExponent(IoLexer *self)
{
	if (!IoLexer_readCharAnyCase_(self, 'e'))
	{
		return 0;
	}

	// optional sign: try '-' first, then '+'
	if (!IoLexer_readChar_(self, '-'))
	{
		IoLexer_readChar_(self, '+');
	}

	return IoLexer_readDigits(self) ? 1 : -1;
}
1282 
// Reads an optional fractional part: '.' followed by digits.
// Returns 0 if there is no '.' at the cursor, 1 if a complete fractional
// part was read, and -1 if the '.' is not followed by digits (the '.'
// stays consumed; the caller backtracks).
int IoLexer_readDecimalPlaces(IoLexer *self)
{
	if (!IoLexer_readChar_(self, '.'))
	{
		return 0;
	}

	return IoLexer_readDigits(self) ? 1 : -1;
}
1296 
// Reads a decimal number and emits it as a NUMBER_TOKEN. Accepted forms:
// digits with an optional fractional part, or a fractional part alone
// (".5"), each optionally followed by an exponent.
// Returns 1 on success; otherwise restores the position and returns 0.
int IoLexer_readDecimal(IoLexer *self)
{
	int wellFormed;

	IoLexer_pushPos(self);

	if (IoLexer_readDigits(self))
	{
		// integer part present: the fractional part is optional, but a
		// '.' without digits is an error
		wellFormed = (IoLexer_readDecimalPlaces(self) != -1);
	}
	else
	{
		// no integer part: the fractional part is mandatory
		wellFormed = (IoLexer_readDecimalPlaces(self) == 1);
	}

	if (wellFormed)
	{
		wellFormed = (IoLexer_readExponent(self) != -1);
	}

	if (wellFormed && IoLexer_grabLength(self))
	{
		IoLexer_grabTokenType_(self, NUMBER_TOKEN);
		IoLexer_popPos(self);
		return 1;
	}

	IoLexer_popPosBack(self);
	return 0;
}
1331 
// Reads a hexadecimal number and emits it as a HEXNUMBER_TOKEN: '0',
// 'x' or 'X', then at least one run of digits or of characters accepted
// by IoLexer_readCharacters (the payload is not restricted to hex
// digits here).
// Returns 1 on success; otherwise restores the position and returns 0.
int IoLexer_readHexNumber(IoLexer *self)
{
	int runs = 0;

	IoLexer_pushPos(self);

	if (IoLexer_readChar_(self, '0') && IoLexer_readCharAnyCase_(self, 'x'))
	{
		while (IoLexer_readDigits(self) || IoLexer_readCharacters(self))
		{
			runs++;
		}
	}

	if (runs == 0 || !IoLexer_grabLength(self))
	{
		IoLexer_popPosBack(self);
		return 0;
	}

	IoLexer_grabTokenType_(self, HEXNUMBER_TOKEN);
	IoLexer_popPos(self);
	return 1;
}
1356