1 /*
2 * Copyright (c) 2013 Hugh Bailey <obs.jim@gmail.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include <ctype.h>
18 #include <stdio.h>
19 #include "platform.h"
20 #include "cf-lexer.h"
21
/* Returns the numeric value of hex digit `ch`, or -1 if it is not one. */
static inline int cf_hex_digit_value(char ch)
{
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	if (ch >= 'a' && ch <= 'f')
		return ch - 'a' + 10;
	if (ch >= 'A' && ch <= 'F')
		return ch - 'A' + 10;
	return -1;
}

/*
 * Decodes a single escape sequence.
 *
 * On entry *p_src points at the character right after the backslash and
 * *p_dst at the position where the decoded character is to be stored.  On
 * return both pointers have been advanced past what was read/written.
 *
 * Supported: the simple C escapes, \x followed by one or two hex digits and
 * one to three octal digits.  Unknown escapes (and \x without any digit)
 * are consumed without producing output.  If *p_src points at the
 * terminating NUL nothing is consumed.
 */
static inline void cf_convert_from_escape_literal(char **p_dst,
						  const char **p_src)
{
	char *dst = *p_dst;
	const char *src = *p_src;
	unsigned int value = 0;
	int digits = 0;
	char ch = *src;

	/* lone backslash at the end of the input: never step past the NUL */
	if (ch == '\0')
		return;
	src++;

	switch (ch) {
	case '\'':
		*(dst++) = '\'';
		break;
	case '\"':
		*(dst++) = '\"';
		break;
	case '\?':
		*(dst++) = '\?';
		break;
	case '\\':
		*(dst++) = '\\';
		break;
	case 'a':
		*(dst++) = '\a';
		break;
	case 'b':
		*(dst++) = '\b';
		break;
	case 'f':
		*(dst++) = '\f';
		break;
	case 'n':
		*(dst++) = '\n';
		break;
	case 'r':
		*(dst++) = '\r';
		break;
	case 't':
		*(dst++) = '\t';
		break;
	case 'v':
		*(dst++) = '\v';
		break;

	/* hex: at most two digits, only the digits actually present */
	case 'X':
	case 'x':
		while (digits < 2 && cf_hex_digit_value(*src) >= 0) {
			value = value * 16 +
				(unsigned int)cf_hex_digit_value(*src);
			src++;
			digits++;
		}
		if (digits)
			*(dst++) = (char)value;
		break;

	/* oct: the digit already consumed by the switch is the first one */
	default:
		if (ch >= '0' && ch <= '7') {
			value = (unsigned int)(ch - '0');
			digits = 1;
			while (digits < 3 && *src >= '0' && *src <= '7') {
				value = value * 8 + (unsigned int)(*src - '0');
				src++;
				digits++;
			}
			*(dst++) = (char)value;
		}
		break;

		/* case 'u':
		   case 'U': */
	}

	*p_dst = dst;
	*p_src = src;
}
87
/*
 * Converts a quoted string/character literal into its unescaped contents.
 *
 * literal: text starting with ' or " and ending with the same quote
 * count:   length of the literal in bytes, or 0 to use strlen(literal)
 *
 * Returns a buffer obtained from bzalloc() holding the NUL-terminated
 * contents without the surrounding quotes, or NULL when the input is NULL,
 * shorter than two characters or not enclosed in matching quotes.
 */
char *cf_literal_to_str(const char *literal, size_t count)
{
	const char *temp_src, *end;
	char *str, *temp_dst;

	if (!literal)
		return NULL;

	if (!count)
		count = strlen(literal);

	if (count < 2)
		return NULL;
	if (literal[0] != literal[count - 1])
		return NULL;
	if (literal[0] != '\"' && literal[0] != '\'')
		return NULL;

	/*
	 * strip leading and trailing quote characters; decoding never grows
	 * the text, so count - 1 bytes hold the contents plus the NUL
	 */
	str = bzalloc(count - 1);
	temp_src = literal + 1;
	end = literal + count - 1; /* position of the closing quote */
	temp_dst = str;

	/*
	 * Bound the loop by position rather than by an iteration counter:
	 * an escape consumes several source characters in one iteration, and
	 * counting iterations let the closing quote leak into the result.
	 */
	while (temp_src < end && *temp_src) {
		if (*temp_src == '\\') {
			temp_src++;
			cf_convert_from_escape_literal(&temp_dst, &temp_src);
		} else {
			*(temp_dst++) = *(temp_src++);
		}
	}

	*temp_dst = 0;
	return str;
}
120
cf_is_token_break(struct base_token * start_token,const struct base_token * token)121 static bool cf_is_token_break(struct base_token *start_token,
122 const struct base_token *token)
123 {
124 switch (start_token->type) {
125 case BASETOKEN_ALPHA:
126 if (token->type == BASETOKEN_OTHER ||
127 token->type == BASETOKEN_WHITESPACE)
128 return true;
129 break;
130
131 case BASETOKEN_DIGIT:
132 if (token->type == BASETOKEN_WHITESPACE ||
133 (token->type == BASETOKEN_OTHER &&
134 *token->text.array != '.'))
135 return true;
136 break;
137
138 case BASETOKEN_WHITESPACE:
139 /* lump all non-newline whitespace together when possible */
140 if (is_space_or_tab(*start_token->text.array) &&
141 is_space_or_tab(*token->text.array))
142 break;
143 return true;
144
145 case BASETOKEN_OTHER:
146 if (*start_token->text.array == '.' &&
147 token->type == BASETOKEN_DIGIT) {
148 start_token->type = BASETOKEN_DIGIT;
149 break;
150 }
151 /* Falls through. */
152
153 case BASETOKEN_NONE:
154 return true;
155 }
156
157 return false;
158 }
159
/* True when `array` starts with a line splice: a backslash then a newline. */
static inline bool cf_is_splice(const char *array)
{
	if (array[0] != '\\')
		return false;

	return is_newline(array[1]);
}
164
/* Advances *parray past any run of consecutive line splices. */
static inline void cf_pass_any_splices(const char **parray)
{
	const char *pos = *parray;

	while (cf_is_splice(pos)) {
		/* one byte for the backslash plus the newline sequence */
		pos += 1 + newline_size(pos + 1);
	}

	*parray = pos;
}
170
/*
 * True when `array` begins a comment, i.e. a '/' followed (possibly across
 * line splices) by '*' or '/'.
 */
static inline bool cf_is_comment(const char *array)
{
	const char *next;

	if (*array != '/')
		return false;

	next = array + 1;
	cf_pass_any_splices(&next);

	return *next == '/' || *next == '*';
}
182
cf_lexer_process_comment(struct cf_lexer * lex,struct cf_token * out_token)183 static bool cf_lexer_process_comment(struct cf_lexer *lex,
184 struct cf_token *out_token)
185 {
186 const char *offset;
187
188 if (!cf_is_comment(out_token->unmerged_str.array))
189 return false;
190
191 offset = lex->base_lexer.offset;
192 cf_pass_any_splices(&offset);
193
194 strcpy(lex->write_offset++, " ");
195 out_token->str.len = 1;
196
197 if (*offset == '/') {
198 while (*++offset && !is_newline(*offset))
199 cf_pass_any_splices(&offset);
200
201 } else if (*offset == '*') {
202 bool was_star = false;
203 lex->unexpected_eof = true;
204
205 while (*++offset) {
206 cf_pass_any_splices(&offset);
207
208 if (was_star && *offset == '/') {
209 offset++;
210 lex->unexpected_eof = false;
211 break;
212 } else {
213 was_star = (*offset == '*');
214 }
215 }
216 }
217
218 out_token->unmerged_str.len +=
219 (size_t)(offset - out_token->unmerged_str.array);
220 out_token->type = CFTOKEN_SPACETAB;
221 lex->base_lexer.offset = offset;
222
223 return true;
224 }
225
cf_lexer_write_strref(struct cf_lexer * lex,const struct strref * ref)226 static inline void cf_lexer_write_strref(struct cf_lexer *lex,
227 const struct strref *ref)
228 {
229 strncpy(lex->write_offset, ref->array, ref->len);
230 lex->write_offset[ref->len] = 0;
231 lex->write_offset += ref->len;
232 }
233
cf_lexer_is_include(struct cf_lexer * lex)234 static bool cf_lexer_is_include(struct cf_lexer *lex)
235 {
236 bool found_include_import = false;
237 bool found_preprocessor = false;
238 size_t i;
239
240 for (i = lex->tokens.num; i > 0; i--) {
241 struct cf_token *token = lex->tokens.array + (i - 1);
242
243 if (is_space_or_tab(*token->str.array))
244 continue;
245
246 if (!found_include_import) {
247 if (strref_cmp(&token->str, "include") != 0 &&
248 strref_cmp(&token->str, "import") != 0)
249 break;
250
251 found_include_import = true;
252
253 } else if (!found_preprocessor) {
254 if (*token->str.array != '#')
255 break;
256
257 found_preprocessor = true;
258
259 } else {
260 return is_newline(*token->str.array);
261 }
262 }
263
264 /* if starting line */
265 return found_preprocessor && found_include_import;
266 }
267
cf_lexer_getstrtoken(struct cf_lexer * lex,struct cf_token * out_token,char delimiter,bool allow_escaped_delimiters)268 static void cf_lexer_getstrtoken(struct cf_lexer *lex,
269 struct cf_token *out_token, char delimiter,
270 bool allow_escaped_delimiters)
271 {
272 const char *offset = lex->base_lexer.offset;
273 bool escaped = false;
274
275 out_token->unmerged_str.len++;
276 out_token->str.len++;
277 cf_lexer_write_strref(lex, &out_token->unmerged_str);
278
279 while (*offset) {
280 cf_pass_any_splices(&offset);
281 if (*offset == delimiter) {
282 if (!escaped) {
283 *lex->write_offset++ = *offset;
284 out_token->str.len++;
285 offset++;
286 break;
287 }
288 } else if (is_newline(*offset)) {
289 break;
290 }
291
292 *lex->write_offset++ = *offset;
293 out_token->str.len++;
294
295 escaped = (allow_escaped_delimiters && *offset == '\\');
296 offset++;
297 }
298
299 *lex->write_offset = 0;
300 out_token->unmerged_str.len +=
301 (size_t)(offset - out_token->unmerged_str.array);
302 out_token->type = CFTOKEN_STRING;
303 lex->base_lexer.offset = offset;
304 }
305
cf_lexer_process_string(struct cf_lexer * lex,struct cf_token * out_token)306 static bool cf_lexer_process_string(struct cf_lexer *lex,
307 struct cf_token *out_token)
308 {
309 char ch = *out_token->unmerged_str.array;
310
311 if (ch == '<' && cf_lexer_is_include(lex)) {
312 cf_lexer_getstrtoken(lex, out_token, '>', false);
313 return true;
314
315 } else if (ch == '"' || ch == '\'') {
316 cf_lexer_getstrtoken(lex, out_token, ch,
317 !cf_lexer_is_include(lex));
318 return true;
319 }
320
321 return false;
322 }
323
324 static inline enum cf_token_type
cf_get_token_type(const struct cf_token * token,const struct base_token * start_token)325 cf_get_token_type(const struct cf_token *token,
326 const struct base_token *start_token)
327 {
328 switch (start_token->type) {
329 case BASETOKEN_ALPHA:
330 return CFTOKEN_NAME;
331
332 case BASETOKEN_DIGIT:
333 return CFTOKEN_NUM;
334
335 case BASETOKEN_WHITESPACE:
336 if (is_newline(*token->str.array))
337 return CFTOKEN_NEWLINE;
338 else
339 return CFTOKEN_SPACETAB;
340
341 case BASETOKEN_NONE:
342 case BASETOKEN_OTHER:
343 break;
344 }
345
346 return CFTOKEN_OTHER;
347 }
348
cf_lexer_nexttoken(struct cf_lexer * lex,struct cf_token * out_token)349 static bool cf_lexer_nexttoken(struct cf_lexer *lex, struct cf_token *out_token)
350 {
351 struct base_token token, start_token;
352 bool wrote_data = false;
353
354 base_token_clear(&token);
355 base_token_clear(&start_token);
356 cf_token_clear(out_token);
357
358 while (lexer_getbasetoken(&lex->base_lexer, &token, PARSE_WHITESPACE)) {
359 /* reclassify underscore as alpha for alnum tokens */
360 if (*token.text.array == '_')
361 token.type = BASETOKEN_ALPHA;
362
363 /* ignore escaped newlines to merge spliced lines */
364 if (cf_is_splice(token.text.array)) {
365 lex->base_lexer.offset +=
366 newline_size(token.text.array + 1);
367 continue;
368 }
369
370 if (!wrote_data) {
371 out_token->unmerged_str.array = token.text.array;
372 out_token->str.array = lex->write_offset;
373
374 /* if comment then output a space */
375 if (cf_lexer_process_comment(lex, out_token))
376 return true;
377
378 /* process string tokens if any */
379 if (cf_lexer_process_string(lex, out_token))
380 return true;
381
382 base_token_copy(&start_token, &token);
383 wrote_data = true;
384
385 } else if (cf_is_token_break(&start_token, &token)) {
386 lex->base_lexer.offset -= token.text.len;
387 break;
388 }
389
390 /* write token to CF lexer to account for splicing/comments */
391 cf_lexer_write_strref(lex, &token.text);
392 out_token->str.len += token.text.len;
393 }
394
395 if (wrote_data) {
396 out_token->unmerged_str.len = (size_t)(
397 lex->base_lexer.offset - out_token->unmerged_str.array);
398 out_token->type = cf_get_token_type(out_token, &start_token);
399 }
400
401 return wrote_data;
402 }
403
cf_lexer_init(struct cf_lexer * lex)404 void cf_lexer_init(struct cf_lexer *lex)
405 {
406 lexer_init(&lex->base_lexer);
407 da_init(lex->tokens);
408
409 lex->file = NULL;
410 lex->reformatted = NULL;
411 lex->write_offset = NULL;
412 lex->unexpected_eof = false;
413 }
414
cf_lexer_free(struct cf_lexer * lex)415 void cf_lexer_free(struct cf_lexer *lex)
416 {
417 bfree(lex->file);
418 bfree(lex->reformatted);
419 lexer_free(&lex->base_lexer);
420 da_free(lex->tokens);
421
422 lex->file = NULL;
423 lex->reformatted = NULL;
424 lex->write_offset = NULL;
425 lex->unexpected_eof = false;
426 }
427
cf_lexer_lex(struct cf_lexer * lex,const char * str,const char * file)428 bool cf_lexer_lex(struct cf_lexer *lex, const char *str, const char *file)
429 {
430 struct cf_token token;
431 struct cf_token *last_token = NULL;
432
433 cf_lexer_free(lex);
434 if (!str || !*str)
435 return false;
436
437 if (file)
438 lex->file = bstrdup(file);
439
440 lexer_start(&lex->base_lexer, str);
441 cf_token_clear(&token);
442
443 lex->reformatted = bmalloc(strlen(str) + 1);
444 lex->reformatted[0] = 0;
445 lex->write_offset = lex->reformatted;
446
447 while (cf_lexer_nexttoken(lex, &token)) {
448 if (last_token && is_space_or_tab(*last_token->str.array) &&
449 is_space_or_tab(*token.str.array)) {
450 cf_token_add(last_token, &token);
451 continue;
452 }
453
454 token.lex = lex;
455 last_token = da_push_back_new(lex->tokens);
456 memcpy(last_token, &token, sizeof(struct cf_token));
457 }
458
459 cf_token_clear(&token);
460
461 token.str.array = lex->write_offset;
462 token.unmerged_str.array = lex->base_lexer.offset;
463 token.lex = lex;
464 da_push_back(lex->tokens, &token);
465
466 return !lex->unexpected_eof;
467 }
468
469 /* ------------------------------------------------------------------------- */
470
471 struct macro_param {
472 struct cf_token name;
473 DARRAY(struct cf_token) tokens;
474 };
475
macro_param_init(struct macro_param * param)476 static inline void macro_param_init(struct macro_param *param)
477 {
478 cf_token_clear(¶m->name);
479 da_init(param->tokens);
480 }
481
macro_param_free(struct macro_param * param)482 static inline void macro_param_free(struct macro_param *param)
483 {
484 cf_token_clear(¶m->name);
485 da_free(param->tokens);
486 }
487
488 /* ------------------------------------------------------------------------- */
489
490 struct macro_params {
491 DARRAY(struct macro_param) params;
492 };
493
macro_params_init(struct macro_params * params)494 static inline void macro_params_init(struct macro_params *params)
495 {
496 da_init(params->params);
497 }
498
macro_params_free(struct macro_params * params)499 static inline void macro_params_free(struct macro_params *params)
500 {
501 size_t i;
502 for (i = 0; i < params->params.num; i++)
503 macro_param_free(params->params.array + i);
504 da_free(params->params);
505 }
506
507 static inline struct macro_param *
get_macro_param(const struct macro_params * params,const struct strref * name)508 get_macro_param(const struct macro_params *params, const struct strref *name)
509 {
510 size_t i;
511 if (!params)
512 return NULL;
513
514 for (i = 0; i < params->params.num; i++) {
515 struct macro_param *param = params->params.array + i;
516 if (strref_cmp_strref(¶m->name.str, name) == 0)
517 return param;
518 }
519
520 return NULL;
521 }
522
523 /* ------------------------------------------------------------------------- */
524
525 static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block,
526 struct cf_token **p_cur_token);
527 static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block,
528 struct cf_token **p_cur_token);
529
go_to_newline(struct cf_token ** p_cur_token)530 static inline bool go_to_newline(struct cf_token **p_cur_token)
531 {
532 struct cf_token *cur_token = *p_cur_token;
533 while (cur_token->type != CFTOKEN_NEWLINE &&
534 cur_token->type != CFTOKEN_NONE)
535 cur_token++;
536
537 *p_cur_token = cur_token;
538
539 return cur_token->type != CFTOKEN_NONE;
540 }
541
next_token(struct cf_token ** p_cur_token,bool preprocessor)542 static inline bool next_token(struct cf_token **p_cur_token, bool preprocessor)
543 {
544 struct cf_token *cur_token = *p_cur_token;
545
546 if (cur_token->type != CFTOKEN_NONE)
547 cur_token++;
548
549 /* if preprocessor, stop at newline */
550 while (cur_token->type == CFTOKEN_SPACETAB &&
551 (preprocessor || cur_token->type == CFTOKEN_NEWLINE))
552 cur_token++;
553
554 *p_cur_token = cur_token;
555 return cur_token->type != CFTOKEN_NONE;
556 }
557
cf_gettokenoffset(struct cf_preprocessor * pp,const struct cf_token * token,uint32_t * row,uint32_t * col)558 static inline void cf_gettokenoffset(struct cf_preprocessor *pp,
559 const struct cf_token *token,
560 uint32_t *row, uint32_t *col)
561 {
562 lexer_getstroffset(&pp->lex->base_lexer, token->unmerged_str.array, row,
563 col);
564 }
565
cf_addew(struct cf_preprocessor * pp,const struct cf_token * token,const char * message,int error_level,const char * val1,const char * val2,const char * val3)566 static void cf_addew(struct cf_preprocessor *pp, const struct cf_token *token,
567 const char *message, int error_level, const char *val1,
568 const char *val2, const char *val3)
569 {
570 uint32_t row, col;
571 cf_gettokenoffset(pp, token, &row, &col);
572
573 if (!val1 && !val2 && !val3) {
574 error_data_add(pp->ed, token->lex->file, row, col, message,
575 error_level);
576 } else {
577 struct dstr formatted;
578 dstr_init(&formatted);
579 dstr_safe_printf(&formatted, message, val1, val2, val3, NULL);
580
581 error_data_add(pp->ed, token->lex->file, row, col,
582 formatted.array, error_level);
583 dstr_free(&formatted);
584 }
585 }
586
cf_adderror(struct cf_preprocessor * pp,const struct cf_token * token,const char * error,const char * val1,const char * val2,const char * val3)587 static inline void cf_adderror(struct cf_preprocessor *pp,
588 const struct cf_token *token, const char *error,
589 const char *val1, const char *val2,
590 const char *val3)
591 {
592 cf_addew(pp, token, error, LEX_ERROR, val1, val2, val3);
593 }
594
cf_addwarning(struct cf_preprocessor * pp,const struct cf_token * token,const char * warning,const char * val1,const char * val2,const char * val3)595 static inline void cf_addwarning(struct cf_preprocessor *pp,
596 const struct cf_token *token,
597 const char *warning, const char *val1,
598 const char *val2, const char *val3)
599 {
600 cf_addew(pp, token, warning, LEX_WARNING, val1, val2, val3);
601 }
602
/* Reports that `expecting` was expected at `token`. */
static inline void cf_adderror_expecting(struct cf_preprocessor *pp,
					 const struct cf_token *token,
					 const char *expecting)
{
	const char *format = "Expected $1";

	cf_adderror(pp, token, format, expecting, NULL, NULL);
}
609
/* Reports trailing tokens after a preprocessor directive. */
static inline void cf_adderror_expected_newline(struct cf_preprocessor *pp,
						const struct cf_token *token)
{
	const char *text =
		"Unexpected token after preprocessor, expected newline";

	cf_adderror(pp, token, text, NULL, NULL, NULL);
}
618
/* Reports that the input ended inside an unterminated #if-style block. */
static inline void
cf_adderror_unexpected_endif_eof(struct cf_preprocessor *pp,
				 const struct cf_token *token)
{
	const char *text = "Unexpected end of file before #endif";

	cf_adderror(pp, token, text, NULL, NULL, NULL);
}
626
/* Reports that the input ended unexpectedly at `token`. */
static inline void cf_adderror_unexpected_eof(struct cf_preprocessor *pp,
					      const struct cf_token *token)
{
	const char *text = "Unexpected end of file";

	cf_adderror(pp, token, text, NULL, NULL, NULL);
}
632
insert_path(struct cf_preprocessor * pp,struct dstr * str_file)633 static inline void insert_path(struct cf_preprocessor *pp,
634 struct dstr *str_file)
635 {
636 const char *file;
637 const char *slash;
638
639 if (pp && pp->lex && pp->lex->file) {
640 file = pp->lex->file;
641 slash = strrchr(file, '/');
642 if (slash) {
643 struct dstr path = {0};
644 dstr_ncopy(&path, file, slash - file + 1);
645 dstr_insert_dstr(str_file, 0, &path);
646 dstr_free(&path);
647 }
648 }
649 }
650
cf_include_file(struct cf_preprocessor * pp,const struct cf_token * file_token)651 static void cf_include_file(struct cf_preprocessor *pp,
652 const struct cf_token *file_token)
653 {
654 struct cf_lexer new_lex;
655 struct dstr str_file;
656 FILE *file;
657 char *file_data;
658 struct cf_token *tokens;
659 size_t i;
660
661 dstr_init(&str_file);
662 dstr_copy_strref(&str_file, &file_token->str);
663 dstr_mid(&str_file, &str_file, 1, str_file.len - 2);
664 insert_path(pp, &str_file);
665
666 /* if dependency already exists, run preprocessor on it */
667 for (i = 0; i < pp->dependencies.num; i++) {
668 struct cf_lexer *dep = pp->dependencies.array + i;
669
670 if (strcmp(dep->file, str_file.array) == 0) {
671 tokens = cf_lexer_get_tokens(dep);
672 cf_preprocess_tokens(pp, false, &tokens);
673 goto exit;
674 }
675 }
676
677 file = os_fopen(str_file.array, "rb");
678 if (!file) {
679 cf_adderror(pp, file_token, "Could not open file '$1'",
680 file_token->str.array, NULL, NULL);
681 goto exit;
682 }
683
684 os_fread_utf8(file, &file_data);
685 fclose(file);
686
687 cf_lexer_init(&new_lex);
688 cf_lexer_lex(&new_lex, file_data, str_file.array);
689 tokens = cf_lexer_get_tokens(&new_lex);
690 cf_preprocess_tokens(pp, false, &tokens);
691 bfree(file_data);
692
693 da_push_back(pp->dependencies, &new_lex);
694
695 exit:
696 dstr_free(&str_file);
697 }
698
is_sys_include(struct strref * ref)699 static inline bool is_sys_include(struct strref *ref)
700 {
701 return ref->len >= 2 && ref->array[0] == '<' &&
702 ref->array[ref->len - 1] == '>';
703 }
704
is_loc_include(struct strref * ref)705 static inline bool is_loc_include(struct strref *ref)
706 {
707 return ref->len >= 2 && ref->array[0] == '"' &&
708 ref->array[ref->len - 1] == '"';
709 }
710
cf_preprocess_include(struct cf_preprocessor * pp,struct cf_token ** p_cur_token)711 static void cf_preprocess_include(struct cf_preprocessor *pp,
712 struct cf_token **p_cur_token)
713 {
714 struct cf_token *cur_token = *p_cur_token;
715
716 if (pp->ignore_state) {
717 go_to_newline(p_cur_token);
718 return;
719 }
720
721 next_token(&cur_token, true);
722
723 if (cur_token->type != CFTOKEN_STRING) {
724 cf_adderror_expecting(pp, cur_token, "string");
725 go_to_newline(&cur_token);
726 goto exit;
727 }
728
729 if (is_sys_include(&cur_token->str)) {
730 /* TODO */
731 } else if (is_loc_include(&cur_token->str)) {
732 if (!pp->ignore_state)
733 cf_include_file(pp, cur_token);
734 } else {
735 cf_adderror(pp, cur_token, "Invalid or incomplete string", NULL,
736 NULL, NULL);
737 go_to_newline(&cur_token);
738 goto exit;
739 }
740
741 cur_token++;
742
743 exit:
744 *p_cur_token = cur_token;
745 }
746
cf_preprocess_macro_params(struct cf_preprocessor * pp,struct cf_def * def,struct cf_token ** p_cur_token)747 static bool cf_preprocess_macro_params(struct cf_preprocessor *pp,
748 struct cf_def *def,
749 struct cf_token **p_cur_token)
750 {
751 struct cf_token *cur_token = *p_cur_token;
752 bool success = false;
753 def->macro = true;
754
755 do {
756 next_token(&cur_token, true);
757 if (cur_token->type != CFTOKEN_NAME) {
758 cf_adderror_expecting(pp, cur_token, "identifier");
759 go_to_newline(&cur_token);
760 goto exit;
761 }
762
763 cf_def_addparam(def, cur_token);
764
765 next_token(&cur_token, true);
766 if (cur_token->type != CFTOKEN_OTHER ||
767 (*cur_token->str.array != ',' &&
768 *cur_token->str.array != ')')) {
769
770 cf_adderror_expecting(pp, cur_token, "',' or ')'");
771 go_to_newline(&cur_token);
772 goto exit;
773 }
774 } while (*cur_token->str.array != ')');
775
776 /* ended properly, now go to first define token (or newline) */
777 next_token(&cur_token, true);
778 success = true;
779
780 exit:
781 *p_cur_token = cur_token;
782 return success;
783 }
784
785 #define INVALID_INDEX ((size_t)-1)
786
cf_preprocess_get_def_idx(struct cf_preprocessor * pp,const struct strref * def_name)787 static inline size_t cf_preprocess_get_def_idx(struct cf_preprocessor *pp,
788 const struct strref *def_name)
789 {
790 struct cf_def *array = pp->defines.array;
791 size_t i;
792
793 for (i = 0; i < pp->defines.num; i++) {
794 struct cf_def *cur_def = array + i;
795
796 if (strref_cmp_strref(&cur_def->name.str, def_name) == 0)
797 return i;
798 }
799
800 return INVALID_INDEX;
801 }
802
803 static inline struct cf_def *
cf_preprocess_get_def(struct cf_preprocessor * pp,const struct strref * def_name)804 cf_preprocess_get_def(struct cf_preprocessor *pp, const struct strref *def_name)
805 {
806 size_t idx = cf_preprocess_get_def_idx(pp, def_name);
807 if (idx == INVALID_INDEX)
808 return NULL;
809
810 return pp->defines.array + idx;
811 }
812
813 static char space_filler[2] = " ";
814
append_space(struct cf_preprocessor * pp,struct darray * tokens,const struct cf_token * base)815 static inline void append_space(struct cf_preprocessor *pp,
816 struct darray *tokens,
817 const struct cf_token *base)
818 {
819 struct cf_token token;
820
821 strref_set(&token.str, space_filler, 1);
822 token.type = CFTOKEN_SPACETAB;
823 if (base) {
824 token.lex = base->lex;
825 strref_copy(&token.unmerged_str, &base->unmerged_str);
826 } else {
827 token.lex = pp->lex;
828 strref_copy(&token.unmerged_str, &token.str);
829 }
830
831 darray_push_back(sizeof(struct cf_token), tokens, &token);
832 }
833
append_end_token(struct darray * tokens)834 static inline void append_end_token(struct darray *tokens)
835 {
836 struct cf_token end;
837 cf_token_clear(&end);
838 darray_push_back(sizeof(struct cf_token), tokens, &end);
839 }
840
cf_preprocess_define(struct cf_preprocessor * pp,struct cf_token ** p_cur_token)841 static void cf_preprocess_define(struct cf_preprocessor *pp,
842 struct cf_token **p_cur_token)
843 {
844 struct cf_token *cur_token = *p_cur_token;
845 struct cf_def def;
846
847 if (pp->ignore_state) {
848 go_to_newline(p_cur_token);
849 return;
850 }
851
852 cf_def_init(&def);
853
854 next_token(&cur_token, true);
855 if (cur_token->type != CFTOKEN_NAME) {
856 cf_adderror_expecting(pp, cur_token, "identifier");
857 go_to_newline(&cur_token);
858 goto exit;
859 }
860
861 append_space(pp, &def.tokens.da, NULL);
862 cf_token_copy(&def.name, cur_token);
863
864 if (!next_token(&cur_token, true))
865 goto complete;
866
867 /* process macro */
868 if (*cur_token->str.array == '(') {
869 if (!cf_preprocess_macro_params(pp, &def, &cur_token))
870 goto error;
871 }
872
873 while (cur_token->type != CFTOKEN_NEWLINE &&
874 cur_token->type != CFTOKEN_NONE)
875 cf_def_addtoken(&def, cur_token++);
876
877 complete:
878 append_end_token(&def.tokens.da);
879 append_space(pp, &def.tokens.da, NULL);
880 da_push_back(pp->defines, &def);
881 goto exit;
882
883 error:
884 cf_def_free(&def);
885
886 exit:
887 *p_cur_token = cur_token;
888 }
889
cf_preprocess_remove_def_strref(struct cf_preprocessor * pp,const struct strref * ref)890 static inline void cf_preprocess_remove_def_strref(struct cf_preprocessor *pp,
891 const struct strref *ref)
892 {
893 size_t def_idx = cf_preprocess_get_def_idx(pp, ref);
894 if (def_idx != INVALID_INDEX) {
895 struct cf_def *array = pp->defines.array;
896 cf_def_free(array + def_idx);
897 da_erase(pp->defines, def_idx);
898 }
899 }
900
cf_preprocess_undef(struct cf_preprocessor * pp,struct cf_token ** p_cur_token)901 static void cf_preprocess_undef(struct cf_preprocessor *pp,
902 struct cf_token **p_cur_token)
903 {
904 struct cf_token *cur_token = *p_cur_token;
905
906 if (pp->ignore_state) {
907 go_to_newline(p_cur_token);
908 return;
909 }
910
911 next_token(&cur_token, true);
912 if (cur_token->type != CFTOKEN_NAME) {
913 cf_adderror_expecting(pp, cur_token, "identifier");
914 go_to_newline(&cur_token);
915 goto exit;
916 }
917
918 cf_preprocess_remove_def_strref(pp, &cur_token->str);
919 cur_token++;
920
921 exit:
922 *p_cur_token = cur_token;
923 }
924
925 /* Processes an #ifdef/#ifndef/#if/#else/#elif sub block recursively */
cf_preprocess_subblock(struct cf_preprocessor * pp,bool ignore,struct cf_token ** p_cur_token)926 static inline bool cf_preprocess_subblock(struct cf_preprocessor *pp,
927 bool ignore,
928 struct cf_token **p_cur_token)
929 {
930 bool eof;
931
932 if (!next_token(p_cur_token, true))
933 return false;
934
935 if (!pp->ignore_state) {
936 pp->ignore_state = ignore;
937 cf_preprocess_tokens(pp, true, p_cur_token);
938 pp->ignore_state = false;
939 } else {
940 cf_preprocess_tokens(pp, true, p_cur_token);
941 }
942
943 eof = ((*p_cur_token)->type == CFTOKEN_NONE);
944 if (eof)
945 cf_adderror_unexpected_endif_eof(pp, *p_cur_token);
946 return !eof;
947 }
948
cf_preprocess_ifdef(struct cf_preprocessor * pp,bool ifnot,struct cf_token ** p_cur_token)949 static void cf_preprocess_ifdef(struct cf_preprocessor *pp, bool ifnot,
950 struct cf_token **p_cur_token)
951 {
952 struct cf_token *cur_token = *p_cur_token;
953 struct cf_def *def;
954 bool is_true;
955
956 next_token(&cur_token, true);
957 if (cur_token->type != CFTOKEN_NAME) {
958 cf_adderror_expecting(pp, cur_token, "identifier");
959 go_to_newline(&cur_token);
960 goto exit;
961 }
962
963 def = cf_preprocess_get_def(pp, &cur_token->str);
964 is_true = (def == NULL) == ifnot;
965
966 if (!cf_preprocess_subblock(pp, !is_true, &cur_token))
967 goto exit;
968
969 if (strref_cmp(&cur_token->str, "else") == 0) {
970 if (!cf_preprocess_subblock(pp, is_true, &cur_token))
971 goto exit;
972 /*} else if (strref_cmp(&cur_token->str, "elif") == 0) {*/
973 }
974
975 cur_token++;
976
977 exit:
978 *p_cur_token = cur_token;
979 }
980
cf_preprocessor(struct cf_preprocessor * pp,bool if_block,struct cf_token ** p_cur_token)981 static bool cf_preprocessor(struct cf_preprocessor *pp, bool if_block,
982 struct cf_token **p_cur_token)
983 {
984 struct cf_token *cur_token = *p_cur_token;
985
986 if (strref_cmp(&cur_token->str, "include") == 0) {
987 cf_preprocess_include(pp, p_cur_token);
988
989 } else if (strref_cmp(&cur_token->str, "define") == 0) {
990 cf_preprocess_define(pp, p_cur_token);
991
992 } else if (strref_cmp(&cur_token->str, "undef") == 0) {
993 cf_preprocess_undef(pp, p_cur_token);
994
995 } else if (strref_cmp(&cur_token->str, "ifdef") == 0) {
996 cf_preprocess_ifdef(pp, false, p_cur_token);
997
998 } else if (strref_cmp(&cur_token->str, "ifndef") == 0) {
999 cf_preprocess_ifdef(pp, true, p_cur_token);
1000
1001 /*} else if (strref_cmp(&cur_token->str, "if") == 0) {
1002 TODO;*/
1003 } else if (strref_cmp(&cur_token->str, "else") == 0 ||
1004 /*strref_cmp(&cur_token->str, "elif") == 0 ||*/
1005 strref_cmp(&cur_token->str, "endif") == 0) {
1006 if (!if_block) {
1007 struct dstr name;
1008 dstr_init_copy_strref(&name, &cur_token->str);
1009 cf_adderror(pp, cur_token,
1010 "#$1 outside of "
1011 "#if/#ifdef/#ifndef block",
1012 name.array, NULL, NULL);
1013 dstr_free(&name);
1014 (*p_cur_token)++;
1015
1016 return true;
1017 }
1018
1019 return false;
1020
1021 } else if (cur_token->type != CFTOKEN_NEWLINE &&
1022 cur_token->type != CFTOKEN_NONE) {
1023 /*
1024 * TODO: language-specific preprocessor stuff should be sent to
1025 * handler of some sort
1026 */
1027 (*p_cur_token)++;
1028 }
1029
1030 return true;
1031 }
1032
1033 static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
1034 struct darray *dst, /* struct cf_token */
1035 struct cf_token **p_cur_token,
1036 const struct cf_token *base,
1037 const struct macro_params *params);
1038
1039 /*
1040 * collects tokens for a macro parameter
1041 *
1042 * note that it is important to make sure that any usage of function calls
1043 * within a macro parameter is preserved, example MACRO(func(1, 2), 3), do not
1044 * let it stop on the comma at "1,"
1045 */
cf_preprocess_save_macro_param(struct cf_preprocessor * pp,struct cf_token ** p_cur_token,struct macro_param * param,const struct cf_token * base,const struct macro_params * cur_params)1046 static void cf_preprocess_save_macro_param(
1047 struct cf_preprocessor *pp, struct cf_token **p_cur_token,
1048 struct macro_param *param, const struct cf_token *base,
1049 const struct macro_params *cur_params)
1050 {
1051 struct cf_token *cur_token = *p_cur_token;
1052 int brace_count = 0;
1053
1054 append_space(pp, ¶m->tokens.da, base);
1055
1056 while (cur_token->type != CFTOKEN_NONE) {
1057 if (*cur_token->str.array == '(') {
1058 brace_count++;
1059 } else if (*cur_token->str.array == ')') {
1060 if (brace_count)
1061 brace_count--;
1062 else
1063 break;
1064 } else if (*cur_token->str.array == ',') {
1065 if (!brace_count)
1066 break;
1067 }
1068
1069 cf_preprocess_addtoken(pp, ¶m->tokens.da, &cur_token, base,
1070 cur_params);
1071 }
1072
1073 if (cur_token->type == CFTOKEN_NONE)
1074 cf_adderror_unexpected_eof(pp, cur_token);
1075
1076 append_space(pp, ¶m->tokens.da, base);
1077 append_end_token(¶m->tokens.da);
1078
1079 *p_cur_token = cur_token;
1080 }
1081
param_is_whitespace(const struct macro_param * param)1082 static inline bool param_is_whitespace(const struct macro_param *param)
1083 {
1084 struct cf_token *array = param->tokens.array;
1085 size_t i;
1086
1087 for (i = 0; i < param->tokens.num; i++)
1088 if (array[i].type != CFTOKEN_NONE &&
1089 array[i].type != CFTOKEN_SPACETAB &&
1090 array[i].type != CFTOKEN_NEWLINE)
1091 return false;
1092
1093 return true;
1094 }
1095
1096 /* collects parameter tokens of a used macro and stores them for the unwrap */
cf_preprocess_save_macro_params(struct cf_preprocessor * pp,struct cf_token ** p_cur_token,const struct cf_def * def,const struct cf_token * base,const struct macro_params * cur_params,struct macro_params * dst)1097 static void cf_preprocess_save_macro_params(
1098 struct cf_preprocessor *pp, struct cf_token **p_cur_token,
1099 const struct cf_def *def, const struct cf_token *base,
1100 const struct macro_params *cur_params, struct macro_params *dst)
1101 {
1102 struct cf_token *cur_token = *p_cur_token;
1103 size_t count = 0;
1104
1105 next_token(&cur_token, false);
1106 if (cur_token->type != CFTOKEN_OTHER || *cur_token->str.array != '(') {
1107 cf_adderror_expecting(pp, cur_token, "'('");
1108 goto exit;
1109 }
1110
1111 do {
1112 struct macro_param param;
1113 macro_param_init(¶m);
1114 cur_token++;
1115 count++;
1116
1117 cf_preprocess_save_macro_param(pp, &cur_token, ¶m, base,
1118 cur_params);
1119 if (cur_token->type != CFTOKEN_OTHER ||
1120 (*cur_token->str.array != ',' &&
1121 *cur_token->str.array != ')')) {
1122
1123 macro_param_free(¶m);
1124 cf_adderror_expecting(pp, cur_token, "',' or ')'");
1125 goto exit;
1126 }
1127
1128 if (param_is_whitespace(¶m)) {
1129 /* if 0-param macro, ignore first entry */
1130 if (count == 1 && !def->params.num &&
1131 *cur_token->str.array == ')') {
1132 macro_param_free(¶m);
1133 break;
1134 }
1135 }
1136
1137 if (count <= def->params.num) {
1138 cf_token_copy(¶m.name,
1139 cf_def_getparam(def, count - 1));
1140 da_push_back(dst->params, ¶m);
1141 } else {
1142 macro_param_free(¶m);
1143 }
1144 } while (*cur_token->str.array != ')');
1145
1146 if (count != def->params.num)
1147 cf_adderror(pp, cur_token,
1148 "Mismatching number of macro parameters", NULL,
1149 NULL, NULL);
1150
1151 exit:
1152 *p_cur_token = cur_token;
1153 }
1154
cf_preprocess_unwrap_param(struct cf_preprocessor * pp,struct darray * dst,struct cf_token ** p_cur_token,const struct cf_token * base,const struct macro_param * param)1155 static inline void cf_preprocess_unwrap_param(
1156 struct cf_preprocessor *pp, struct darray *dst, /* struct cf_token */
1157 struct cf_token **p_cur_token, const struct cf_token *base,
1158 const struct macro_param *param)
1159 {
1160 struct cf_token *cur_token = *p_cur_token;
1161 struct cf_token *cur_param_token = param->tokens.array;
1162
1163 while (cur_param_token->type != CFTOKEN_NONE)
1164 cf_preprocess_addtoken(pp, dst, &cur_param_token, base, NULL);
1165
1166 cur_token++;
1167 *p_cur_token = cur_token;
1168 }
1169
cf_preprocess_unwrap_define(struct cf_preprocessor * pp,struct darray * dst,struct cf_token ** p_cur_token,const struct cf_token * base,const struct cf_def * def,const struct macro_params * cur_params)1170 static inline void cf_preprocess_unwrap_define(
1171 struct cf_preprocessor *pp, struct darray *dst, /* struct cf_token */
1172 struct cf_token **p_cur_token, const struct cf_token *base,
1173 const struct cf_def *def, const struct macro_params *cur_params)
1174 {
1175 struct cf_token *cur_token = *p_cur_token;
1176 struct macro_params new_params;
1177 struct cf_token *cur_def_token = def->tokens.array;
1178
1179 macro_params_init(&new_params);
1180
1181 if (def->macro)
1182 cf_preprocess_save_macro_params(pp, &cur_token, def, base,
1183 cur_params, &new_params);
1184
1185 while (cur_def_token->type != CFTOKEN_NONE)
1186 cf_preprocess_addtoken(pp, dst, &cur_def_token, base,
1187 &new_params);
1188
1189 macro_params_free(&new_params);
1190
1191 cur_token++;
1192 *p_cur_token = cur_token;
1193 }
1194
cf_preprocess_addtoken(struct cf_preprocessor * pp,struct darray * dst,struct cf_token ** p_cur_token,const struct cf_token * base,const struct macro_params * params)1195 static void cf_preprocess_addtoken(struct cf_preprocessor *pp,
1196 struct darray *dst, /* struct cf_token */
1197 struct cf_token **p_cur_token,
1198 const struct cf_token *base,
1199 const struct macro_params *params)
1200 {
1201 struct cf_token *cur_token = *p_cur_token;
1202
1203 if (pp->ignore_state)
1204 goto ignore;
1205
1206 if (!base)
1207 base = cur_token;
1208
1209 if (cur_token->type == CFTOKEN_NAME) {
1210 struct cf_def *def;
1211 struct macro_param *param;
1212
1213 param = get_macro_param(params, &cur_token->str);
1214 if (param) {
1215 cf_preprocess_unwrap_param(pp, dst, &cur_token, base,
1216 param);
1217 goto exit;
1218 }
1219
1220 def = cf_preprocess_get_def(pp, &cur_token->str);
1221 if (def) {
1222 cf_preprocess_unwrap_define(pp, dst, &cur_token, base,
1223 def, params);
1224 goto exit;
1225 }
1226 }
1227
1228 darray_push_back(sizeof(struct cf_token), dst, cur_token);
1229
1230 ignore:
1231 cur_token++;
1232
1233 exit:
1234 *p_cur_token = cur_token;
1235 }
1236
cf_preprocess_tokens(struct cf_preprocessor * pp,bool if_block,struct cf_token ** p_cur_token)1237 static void cf_preprocess_tokens(struct cf_preprocessor *pp, bool if_block,
1238 struct cf_token **p_cur_token)
1239 {
1240 bool newline = true;
1241 bool preprocessor_line = if_block;
1242 struct cf_token *cur_token = *p_cur_token;
1243
1244 while (cur_token->type != CFTOKEN_NONE) {
1245 if (cur_token->type != CFTOKEN_SPACETAB &&
1246 cur_token->type != CFTOKEN_NEWLINE) {
1247 if (preprocessor_line) {
1248 cf_adderror_expected_newline(pp, cur_token);
1249 if (!go_to_newline(&cur_token))
1250 break;
1251 }
1252
1253 if (newline && *cur_token->str.array == '#') {
1254 next_token(&cur_token, true);
1255 preprocessor_line = true;
1256 if (!cf_preprocessor(pp, if_block, &cur_token))
1257 break;
1258
1259 continue;
1260 }
1261
1262 newline = false;
1263 }
1264
1265 if (cur_token->type == CFTOKEN_NEWLINE) {
1266 newline = true;
1267 preprocessor_line = false;
1268 } else if (cur_token->type == CFTOKEN_NONE) {
1269 break;
1270 }
1271
1272 cf_preprocess_addtoken(pp, &pp->tokens.da, &cur_token, NULL,
1273 NULL);
1274 }
1275
1276 *p_cur_token = cur_token;
1277 }
1278
cf_preprocessor_init(struct cf_preprocessor * pp)1279 void cf_preprocessor_init(struct cf_preprocessor *pp)
1280 {
1281 da_init(pp->defines);
1282 da_init(pp->sys_include_dirs);
1283 da_init(pp->dependencies);
1284 da_init(pp->tokens);
1285 pp->lex = NULL;
1286 pp->ed = NULL;
1287 pp->ignore_state = false;
1288 }
1289
cf_preprocessor_free(struct cf_preprocessor * pp)1290 void cf_preprocessor_free(struct cf_preprocessor *pp)
1291 {
1292 struct cf_lexer *dependencies = pp->dependencies.array;
1293 char **sys_include_dirs = pp->sys_include_dirs.array;
1294 struct cf_def *defs = pp->defines.array;
1295 size_t i;
1296
1297 for (i = 0; i < pp->defines.num; i++)
1298 cf_def_free(defs + i);
1299 for (i = 0; i < pp->sys_include_dirs.num; i++)
1300 bfree(sys_include_dirs[i]);
1301 for (i = 0; i < pp->dependencies.num; i++)
1302 cf_lexer_free(dependencies + i);
1303
1304 da_free(pp->defines);
1305 da_free(pp->sys_include_dirs);
1306 da_free(pp->dependencies);
1307 da_free(pp->tokens);
1308
1309 pp->lex = NULL;
1310 pp->ed = NULL;
1311 pp->ignore_state = false;
1312 }
1313
cf_preprocess(struct cf_preprocessor * pp,struct cf_lexer * lex,struct error_data * ed)1314 bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
1315 struct error_data *ed)
1316 {
1317 struct cf_token *token = cf_lexer_get_tokens(lex);
1318 if (!token)
1319 return false;
1320
1321 pp->ed = ed;
1322 pp->lex = lex;
1323 cf_preprocess_tokens(pp, false, &token);
1324 da_push_back(pp->tokens, token);
1325
1326 return !lex->unexpected_eof;
1327 }
1328
cf_preprocessor_add_def(struct cf_preprocessor * pp,struct cf_def * def)1329 void cf_preprocessor_add_def(struct cf_preprocessor *pp, struct cf_def *def)
1330 {
1331 struct cf_def *existing = cf_preprocess_get_def(pp, &def->name.str);
1332
1333 if (existing) {
1334 struct dstr name;
1335 dstr_init_copy_strref(&name, &def->name.str);
1336 cf_addwarning(pp, &def->name, "Token $1 already defined",
1337 name.array, NULL, NULL);
1338 cf_addwarning(pp, &existing->name,
1339 "Previous definition of $1 is here", name.array,
1340 NULL, NULL);
1341
1342 cf_def_free(existing);
1343 memcpy(existing, def, sizeof(struct cf_def));
1344 } else {
1345 da_push_back(pp->defines, def);
1346 }
1347 }
1348
cf_preprocessor_remove_def(struct cf_preprocessor * pp,const char * def_name)1349 void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
1350 const char *def_name)
1351 {
1352 struct strref ref;
1353 ref.array = def_name;
1354 ref.len = strlen(def_name);
1355 cf_preprocess_remove_def_strref(pp, &ref);
1356 }
1357