1 #include "adlib/lib.h"
2 #include "adlib/map.h"
3 #include "adlib/set.h"
4 #include "pplex.h"
5
// `noinline` marks functions that must not be inlined. It expands to the
// GCC attribute when the build defines GCC and to nothing otherwise.
// The attribute name was previously misspelled ("noinlinex"), which GCC
// does not recognise, so the marker had no effect.
// NOTE(review): the guard tests a macro named GCC, not the compiler-provided
// __GNUC__ -- confirm that the build system defines it.
#ifdef GCC
#define noinline __attribute__((noinline))
#else
#define noinline
#endif
11
12 typedef Map<Str *, Str *> Dict;
13
14 static Token space, semicolon, eof, static_token, asterisk;
15 static Str *decl_var;
16 static Str *decl_extern_var;
17 static Str *decl_static_var;
18 static StrArr *init_list;
19 static Dict *class_vars;
20 static Dict *class_types;
21 static StrSet *type_prefix_set;
22 static Dict *namespaced;
23
// Linkage flavour of the declaration being rewritten, as passed by
// Transform to TransformVarDecl.
enum DeclType {
  ExternDecl = 0, // used for EXTERN_INST_VAR
  StaticDecl = 1, // used for STATIC_VAR and STATIC_INST_VAR
  NormalDecl = 2  // used for VAR and INST_VAR
};
29
// Initializer for this file's global state.
// NOTE(review): INIT and GCVar come from adlib; presumably INIT registers the
// block to run at startup and GCVar assigns a global while making it known to
// the garbage collector -- confirm against adlib/lib.h.
INIT(DeclParser, {
  // Tokens inserted into the output by the emitter.
  GCVar(space, Token(SymWS, S(" ")));
  // The semicolon token carries a trailing newline so that every emitted
  // declaration ends its line.
  GCVar(semicolon, Token(SymWS, S(";\n")));
  GCVar(eof, Token(SymEOF, S("")));
  GCVar(asterisk, Token(SymAst, S("*")));
  GCVar(static_token, Token(SymIdent, S("static")));
  // Storage-class text for the three declaration flavours.
  GCVar(decl_var, S("__thread"));
  GCVar(decl_extern_var, S("extern __thread"));
  GCVar(decl_static_var, S("static __thread"));
  // Bookkeeping collections, all empty at start.
  GCVar(init_list, A());
  GCVar(class_vars, new Dict());
  GCVar(class_types, new Dict());
  // Keywords after which an identifier is left alone by EmitEpilogue.
  GCVar(type_prefix_set, new StrSet());
  type_prefix_set->add(S("class"));
  type_prefix_set->add(S("struct"));
  type_prefix_set->add(S("typedef"));
  // special treatment for some gfanlib variables for now.
  // These names get a "gfan::" qualifier in the generated module initializer.
  GCVar(namespaced, new Dict());
  namespaced->add(S("MVMachineIntegerOverflow"), S("gfan"));
  namespaced->add(S("lpSolver"), S("gfan"));
});
51
52 struct State {
53 Int pos, marker;
54 };
55
56 class Parser : public GC {
57 public:
58 TokenList *input, *output, *prologue;
59 SourceFile *source;
60 Int pos, marker;
61 Int init_count;
Parser(SourceFile * _source)62 noinline Parser(SourceFile *_source) {
63 source = _source;
64 input = _source->tokens;
65 if (input->len() == 0 || input->last().sym != SymEOF)
66 input->add(eof);
67 output = new TokenList();
68 prologue = new TokenList();
69 pos = 0;
70 marker = 0;
71 }
c_source()72 bool c_source() {
73 return source->filename->ends_with(".c");
74 }
skip_until(Word64 syms)75 void skip_until(Word64 syms) {
76 syms |= BIT(SymEOF);
77 while (!TEST(syms, input->at(pos).sym)) {
78 pos++;
79 }
80 }
skip_while(Word64 syms)81 void skip_while(Word64 syms) {
82 while (TEST(syms, input->at(pos).sym)) {
83 pos++;
84 }
85 }
find_back_until(Word64 syms)86 noinline Int find_back_until(Word64 syms) {
87 Int p = pos;
88 while (p > marker) {
89 p--;
90 if (TEST(syms, input->at(p).sym))
91 return p;
92 }
93 return p;
94 }
safe_skip_until(Word64 syms)95 noinline void safe_skip_until(Word64 syms) {
96 // handle parentheses and brackets properly.
97 Int par_level = 0;
98 Int brkt_level = 0;
99 Int brace_level = 0;
100 for (;;) {
101 Symbol sym = input->at(pos).sym;
102 switch (sym) {
103 case SymEOF:
104 return;
105 case SymLPar:
106 par_level++;
107 break;
108 case SymRPar:
109 par_level--;
110 break;
111 case SymLBrkt:
112 brkt_level++;
113 break;
114 case SymRBrkt:
115 brkt_level--;
116 break;
117 case SymLBrace:
118 brace_level++;
119 break;
120 case SymRBrace:
121 brace_level--;
122 break;
123 default:
124 if (par_level + brkt_level + brace_level > 0)
125 break;
126 if (TEST(syms, sym)) {
127 return;
128 }
129 break;
130 }
131 advance();
132 }
133 }
skipWhiteSpace()134 void skipWhiteSpace() {
135 while (BIT(input->at(pos).sym) & SymsWS) {
136 pos++;
137 }
138 }
advance()139 void advance() {
140 if (pos < input->len())
141 pos++;
142 }
token(Int i)143 Token &token(Int i) {
144 return input->at(i);
145 }
tokenRange(Int start,Int end)146 TokenList *tokenRange(Int start, Int end) {
147 return input->subarr(start, end - start);
148 }
current()149 Token ¤t() {
150 return input->at(pos);
151 }
next()152 Token &next() {
153 return input->at(pos+1);
154 }
current_sym()155 Symbol current_sym() {
156 return input->at(pos).sym;
157 }
emit(Token token)158 void emit(Token token) {
159 output->add(token);
160 }
emit_gen(Str * s)161 void emit_gen(Str *s) {
162 output->add(Token(SymGen, s));
163 }
mark()164 void mark() {
165 marker = pos;
166 }
current_pos()167 Int current_pos() {
168 return pos;
169 }
markerPos()170 Int markerPos() {
171 return marker;
172 }
push_marked_until(Int p)173 noinline void push_marked_until(Int p) {
174 while (marker < p) {
175 output->add(input->at(marker));
176 marker++;
177 }
178 }
emit_range(Int start,Int end)179 noinline void emit_range(Int start, Int end) {
180 while (start < end) {
181 output->add(input->at(start));
182 start++;
183 }
184 }
emit_tokens(TokenList * tokens)185 void emit_tokens(TokenList *tokens) {
186 output->add(tokens);
187 }
push_marked()188 void push_marked() {
189 push_marked_until(pos);
190 }
save()191 State save() {
192 State result;
193 result.pos = pos;
194 result.marker = marker;
195 return result;
196 }
restore(State state)197 void restore(State state) {
198 pos = state.pos;
199 marker = state.marker;
200 }
201 };
202
IsLiteral(Parser * parser,Int start,Int end)203 bool IsLiteral(Parser *parser, Int start, Int end) {
204 Int op = 0;
205 Int lit = 0;
206 for (Int i = start; i < end; i++) {
207 Token &token = parser->token(i);
208 switch (token.sym) {
209 case SymWS:
210 case SymEOL:
211 case SymComment:
212 break;
213 case SymOp:
214 if (token.str->eq("-") || token.str->eq("+"))
215 op++;
216 else
217 return false;
218 break;
219 case SymLiteral:
220 lit++;
221 break;
222 default:
223 return false;
224 }
225 }
226 return lit >= 1 && op <= 1;
227 }
228
EmitDecl(Parser * parser,Str * storage_class,Int type_start,Int type_end,Int var_start,Int var_end,Int init_start,Int init_end,Int var_pos,bool is_class,bool is_toplevel,DeclType decl_type)229 void EmitDecl(Parser *parser, Str *storage_class,
230 Int type_start, Int type_end,
231 Int var_start, Int var_end,
232 Int init_start, Int init_end, Int var_pos,
233 bool is_class, bool is_toplevel, DeclType decl_type) {
234 bool is_static = (decl_type == StaticDecl);
235 bool is_extern = (decl_type == ExternDecl);
236 parser->emit(Token(SymGen, storage_class));
237 parser->emit(space);
238 parser->emit_range(type_start, type_end);
239 parser->emit(space);
240 if (is_class)
241 parser->emit(asterisk);
242 Str *var_name = parser->token(var_pos).str;
243 parser->emit_range(var_start, var_pos);
244 parser->emit(Token(SymGen, var_name));
245 parser->emit_range(var_pos+1, var_end);
246 if (is_static && !is_class && IsLiteral(parser, init_start+1, init_end)) {
247 parser->emit(space);
248 parser->emit_range(init_start, init_end);
249 parser->emit(semicolon);
250 return;
251 }
252 parser->emit(semicolon);
253 if (is_class) {
254 class_vars->add(var_name, S("(*")->add(var_name)->add(")"));
255 class_types->add(var_name, parser->token(type_start).str);
256 }
257 if ((is_class && !is_extern) || init_start >= 0) {
258 if (is_toplevel) {
259 init_list->add(var_name);
260 }
261 Token var_init = Token(SymGen, var_name->clone()->add("__INIT__"));
262 parser->emit(static_token);
263 parser->emit(space);
264 parser->emit_range(type_start, type_end);
265 parser->emit(space);
266 parser->emit_range(var_start, var_pos);
267 parser->emit(var_init);
268 parser->emit_range(var_pos+1, var_end);
269 if (init_start >= 0) {
270 parser->emit(space);
271 parser->emit_range(init_start, init_end);
272 }
273 parser->emit(semicolon);
274 if (!is_toplevel && !parser->c_source()) {
275 parser->init_count++;
276 parser->emit(Token(SymGen, S(
277 "class %s__CONSTR__ {\n"
278 " public: %s__CONSTR__() {\n"
279 " pSingular_register_init_var((void *)&%s, (void *)&%s__INIT__, sizeof(%s));\n"
280 " }\n"
281 "} %s__AUX__;\n"
282 )->replace_all(S("%s"), var_name)));
283 }
284 }
285 }
286
// Finishes the output of one source file after all declarations have been
// rewritten.
//
// Step 1: every SymIdent token in the output that names a recorded class
//         variable is replaced by its "(*name)" form, unless the nearest
//         preceding token (ignoring whitespace and "*") is one of the
//         keywords in type_prefix_set.
// Step 2: if anything needs run-time initialization, a
//         pSingular_mod_init() function is appended, followed by code that
//         registers it (a constructor attribute for ".c" sources, a static
//         object with a constructor otherwise).
//
// Does nothing when there are no registered helper classes, no queued
// top-level variables and no class variables.
void EmitEpilogue(Parser *parser) {
  if (parser->init_count == 0 && init_list->len() == 0 && class_vars->count() == 0)
    return;
  Str *modulename = parser->source->modulename;
  TokenList *output = parser->output;
  for (Int i = 0; i < output->len(); i++) {
    Token &token = output->at(i);
    if (token.sym == SymIdent && class_vars->contains(token.str)) {
      // Look back past whitespace and asterisks for the previous
      // significant token.
      Int j = i-1;
      while (j >= 0 && TEST(SymsWS | BIT(SymAst), output->at(j).sym))
        j--;
      // Leave names that directly follow class/struct/typedef alone.
      if (j < 0 || !type_prefix_set->contains(output->at(j).str))
        token.str = class_vars->at(token.str);
    }
  }
  // Only class-variable uses had to be rewritten; no initializer needed.
  if (parser->init_count == 0 && init_list->len() == 0)
    return;
  Str *init_part;
  if (parser->c_source()) {
    // Plain C: prototypes of the runtime functions, then the opening of
    // the module initializer.
    init_part = S("\n"
      "void pSingular_init_var(const void *s, const void *t, long n);\n"
      "void *pSingular_alloc_var(long n);\n"
      "void pSingular_register_init(void (*f)());\n"
      "static void pSingular_mod_init() {\n"
    );

  } else {
    // Other sources: the same prototypes with C linkage, plus the
    // descriptor table and the registration function that the helper
    // classes emitted by EmitDecl call. "%n" is replaced by the table
    // size further down.
    init_part = S("\n"
      "extern \"C\" {\n"
      "void pSingular_init_var(const void *s, const void *t, long n);\n"
      "void *pSingular_alloc_var(long n);\n"
      "void pSingular_register_init(void (*f)());\n"
      "}\n"
      "typedef struct {\n"
      "  void *target; void *source; long size;\n"
      "} pSingular_var_desc;\n"
      "static pSingular_var_desc pSingular_var_descs[%n];\n"
      "static void pSingular_register_init_var(void *t, void *s, long n) {\n"
      "  pSingular_var_desc * p = pSingular_var_descs;\n"
      "  while (p->target) p++;\n"
      "  p->target = t; p->source = s; p->size = n;\n"
      "}\n"
      "static void pSingular_mod_init() {\n"
    );
  }
  // One initialization statement (or pair) per queued top-level variable.
  for (Int i = 0; i < init_list->len(); i++) {
    Str *var_name = init_list->at(i);
    if (class_vars->contains(var_name)) {
      // Class variable: allocate the object, then initialize it from the
      // __INIT__ shadow. "%c" is the type, "%s" the variable name.
      Str *type = class_types->at(var_name);
      if (namespaced->contains(var_name)) {
        // Qualify both the type and the variable with the namespace.
        type = namespaced->at(var_name)->clone()->add("::")->add(type);
        var_name = namespaced->at(var_name)->clone()->add("::")->add(var_name);
      }
      init_part->add(S(
        "  %s = (%c *)pSingular_alloc_var((long)sizeof(%c));\n"
        "  pSingular_init_var(%s, &%s__INIT__, (long) sizeof(%s));\n"
      )->replace_all(S("%c"), type)->replace_all(S("%s"), var_name));
    } else {
      // Plain variable: initialize it in place from its shadow.
      init_part->add(
        S("  pSingular_init_var((void *)&%s, (void *)&%s__INIT__, (long) sizeof(%s));\n")
        ->replace_all(S("%s"), var_name)
      );
    }
  }
  init_part->add("}\n");
  if (parser->init_count) {
    // The helper classes appear before the definition emitted here, so a
    // forward declaration goes into the prologue.
    parser->prologue->add(Token(SymGen,
      S("static void pSingular_register_init_var(void *, void *, long);\n")));
  }
  // The descriptor table gets one slot more than the number of helper
  // classes that were emitted.
  init_part = init_part->replace_all(S("%n"), S(parser->init_count+1));
  parser->emit(Token(SymGen, init_part));
  // Code that registers pSingular_mod_init; "%s" is the module name.
  Str *init_rest;
  if (parser->c_source()) {
    init_rest = S(
      "__attribute__((constructor))"
      "static void pSingular_init_%s(void) {\n"
      "  pSingular_register_init(pSingular_mod_init);\n"
      "}\n"
    );
  } else {
    init_rest = S(
      "static struct pSingular_Init_%s {\n"
      "  pSingular_Init_%s() {\n"
      "    pSingular_register_init(pSingular_mod_init);\n"
      "  }\n"
      "} pSingular_init_%s;\n"
    );
  }
  init_rest = init_rest->replace_all(S("%s"), modulename);
  parser->emit(Token(SymGen, init_rest));
}
378
// Rewrites one declaration statement that starts with a special keyword
// token (the parser must be positioned on that token).
//
// The statement is split at its top-level commas and every declarator is
// handed to EmitDecl separately. On return after a ";" the parser is
// positioned behind it and the marker is moved there, so the original text
// is not copied to the output. If EOF is reached first, the function returns
// without moving the marker.
//
//   parser        - cursor and output of the file being transformed
//   storage_class - text that replaces the special keyword
//   is_class      - true for the *_INST_VAR forms (pointer-based rewrite)
//   is_toplevel   - whether the declaration is at file/namespace level
//   decl_type     - linkage flavour, forwarded to EmitDecl
void TransformVarDecl(Parser *parser, Str *storage_class,
    bool is_class, bool is_toplevel, DeclType decl_type) {
  // We rewrite: VAR type a, b = init, c;
  // as:
  //   storage_class type a;
  //   storage_class type b;
  //   static type b__INIT__;
  //   storage_class type c;
  //   Init(b, init);
  // NOTE(review): `saved` and `special_pos` are not read again below.
  State saved = parser->save();
  Int special_pos = parser->current_pos();
  parser->current().str = storage_class; // rewrite contents
  parser->advance(); // skip past special token
  parser->skip_while(SymsWS);
  // The type starts at the first non-whitespace token after the keyword.
  Int type_start = parser->current_pos();
  parser->advance();
  // NOTE(review): SymIdent and SymColonColon are OR-ed in as raw symbol
  // values here, while every other mask in this file is built with BIT(...).
  // Which tokens this actually skips depends on the numeric values of the
  // symbols. Do not change it without checking where type_end lands:
  // EmitDecl re-emits the tokens between var_start and the variable name,
  // so leftover type words still reach the output.
  parser->skip_while(SymIdent | SymColonColon | SymsWS);
  Int type_end = parser->current_pos();
  // One iteration per declarator.
  for(;;) {
    Int var_pos = -1, var_start = -1, var_end = -1;
    var_start = parser->current_pos();
    parser->skip_while(SymsTypePrefix | BIT(SymWS) | BIT(SymClass));
    if (TEST(SymsEndDecl | BIT(SymLBrkt), parser->current_sym())) {
      // We are one symbol past the initial variable identifier.
      // The name is the closest identifier behind the cursor.
      var_pos = parser->find_back_until(BIT(SymIdent));
      var_end = var_pos + 1;
    } else {
      // We have a function pointer declaration
      // Peek ahead for the first identifier and take it as the name, then
      // go back and skip the whole declarator with nesting respected.
      State tmp = parser->save();
      var_start = parser->current_pos();
      parser->skip_until(BIT(SymIdent));
      if (parser->current_sym() == SymIdent)
        var_pos = parser->current_pos();
      parser->restore(tmp);
      parser->safe_skip_until(SymsEndDecl);
      var_end = parser->current_pos();
    }
    // Optional initializer: from the "=" up to the next top-level "," or ";".
    Int init_start = -1, init_end = -1;
    if (parser->current_sym() == SymEqual) {
      init_start = parser->current_pos();
      parser->advance();
      parser->safe_skip_until(BIT(SymComma) | BIT(SymSemicolon));
      init_end = parser->current_pos();
    }
    switch (parser->current_sym()) {
      case SymComma:
        // More declarators follow.
        EmitDecl(parser, storage_class,
          type_start, type_end, var_start, var_end,
          init_start, init_end, var_pos, is_class, is_toplevel, decl_type);
        parser->advance();
        break;
      case SymSemicolon:
        // Last declarator; drop the original statement by moving the marker.
        EmitDecl(parser, storage_class,
          type_start, type_end, var_start, var_end,
          init_start, init_end, var_pos, is_class, is_toplevel, decl_type);
        parser->advance();
        parser->mark();
        return;
      case SymEOF:
        return; // error
      default:
        assert(0, "exhaustive switch hits default case");
        return;
    }
  }
}
445
IsToplevel(Arr<int> * stack)446 bool IsToplevel(Arr<int> *stack) {
447 if (stack->len() == 0) return true;
448 for (Int i = 0; i < stack->len(); i++) {
449 if (!stack->at(i)) return false;
450 }
451 return true;
452 }
453
Transform(SourceFile * source)454 TokenList *Transform(SourceFile *source) {
455 Parser *parser = new Parser(source);
456 Arr<int> *toplevel = new Arr<int>();
457 int tl = 0;
458 while (parser->current().sym != SymEOF) {
459 parser->skip_until(SymsSpecial);
460 parser->push_marked();
461 bool is_toplevel = IsToplevel(toplevel);
462 switch (parser->current().sym) {
463 case SymVAR:
464 TransformVarDecl(parser, decl_var,
465 false, is_toplevel, NormalDecl);
466 break;
467 case SymEXTERN_VAR:
468 parser->current().str = decl_extern_var;
469 parser->advance();
470 parser->push_marked();
471 break;
472 case SymSTATIC_VAR:
473 TransformVarDecl(parser, decl_static_var,
474 false, is_toplevel, StaticDecl);
475 break;
476 case SymINST_VAR:
477 TransformVarDecl(parser, decl_var,
478 true, is_toplevel, NormalDecl);
479 break;
480 case SymEXTERN_INST_VAR:
481 TransformVarDecl(parser, decl_extern_var,
482 true, is_toplevel, ExternDecl);
483 break;
484 case SymSTATIC_INST_VAR:
485 TransformVarDecl(parser, decl_static_var,
486 true, is_toplevel, StaticDecl);
487 break;
488 case SymExtern:
489 parser->advance();
490 if (parser->current().sym != SymLiteral)
491 break;
492 if (!parser->current().str->eq("\"C\""))
493 break;
494 tl = 1;
495 break;
496 case SymNamespace:
497 tl = 1;
498 parser->advance();
499 break;
500 case SymLBrace:
501 toplevel->add(tl);
502 parser->advance();
503 break;
504 case SymRBrace:
505 if (toplevel->len() > 0)
506 toplevel->pop();
507 tl = 0;
508 parser->advance();
509 break;
510 case SymEOF:
511 break;
512 default:
513 assert(0, "exhaustive switch hits default case");
514 return NULL;
515 }
516 }
517 EmitEpilogue(parser);
518 return parser->prologue->clone()->add(parser->output);
519 }
520
TestPreProcessor(Str * filename)521 Str *TestPreProcessor(Str *filename) {
522 SourceFile *source = ReadSource(filename);
523 if (!source) {
524 return S("ERROR: File not found: ")->add(filename)->add("\n");
525 }
526 TokenList *tokens = Transform(source);
527 Str *result = new Str();
528 for (Int i = 0; i < tokens->len(); i++) {
529 result->add(tokens->at(i).str);
530 }
531 return result;
532 }
533
RunPreProcessor(Str * filename,Str * filedata)534 Str *RunPreProcessor(Str *filename, Str *filedata) {
535 SourceFile *source = ReadSource(filename, filedata);
536 TokenList *tokens = Transform(source);
537 Str *result = new Str();
538 for (Int i = 0; i < tokens->len(); i++) {
539 result->add(tokens->at(i).str);
540 }
541 return result;
542 }
543