1 /* Small compiler - Staging buffer and optimizer
2 *
3 * The staging buffer
4 * ------------------
5 * The staging buffer allows buffered output of generated code, deletion
6 * of redundant code, optimization by a tinkering process and reversing
7 * the ouput of evaluated expressions (which is used for the reversed
8 * evaluation of arguments in functions).
9 * Initially, stgwrite() writes to the file directly, but after a call to
10 * stgset(TRUE), output is redirected to the buffer. After a call to
11 * stgset(FALSE), stgwrite()'s output is directed to the file again. Thus
12 * only one routine is used for writing to the output, which can be
13 * buffered output or direct output.
14 *
15 * staging buffer variables: stgbuf - the buffer
16 * stgidx - current index in the staging buffer
17 * staging - if true, write to the staging buffer;
18 * if false, write to file directly.
19 *
20 * Copyright (c) ITB CompuPhase, 1997-2003
21 *
22 * This software is provided "as-is", without any express or implied warranty.
23 * In no event will the authors be held liable for any damages arising from
24 * the use of this software.
25 *
26 * Permission is granted to anyone to use this software for any purpose,
27 * including commercial applications, and to alter it and redistribute it
28 * freely, subject to the following restrictions:
29 *
30 * 1. The origin of this software must not be misrepresented; you must not
31 * claim that you wrote the original software. If you use this software in
32 * a product, an acknowledgment in the product documentation would be
33 * appreciated but is not required.
34 * 2. Altered source versions must be plainly marked as such, and must not be
35 * misrepresented as being the original software.
36 * 3. This notice may not be removed or altered from any source distribution.
37 *
38 * Version: $Id$
39 */
40
41
42 #ifdef HAVE_CONFIG_H
43 # include <config.h>
44 #endif
45
46 #include <assert.h>
47 #include <stdio.h>
48 #include <stdlib.h> /* for atoi() */
49 #include <string.h>
50 #include <ctype.h>
51
52 #include "embryo_cc_sc.h"
53
54 #include "embryo_cc_sc7.scp"
55
56 static void stgstring(char *start, char *end);
57 static void stgopt(char *start, char *end);
58
59 #define sSTG_GROW 512
60 #define sSTG_MAX 20480
61
62 static char *stgbuf = NULL;
63 static int stgmax = 0; /* current size of the staging buffer */
64
65 #define CHECK_STGBUFFER(index) if ((int)(index)>=stgmax) grow_stgbuffer((index)+1)
66
67 static void
grow_stgbuffer(int requiredsize)68 grow_stgbuffer(int requiredsize)
69 {
70 char *p;
71 int clear = !stgbuf; /* if previously none, empty buffer explicitly */
72
73 assert(stgmax < requiredsize);
74 /* if the staging buffer (holding intermediate code for one line) grows
75 * over a few kBytes, there is probably a run-away expression
76 */
77 if (requiredsize > sSTG_MAX)
78 error(102, "staging buffer"); /* staging buffer overflow (fatal error) */
79 stgmax = requiredsize + sSTG_GROW;
80 if (stgbuf)
81 p = (char *)realloc(stgbuf, stgmax * sizeof(char));
82 else
83 p = (char *)malloc(stgmax * sizeof(char));
84 if (!p)
85 error(102, "staging buffer"); /* staging buffer overflow (fatal error) */
86 stgbuf = p;
87 if (clear)
88 *stgbuf = '\0';
89 }
90
91 void
stgbuffer_cleanup(void)92 stgbuffer_cleanup(void)
93 {
94 if (stgbuf)
95 {
96 free(stgbuf);
97 stgbuf = NULL;
98 stgmax = 0;
99 } /* if */
100 }
101
102 /* the variables "stgidx" and "staging" are declared in "scvars.c" */
103
104 /* stgmark
105 *
106 * Copies a mark into the staging buffer. At this moment there are three
107 * possible marks:
108 * sSTARTREORDER identifies the beginning of a series of expression
109 * strings that must be written to the output file in
110 * reordered order
111 * sENDREORDER identifies the end of 'reverse evaluation'
112 * sEXPRSTART + idx only valid within a block that is evaluated in
113 * reordered order, it identifies the start of an
114 * expression; the "idx" value is the argument position
115 *
116 * Global references: stgidx (altered)
117 * stgbuf (altered)
118 * staging (referred to only)
119 */
120 void
stgmark(char mark)121 stgmark(char mark)
122 {
123 if (staging)
124 {
125 CHECK_STGBUFFER(stgidx);
126 stgbuf[stgidx++] = mark;
127 } /* if */
128 }
129
130 static int
filewrite(char * str)131 filewrite(char *str)
132 {
133 if (sc_status == statWRITE)
134 return sc_writeasm(outf, str);
135 return TRUE;
136 }
137
138 /* stgwrite
139 *
140 * Writes the string "st" to the staging buffer or to the output file. In the
141 * case of writing to the staging buffer, the terminating byte of zero is
142 * copied too, but... the optimizer can only work on complete lines (not on
143 * fractions of it. Therefore if the string is staged, if the last character
144 * written to the buffer is a '\0' and the previous-to-last is not a '\n',
145 * the string is concatenated to the last string in the buffer (the '\0' is
146 * overwritten). This also means an '\n' used in the middle of a string isn't
147 * recognized and could give wrong results with the optimizer.
148 * Even when writing to the output file directly, all strings are buffered
149 * until a whole line is complete.
150 *
151 * Global references: stgidx (altered)
152 * stgbuf (altered)
153 * staging (referred to only)
154 */
155 void
stgwrite(char * st)156 stgwrite(char *st)
157 {
158 int len;
159
160 CHECK_STGBUFFER(0);
161 if (staging)
162 {
163 if (stgidx >= 2 && stgbuf[stgidx - 1] == '\0'
164 && stgbuf[stgidx - 2] != '\n')
165 stgidx -= 1; /* overwrite last '\0' */
166 while (*st != '\0')
167 { /* copy to staging buffer */
168 CHECK_STGBUFFER(stgidx);
169 stgbuf[stgidx++] = *st++;
170 } /* while */
171 CHECK_STGBUFFER(stgidx);
172 stgbuf[stgidx++] = '\0';
173 }
174 else
175 {
176 CHECK_STGBUFFER(strlen(stgbuf) + strlen(st) + 1);
177 strcat(stgbuf, st);
178 len = strlen(stgbuf);
179 if (len > 0 && stgbuf[len - 1] == '\n')
180 {
181 filewrite(stgbuf);
182 stgbuf[0] = '\0';
183 } /* if */
184 } /* if */
185 }
186
187 /* stgout
188 *
189 * Writes the staging buffer to the output file via stgstring() (for
190 * reversing expressions in the buffer) and stgopt() (for optimizing). It
191 * resets "stgidx".
192 *
193 * Global references: stgidx (altered)
194 * stgbuf (referred to only)
195 * staging (referred to only)
196 */
197 void
stgout(int idx)198 stgout(int idx)
199 {
200 if (!staging)
201 return;
202 stgstring(&stgbuf[idx], &stgbuf[stgidx]);
203 stgidx = idx;
204 }
205
206 typedef struct
207 {
208 char *start, *end;
209 } argstack;
210
211 /* stgstring
212 *
213 * Analyses whether code strings should be output to the file as they appear
214 * in the staging buffer or whether portions of it should be re-ordered.
215 * Re-ordering takes place in function argument lists; Small passes arguments
216 * to functions from right to left. When arguments are "named" rather than
217 * positional, the order in the source stream is indeterminate.
218 * This function calls itself recursively in case it needs to re-order code
219 * strings, and it uses a private stack (or list) to mark the start and the
220 * end of expressions in their correct (reversed) order.
221 * In any case, stgstring() sends a block as large as possible to the
222 * optimizer stgopt().
223 *
224 * In "reorder" mode, each set of code strings must start with the token
225 * sEXPRSTART, even the first. If the token sSTARTREORDER is represented
226 * by '[', sENDREORDER by ']' and sEXPRSTART by '|' the following applies:
227 * '[]...' valid, but useless; no output
228 * '[|...] valid, but useless; only one string
229 * '[|...|...] valid and useful
230 * '[...|...] invalid, first string doesn't start with '|'
231 * '[|...|] invalid
232 */
233 static void
stgstring(char * start,char * end)234 stgstring(char *start, char *end)
235 {
236 char *ptr;
237 int nest, argc, arg;
238 argstack *stack;
239
240 while (start < end)
241 {
242 if (*start == sSTARTREORDER)
243 {
244 start += 1; /* skip token */
245 /* allocate a argstack with sMAXARGS items */
246 stack = (argstack *) malloc(sMAXARGS * sizeof(argstack));
247 if (!stack)
248 error(103); /* insufficient memory */
249 nest = 1; /* nesting counter */
250 argc = 0; /* argument counter */
251 arg = -1; /* argument index; no valid argument yet */
252 do
253 {
254 switch (*start)
255 {
256 case sSTARTREORDER:
257 nest++;
258 start++;
259 break;
260 case sENDREORDER:
261 nest--;
262 start++;
263 break;
264 default:
265 if ((*start & sEXPRSTART) == sEXPRSTART)
266 {
267 if (nest == 1)
268 {
269 if (arg >= 0)
270 stack[arg].end = start - 1; /* finish previous argument */
271 arg = (unsigned char)*start - sEXPRSTART;
272 stack[arg].start = start + 1;
273 if (arg >= argc)
274 argc = arg + 1;
275 } /* if */
276 start++;
277 }
278 else
279 {
280 start += strlen(start) + 1;
281 } /* if */
282 } /* switch */
283 }
284 while (nest); /* enddo */
285 if (arg >= 0)
286 stack[arg].end = start - 1; /* finish previous argument */
287 while (argc > 0)
288 {
289 argc--;
290 stgstring(stack[argc].start, stack[argc].end);
291 } /* while */
292 free(stack);
293 }
294 else
295 {
296 ptr = start;
297 while (ptr < end && *ptr != sSTARTREORDER)
298 ptr += strlen(ptr) + 1;
299 stgopt(start, ptr);
300 start = ptr;
301 } /* if */
302 } /* while */
303 }
304
305 /* stgdel
306 *
307 * Scraps code from the staging buffer by resetting "stgidx" to "index".
308 *
309 * Global references: stgidx (altered)
310 * staging (referred to only)
311 */
312 void
stgdel(int idx,cell code_index)313 stgdel(int idx, cell code_index)
314 {
315 if (staging)
316 {
317 stgidx = idx;
318 code_idx = code_index;
319 } /* if */
320 }
321
322 int
stgget(int * idx,cell * code_index)323 stgget(int *idx, cell * code_index)
324 {
325 if (staging)
326 {
327 *idx = stgidx;
328 *code_index = code_idx;
329 } /* if */
330 return staging;
331 }
332
333 /* stgset
334 *
335 * Sets staging on or off. If it's turned off, the staging buffer must be
336 * initialized to an empty string. If it's turned on, the routine makes sure
337 * the index ("stgidx") is set to 0 (it should already be 0).
338 *
339 * Global references: staging (altered)
340 * stgidx (altered)
341 * stgbuf (contents altered)
342 */
343 void
stgset(int onoff)344 stgset(int onoff)
345 {
346 staging = onoff;
347 if (staging)
348 {
349 assert(stgidx == 0);
350 stgidx = 0;
351 CHECK_STGBUFFER(stgidx);
352 /* write any contents that may be put in the buffer by stgwrite()
353 * when "staging" was 0
354 */
355 if (stgbuf[0] != '\0')
356 filewrite(stgbuf);
357 } /* if */
358 stgbuf[0] = '\0';
359 }
360
361 /* phopt_init
362 * Initialize all sequence strings of the peehole optimizer. The strings
363 * are embedded in the .EXE file in compressed format, here we expand
364 * them (and allocate memory for the sequences).
365 */
366 static SEQUENCE *sequences;
367
368 int
phopt_init(void)369 phopt_init(void)
370 {
371 int number, i, len;
372 char str[160];
373
374 /* count number of sequences */
375 for (number = 0; sequences_cmp[number].find; number++)
376 /* nothing */ ;
377 number++; /* include an item for the NULL terminator */
378
379 if (!(sequences = (SEQUENCE *)malloc(number * sizeof(SEQUENCE))))
380 return FALSE;
381
382 /* pre-initialize all to NULL (in case of failure) */
383 for (i = 0; i < number; i++)
384 {
385 sequences[i].find = NULL;
386 sequences[i].replace = NULL;
387 sequences[i].savesize = 0;
388 } /* for */
389
390 /* expand all strings */
391 for (i = 0; i < number - 1; i++)
392 {
393 len =
394 strexpand(str, (unsigned char *)sequences_cmp[i].find, sizeof str,
395 SCPACK_TABLE);
396 assert(len <= (int)(sizeof(str)));
397 assert(len == (int)(strlen(str) + 1));
398 sequences[i].find = (char *)malloc(len);
399 if (sequences[i].find)
400 strcpy(sequences[i].find, str);
401 len =
402 strexpand(str, (unsigned char *)sequences_cmp[i].replace, sizeof str,
403 SCPACK_TABLE);
404 assert(len <= (int)(sizeof(str)));
405 assert(len == (int)(strlen(str) + 1));
406 sequences[i].replace = (char *)malloc(len);
407 if (sequences[i].replace)
408 strcpy(sequences[i].replace, str);
409 sequences[i].savesize = sequences_cmp[i].savesize;
410 if (!sequences[i].find || !sequences[i].replace)
411 return phopt_cleanup();
412 } /* for */
413
414 return TRUE;
415 }
416
417 int
phopt_cleanup(void)418 phopt_cleanup(void)
419 {
420 int i;
421
422 if (sequences)
423 {
424 i = 0;
425 while (sequences[i].find || sequences[i].replace)
426 {
427 if (sequences[i].find)
428 free(sequences[i].find);
429 if (sequences[i].replace)
430 free(sequences[i].replace);
431 i++;
432 } /* while */
433 free(sequences);
434 sequences = NULL;
435 } /* if */
436 return FALSE;
437 }
438
439 #define _maxoptvars 4
440 #define _aliasmax 10 /* a 32-bit number can be represented in
441 * 9 decimal digits */
442
443 static int
matchsequence(char * start,char * end,char * pattern,char symbols[_maxoptvars][_aliasmax+1],int * match_length)444 matchsequence(char *start, char *end, char *pattern,
445 char symbols[_maxoptvars][_aliasmax + 1], int *match_length)
446 {
447 int var, i;
448 char str[_aliasmax + 2];
449 char *start_org = start;
450
451 *match_length = 0;
452 for (var = 0; var < _maxoptvars; var++)
453 symbols[var][0] = '\0';
454
455 while (*start == '\t' || *start == ' ')
456 start++;
457 while (*pattern)
458 {
459 if (start >= end)
460 return FALSE;
461 switch (*pattern)
462 {
463 case '%': /* new "symbol" */
464 pattern++;
465 assert(sc_isdigit(*pattern));
466 var = atoi(pattern) - 1;
467 assert(var >= 0 && var < _maxoptvars);
468 assert(alphanum(*start));
469 for (i = 0; start < end && alphanum(*start); i++, start++)
470 {
471 assert(i <= _aliasmax);
472 str[i] = *start;
473 } /* for */
474 str[i] = '\0';
475 if (symbols[var][0] != '\0')
476 {
477 if (strcmp(symbols[var], str) != 0)
478 return FALSE; /* symbols should be identical */
479 }
480 else
481 {
482 strcpy(symbols[var], str);
483 } /* if */
484 break;
485 case ' ':
486 if (*start != '\t' && *start != ' ')
487 return FALSE;
488 while ((start < end && *start == '\t') || *start == ' ')
489 start++;
490 break;
491 case '!':
492 while ((start < end && *start == '\t') || *start == ' ')
493 start++; /* skip trailing white space */
494 if (*start != '\n')
495 return FALSE;
496 assert(*(start + 1) == '\0');
497 start += 2; /* skip '\n' and '\0' */
498 if (*(pattern + 1) != '\0')
499 while ((start < end && *start == '\t') || *start == ' ')
500 start++; /* skip leading white space of next instruction */
501 break;
502 default:
503 if (tolower(*start) != tolower(*pattern))
504 return FALSE;
505 start++;
506 } /* switch */
507 pattern++;
508 } /* while */
509
510 *match_length = (int)(start - start_org);
511 return TRUE;
512 }
513
514 static char *
replacesequence(char * pattern,char symbols[_maxoptvars][_aliasmax+1],int * repl_length)515 replacesequence(char *pattern, char symbols[_maxoptvars][_aliasmax + 1],
516 int *repl_length)
517 {
518 char *sptr;
519 int var;
520 char *buffer;
521
522 /* calculate the length of the new buffer
523 * this is the length of the pattern plus the length of all symbols (note
524 * that the same symbol may occur multiple times in the pattern) plus
525 * line endings and startings ('\t' to start a line and '\n\0' to end one)
526 */
527 assert(repl_length != NULL);
528 *repl_length = 0;
529 sptr = pattern;
530 while (*sptr)
531 {
532 switch (*sptr)
533 {
534 case '%':
535 sptr++; /* skip '%' */
536 assert(sc_isdigit(*sptr));
537 var = atoi(sptr) - 1;
538 assert(var >= 0 && var < _maxoptvars);
539 assert(symbols[var][0] != '\0'); /* variable should be defined */
540 *repl_length += strlen(symbols[var]);
541 break;
542 case '!':
543 *repl_length += 3; /* '\t', '\n' & '\0' */
544 break;
545 default:
546 *repl_length += 1;
547 } /* switch */
548 sptr++;
549 } /* while */
550
551 /* allocate a buffer to replace the sequence in */
552 if (!(buffer = malloc(*repl_length)))
553 {
554 error(103);
555 return NULL;
556 }
557
558 /* replace the pattern into this temporary buffer */
559 sptr = buffer;
560 *sptr++ = '\t'; /* the "replace" patterns do not have tabs */
561 while (*pattern)
562 {
563 assert((int)(sptr - buffer) < *repl_length);
564 switch (*pattern)
565 {
566 case '%':
567 /* write out the symbol */
568 pattern++;
569 assert(sc_isdigit(*pattern));
570 var = atoi(pattern) - 1;
571 assert(var >= 0 && var < _maxoptvars);
572 assert(symbols[var][0] != '\0'); /* variable should be defined */
573 strcpy(sptr, symbols[var]);
574 sptr += strlen(symbols[var]);
575 break;
576 case '!':
577 /* finish the line, optionally start the next line with an indent */
578 *sptr++ = '\n';
579 *sptr++ = '\0';
580 if (*(pattern + 1) != '\0')
581 *sptr++ = '\t';
582 break;
583 default:
584 *sptr++ = *pattern;
585 } /* switch */
586 pattern++;
587 } /* while */
588
589 assert((int)(sptr - buffer) == *repl_length);
590 return buffer;
591 }
592
593 static void
strreplace(char * dest,char * replace,int sub_length,int repl_length,int dest_length)594 strreplace(char *dest, char *replace, int sub_length, int repl_length,
595 int dest_length)
596 {
597 int offset = sub_length - repl_length;
598
599 if (offset > 0) /* delete a section */
600 memmove(dest, dest + offset, dest_length - offset);
601 else if (offset < 0) /* insert a section */
602 memmove(dest - offset, dest, dest_length);
603 memcpy(dest, replace, repl_length);
604 }
605
606 /* stgopt
607 *
608 * Optimizes the staging buffer by checking for series of instructions that
609 * can be coded more compact. The routine expects the lines in the staging
610 * buffer to be separated with '\n' and '\0' characters.
611 *
612 * The longest sequences must be checked first.
613 */
614
615 static void
stgopt(char * start,char * end)616 stgopt(char *start, char *end)
617 {
618 char symbols[_maxoptvars][_aliasmax + 1];
619 int seq, match_length, repl_length;
620
621 assert(sequences != NULL);
622 while (start < end)
623 {
624 if ((sc_debug & sNOOPTIMIZE) != 0 || sc_status != statWRITE)
625 {
626 /* do not match anything if debug-level is maximum */
627 filewrite(start);
628 }
629 else
630 {
631 seq = 0;
632 while (sequences[seq].find)
633 {
634 assert(seq >= 0);
635 if (matchsequence
636 (start, end, sequences[seq].find, symbols, &match_length))
637 {
638 char *replace =
639 replacesequence(sequences[seq].replace, symbols,
640 &repl_length);
641 /* If the replacement is bigger than the original section, we may need
642 * to "grow" the staging buffer. This is quite complex, due to the
643 * re-ordering of expressions that can also happen in the staging
644 * buffer. In addition, it should not happen: the peephole optimizer
645 * must replace sequences with *shorter* sequences, not longer ones.
646 * So, I simply forbid sequences that are longer than the ones they
647 * are meant to replace.
648 */
649 assert(match_length >= repl_length);
650 if (match_length >= repl_length)
651 {
652 strreplace(start, replace, match_length,
653 repl_length, (int)(end - start));
654 end -= match_length - repl_length;
655 free(replace);
656 code_idx -= sequences[seq].savesize;
657 seq = 0; /* restart search for matches */
658 }
659 else
660 {
661 /* actually, we should never get here (match_length<repl_length) */
662 assert(0);
663 seq++;
664 } /* if */
665 }
666 else
667 {
668 seq++;
669 } /* if */
670 } /* while */
671 assert(sequences[seq].find == NULL);
672 filewrite(start);
673 } /* if */
674 assert(start < end);
675 start += strlen(start) + 1; /* to next string */
676 } /* while (start<end) */
677 }
678
679 #undef SCPACK_TABLE
680