1 /*  Small compiler - Staging buffer and optimizer
2  *
3  *  The staging buffer
4  *  ------------------
5  *  The staging buffer allows buffered output of generated code, deletion
6  *  of redundant code, optimization by a tinkering process and reversing
7  *  the ouput of evaluated expressions (which is used for the reversed
8  *  evaluation of arguments in functions).
9  *  Initially, stgwrite() writes to the file directly, but after a call to
10  *  stgset(TRUE), output is redirected to the buffer. After a call to
11  *  stgset(FALSE), stgwrite()'s output is directed to the file again. Thus
12  *  only one routine is used for writing to the output, which can be
13  *  buffered output or direct output.
14  *
15  *  staging buffer variables:   stgbuf  - the buffer
16  *                              stgidx  - current index in the staging buffer
17  *                              staging - if true, write to the staging buffer;
18  *                                        if false, write to file directly.
19  *
20  *  Copyright (c) ITB CompuPhase, 1997-2003
21  *
22  *  This software is provided "as-is", without any express or implied warranty.
23  *  In no event will the authors be held liable for any damages arising from
24  *  the use of this software.
25  *
26  *  Permission is granted to anyone to use this software for any purpose,
27  *  including commercial applications, and to alter it and redistribute it
28  *  freely, subject to the following restrictions:
29  *
30  *  1.  The origin of this software must not be misrepresented; you must not
31  *      claim that you wrote the original software. If you use this software in
32  *      a product, an acknowledgment in the product documentation would be
33  *      appreciated but is not required.
34  *  2.  Altered source versions must be plainly marked as such, and must not be
35  *      misrepresented as being the original software.
36  *  3.  This notice may not be removed or altered from any source distribution.
37  *
38  *  Version: $Id$
39  */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 # include <config.h>
44 #endif
45 
46 #include <assert.h>
47 #include <stdio.h>
48 #include <stdlib.h>		/* for atoi() */
49 #include <string.h>
50 #include <ctype.h>
51 
52 #include "embryo_cc_sc.h"
53 
54 #include "embryo_cc_sc7.scp"
55 
56 static void         stgstring(char *start, char *end);
57 static void         stgopt(char *start, char *end);
58 
59 #define sSTG_GROW   512
60 #define sSTG_MAX    20480
61 
62 static char        *stgbuf = NULL;
63 static int          stgmax = 0;	/* current size of the staging buffer */
64 
65 #define CHECK_STGBUFFER(index) if ((int)(index)>=stgmax) grow_stgbuffer((index)+1)
66 
67 static void
grow_stgbuffer(int requiredsize)68 grow_stgbuffer(int requiredsize)
69 {
70    char               *p;
71    int                 clear = !stgbuf;	/* if previously none, empty buffer explicitly */
72 
73    assert(stgmax < requiredsize);
74    /* if the staging buffer (holding intermediate code for one line) grows
75     * over a few kBytes, there is probably a run-away expression
76     */
77    if (requiredsize > sSTG_MAX)
78       error(102, "staging buffer");	/* staging buffer overflow (fatal error) */
79    stgmax = requiredsize + sSTG_GROW;
80    if (stgbuf)
81       p = (char *)realloc(stgbuf, stgmax * sizeof(char));
82    else
83       p = (char *)malloc(stgmax * sizeof(char));
84    if (!p)
85       error(102, "staging buffer");	/* staging buffer overflow (fatal error) */
86    stgbuf = p;
87    if (clear)
88       *stgbuf = '\0';
89 }
90 
91 void
stgbuffer_cleanup(void)92 stgbuffer_cleanup(void)
93 {
94    if (stgbuf)
95      {
96 	free(stgbuf);
97 	stgbuf = NULL;
98 	stgmax = 0;
99      }				/* if */
100 }
101 
102 /* the variables "stgidx" and "staging" are declared in "scvars.c" */
103 
104 /*  stgmark
105  *
106  *  Copies a mark into the staging buffer. At this moment there are three
107  *  possible marks:
108  *     sSTARTREORDER    identifies the beginning of a series of expression
109  *                      strings that must be written to the output file in
110  *                      reordered order
111  *    sENDREORDER       identifies the end of 'reverse evaluation'
112  *    sEXPRSTART + idx  only valid within a block that is evaluated in
113  *                      reordered order, it identifies the start of an
114  *                      expression; the "idx" value is the argument position
115  *
116  *  Global references: stgidx  (altered)
117  *                     stgbuf  (altered)
118  *                     staging (referred to only)
119  */
120 void
stgmark(char mark)121 stgmark(char mark)
122 {
123    if (staging)
124      {
125 	CHECK_STGBUFFER(stgidx);
126 	stgbuf[stgidx++] = mark;
127      }				/* if */
128 }
129 
130 static int
filewrite(char * str)131 filewrite(char *str)
132 {
133    if (sc_status == statWRITE)
134       return sc_writeasm(outf, str);
135    return TRUE;
136 }
137 
138 /*  stgwrite
139  *
140  *  Writes the string "st" to the staging buffer or to the output file. In the
141  *  case of writing to the staging buffer, the terminating byte of zero is
142  *  copied too, but... the optimizer can only work on complete lines (not on
143  *  fractions of it. Therefore if the string is staged, if the last character
144  *  written to the buffer is a '\0' and the previous-to-last is not a '\n',
145  *  the string is concatenated to the last string in the buffer (the '\0' is
146  *  overwritten). This also means an '\n' used in the middle of a string isn't
147  *  recognized and could give wrong results with the optimizer.
148  *  Even when writing to the output file directly, all strings are buffered
149  *  until a whole line is complete.
150  *
151  *  Global references: stgidx  (altered)
152  *                     stgbuf  (altered)
153  *                     staging (referred to only)
154  */
155 void
stgwrite(char * st)156 stgwrite(char *st)
157 {
158    int                 len;
159 
160    CHECK_STGBUFFER(0);
161    if (staging)
162      {
163 	if (stgidx >= 2 && stgbuf[stgidx - 1] == '\0'
164 	    && stgbuf[stgidx - 2] != '\n')
165 	   stgidx -= 1;		/* overwrite last '\0' */
166 	while (*st != '\0')
167 	  {			/* copy to staging buffer */
168 	     CHECK_STGBUFFER(stgidx);
169 	     stgbuf[stgidx++] = *st++;
170 	  }			/* while */
171 	CHECK_STGBUFFER(stgidx);
172 	stgbuf[stgidx++] = '\0';
173      }
174    else
175      {
176 	CHECK_STGBUFFER(strlen(stgbuf) + strlen(st) + 1);
177 	strcat(stgbuf, st);
178 	len = strlen(stgbuf);
179 	if (len > 0 && stgbuf[len - 1] == '\n')
180 	  {
181 	     filewrite(stgbuf);
182 	     stgbuf[0] = '\0';
183 	  }			/* if */
184      }				/* if */
185 }
186 
187 /*  stgout
188  *
189  *  Writes the staging buffer to the output file via stgstring() (for
190  *  reversing expressions in the buffer) and stgopt() (for optimizing). It
191  *  resets "stgidx".
192  *
193  *  Global references: stgidx  (altered)
194  *                     stgbuf  (referred to only)
195  *                     staging (referred to only)
196  */
197 void
stgout(int idx)198 stgout(int idx)
199 {
200    if (!staging)
201       return;
202    stgstring(&stgbuf[idx], &stgbuf[stgidx]);
203    stgidx = idx;
204 }
205 
206 typedef struct
207 {
208    char               *start, *end;
209 } argstack;
210 
211 /*  stgstring
212  *
213  *  Analyses whether code strings should be output to the file as they appear
214  *  in the staging buffer or whether portions of it should be re-ordered.
215  *  Re-ordering takes place in function argument lists; Small passes arguments
216  *  to functions from right to left. When arguments are "named" rather than
217  *  positional, the order in the source stream is indeterminate.
218  *  This function calls itself recursively in case it needs to re-order code
219  *  strings, and it uses a private stack (or list) to mark the start and the
220  *  end of expressions in their correct (reversed) order.
221  *  In any case, stgstring() sends a block as large as possible to the
222  *  optimizer stgopt().
223  *
224  *  In "reorder" mode, each set of code strings must start with the token
225  *  sEXPRSTART, even the first. If the token sSTARTREORDER is represented
226  *  by '[', sENDREORDER by ']' and sEXPRSTART by '|' the following applies:
227  *     '[]...'     valid, but useless; no output
228  *     '[|...]     valid, but useless; only one string
229  *     '[|...|...] valid and useful
230  *     '[...|...]  invalid, first string doesn't start with '|'
231  *     '[|...|]    invalid
232  */
233 static void
stgstring(char * start,char * end)234 stgstring(char *start, char *end)
235 {
236    char               *ptr;
237    int                 nest, argc, arg;
238    argstack           *stack;
239 
240    while (start < end)
241      {
242 	if (*start == sSTARTREORDER)
243 	  {
244 	     start += 1;	/* skip token */
245 	     /* allocate a argstack with sMAXARGS items */
246 	     stack = (argstack *) malloc(sMAXARGS * sizeof(argstack));
247 	     if (!stack)
248 		error(103);	/* insufficient memory */
249 	     nest = 1;		/* nesting counter */
250 	     argc = 0;		/* argument counter */
251 	     arg = -1;		/* argument index; no valid argument yet */
252 	     do
253 	       {
254 		  switch (*start)
255 		    {
256 		    case sSTARTREORDER:
257 		       nest++;
258 		       start++;
259 		       break;
260 		    case sENDREORDER:
261 		       nest--;
262 		       start++;
263 		       break;
264 		    default:
265 		       if ((*start & sEXPRSTART) == sEXPRSTART)
266 			 {
267 			    if (nest == 1)
268 			      {
269 				 if (arg >= 0)
270 				    stack[arg].end = start - 1;	/* finish previous argument */
271 				 arg = (unsigned char)*start - sEXPRSTART;
272 				 stack[arg].start = start + 1;
273 				 if (arg >= argc)
274 				    argc = arg + 1;
275 			      }	/* if */
276 			    start++;
277 			 }
278 		       else
279 			 {
280 			    start += strlen(start) + 1;
281 			 }	/* if */
282 		    }		/* switch */
283 	       }
284 	     while (nest);	/* enddo */
285 	     if (arg >= 0)
286 		stack[arg].end = start - 1;	/* finish previous argument */
287 	     while (argc > 0)
288 	       {
289 		  argc--;
290 		  stgstring(stack[argc].start, stack[argc].end);
291 	       }		/* while */
292 	     free(stack);
293 	  }
294 	else
295 	  {
296 	     ptr = start;
297 	     while (ptr < end && *ptr != sSTARTREORDER)
298 		ptr += strlen(ptr) + 1;
299 	     stgopt(start, ptr);
300 	     start = ptr;
301 	  }			/* if */
302      }				/* while */
303 }
304 
305 /*  stgdel
306  *
307  *  Scraps code from the staging buffer by resetting "stgidx" to "index".
308  *
309  *  Global references: stgidx (altered)
310  *                     staging (referred to only)
311  */
312 void
stgdel(int idx,cell code_index)313 stgdel(int idx, cell code_index)
314 {
315    if (staging)
316      {
317 	stgidx = idx;
318 	code_idx = code_index;
319      }				/* if */
320 }
321 
322 int
stgget(int * idx,cell * code_index)323 stgget(int *idx, cell * code_index)
324 {
325    if (staging)
326      {
327 	*idx = stgidx;
328 	*code_index = code_idx;
329      }				/* if */
330    return staging;
331 }
332 
333 /*  stgset
334  *
335  *  Sets staging on or off. If it's turned off, the staging buffer must be
336  *  initialized to an empty string. If it's turned on, the routine makes sure
337  *  the index ("stgidx") is set to 0 (it should already be 0).
338  *
339  *  Global references: staging  (altered)
340  *                     stgidx   (altered)
341  *                     stgbuf   (contents altered)
342  */
343 void
stgset(int onoff)344 stgset(int onoff)
345 {
346    staging = onoff;
347    if (staging)
348      {
349 	assert(stgidx == 0);
350 	stgidx = 0;
351 	CHECK_STGBUFFER(stgidx);
352 	/* write any contents that may be put in the buffer by stgwrite()
353 	 * when "staging" was 0
354 	 */
355 	if (stgbuf[0] != '\0')
356 	   filewrite(stgbuf);
357      }				/* if */
358    stgbuf[0] = '\0';
359 }
360 
361 /* phopt_init
362  * Initialize all sequence strings of the peehole optimizer. The strings
363  * are embedded in the .EXE file in compressed format, here we expand
364  * them (and allocate memory for the sequences).
365  */
366 static SEQUENCE    *sequences;
367 
368 int
phopt_init(void)369 phopt_init(void)
370 {
371    int                 number, i, len;
372    char                str[160];
373 
374    /* count number of sequences */
375    for (number = 0; sequences_cmp[number].find; number++)
376       /* nothing */ ;
377    number++;			/* include an item for the NULL terminator */
378 
379    if (!(sequences = (SEQUENCE *)malloc(number * sizeof(SEQUENCE))))
380       return FALSE;
381 
382    /* pre-initialize all to NULL (in case of failure) */
383    for (i = 0; i < number; i++)
384      {
385 	sequences[i].find = NULL;
386 	sequences[i].replace = NULL;
387 	sequences[i].savesize = 0;
388      }				/* for */
389 
390    /* expand all strings */
391    for (i = 0; i < number - 1; i++)
392      {
393 	len =
394 	   strexpand(str, (unsigned char *)sequences_cmp[i].find, sizeof str,
395 		     SCPACK_TABLE);
396 	assert(len <= (int)(sizeof(str)));
397 	assert(len == (int)(strlen(str) + 1));
398 	sequences[i].find = (char *)malloc(len);
399 	if (sequences[i].find)
400 	   strcpy(sequences[i].find, str);
401 	len =
402 	   strexpand(str, (unsigned char *)sequences_cmp[i].replace, sizeof str,
403 		     SCPACK_TABLE);
404 	assert(len <= (int)(sizeof(str)));
405 	assert(len == (int)(strlen(str) + 1));
406 	sequences[i].replace = (char *)malloc(len);
407 	if (sequences[i].replace)
408 	   strcpy(sequences[i].replace, str);
409 	sequences[i].savesize = sequences_cmp[i].savesize;
410 	if (!sequences[i].find || !sequences[i].replace)
411 	   return phopt_cleanup();
412      }				/* for */
413 
414    return TRUE;
415 }
416 
417 int
phopt_cleanup(void)418 phopt_cleanup(void)
419 {
420    int                 i;
421 
422    if (sequences)
423      {
424 	i = 0;
425 	while (sequences[i].find || sequences[i].replace)
426 	  {
427 	     if (sequences[i].find)
428 		free(sequences[i].find);
429 	     if (sequences[i].replace)
430 		free(sequences[i].replace);
431 	     i++;
432 	  }			/* while */
433 	free(sequences);
434 	sequences = NULL;
435      }				/* if */
436    return FALSE;
437 }
438 
439 #define _maxoptvars     4
440 #define _aliasmax       10	/* a 32-bit number can be represented in
441 				 * 9 decimal digits */
442 
443 static int
matchsequence(char * start,char * end,char * pattern,char symbols[_maxoptvars][_aliasmax+1],int * match_length)444 matchsequence(char *start, char *end, char *pattern,
445 	      char symbols[_maxoptvars][_aliasmax + 1], int *match_length)
446 {
447    int                 var, i;
448    char                str[_aliasmax + 2];
449    char               *start_org = start;
450 
451    *match_length = 0;
452    for (var = 0; var < _maxoptvars; var++)
453       symbols[var][0] = '\0';
454 
455    while (*start == '\t' || *start == ' ')
456       start++;
457    while (*pattern)
458      {
459 	if (start >= end)
460 	   return FALSE;
461 	switch (*pattern)
462 	  {
463 	  case '%':		/* new "symbol" */
464 	     pattern++;
465 	     assert(sc_isdigit(*pattern));
466 	     var = atoi(pattern) - 1;
467 	     assert(var >= 0 && var < _maxoptvars);
468 	     assert(alphanum(*start));
469 	     for (i = 0; start < end && alphanum(*start); i++, start++)
470 	       {
471 		  assert(i <= _aliasmax);
472 		  str[i] = *start;
473 	       }		/* for */
474 	     str[i] = '\0';
475 	     if (symbols[var][0] != '\0')
476 	       {
477 		  if (strcmp(symbols[var], str) != 0)
478 		     return FALSE;	/* symbols should be identical */
479 	       }
480 	     else
481 	       {
482 		  strcpy(symbols[var], str);
483 	       }		/* if */
484 	     break;
485 	  case ' ':
486 	     if (*start != '\t' && *start != ' ')
487 		return FALSE;
488 	     while ((start < end && *start == '\t') || *start == ' ')
489 		start++;
490 	     break;
491 	  case '!':
492 	     while ((start < end && *start == '\t') || *start == ' ')
493 		start++;	/* skip trailing white space */
494 	     if (*start != '\n')
495 		return FALSE;
496 	     assert(*(start + 1) == '\0');
497 	     start += 2;	/* skip '\n' and '\0' */
498 	     if (*(pattern + 1) != '\0')
499 		while ((start < end && *start == '\t') || *start == ' ')
500 		   start++;	/* skip leading white space of next instruction */
501 	     break;
502 	  default:
503 	     if (tolower(*start) != tolower(*pattern))
504 		return FALSE;
505 	     start++;
506 	  }			/* switch */
507 	pattern++;
508      }				/* while */
509 
510    *match_length = (int)(start - start_org);
511    return TRUE;
512 }
513 
514 static char        *
replacesequence(char * pattern,char symbols[_maxoptvars][_aliasmax+1],int * repl_length)515 replacesequence(char *pattern, char symbols[_maxoptvars][_aliasmax + 1],
516 		int *repl_length)
517 {
518    char               *sptr;
519    int                 var;
520    char               *buffer;
521 
522    /* calculate the length of the new buffer
523     * this is the length of the pattern plus the length of all symbols (note
524     * that the same symbol may occur multiple times in the pattern) plus
525     * line endings and startings ('\t' to start a line and '\n\0' to end one)
526     */
527    assert(repl_length != NULL);
528    *repl_length = 0;
529    sptr = pattern;
530    while (*sptr)
531      {
532 	switch (*sptr)
533 	  {
534 	  case '%':
535 	     sptr++;		/* skip '%' */
536 	     assert(sc_isdigit(*sptr));
537 	     var = atoi(sptr) - 1;
538 	     assert(var >= 0 && var < _maxoptvars);
539 	     assert(symbols[var][0] != '\0');	/* variable should be defined */
540 	     *repl_length += strlen(symbols[var]);
541 	     break;
542 	  case '!':
543 	     *repl_length += 3;	/* '\t', '\n' & '\0' */
544 	     break;
545 	  default:
546 	     *repl_length += 1;
547 	  }			/* switch */
548 	sptr++;
549      }				/* while */
550 
551    /* allocate a buffer to replace the sequence in */
552    if (!(buffer = malloc(*repl_length)))
553      {
554 	error(103);
555 	return NULL;
556      }
557 
558    /* replace the pattern into this temporary buffer */
559    sptr = buffer;
560    *sptr++ = '\t';		/* the "replace" patterns do not have tabs */
561    while (*pattern)
562      {
563 	assert((int)(sptr - buffer) < *repl_length);
564 	switch (*pattern)
565 	  {
566 	  case '%':
567 	     /* write out the symbol */
568 	     pattern++;
569 	     assert(sc_isdigit(*pattern));
570 	     var = atoi(pattern) - 1;
571 	     assert(var >= 0 && var < _maxoptvars);
572 	     assert(symbols[var][0] != '\0');	/* variable should be defined */
573 	     strcpy(sptr, symbols[var]);
574 	     sptr += strlen(symbols[var]);
575 	     break;
576 	  case '!':
577 	     /* finish the line, optionally start the next line with an indent */
578 	     *sptr++ = '\n';
579 	     *sptr++ = '\0';
580 	     if (*(pattern + 1) != '\0')
581 		*sptr++ = '\t';
582 	     break;
583 	  default:
584 	     *sptr++ = *pattern;
585 	  }			/* switch */
586 	pattern++;
587      }				/* while */
588 
589    assert((int)(sptr - buffer) == *repl_length);
590    return buffer;
591 }
592 
593 static void
strreplace(char * dest,char * replace,int sub_length,int repl_length,int dest_length)594 strreplace(char *dest, char *replace, int sub_length, int repl_length,
595 	   int dest_length)
596 {
597    int                 offset = sub_length - repl_length;
598 
599    if (offset > 0)		/* delete a section */
600       memmove(dest, dest + offset, dest_length - offset);
601    else if (offset < 0)		/* insert a section */
602       memmove(dest - offset, dest, dest_length);
603    memcpy(dest, replace, repl_length);
604 }
605 
606 /*  stgopt
607  *
608  *  Optimizes the staging buffer by checking for series of instructions that
609  *  can be coded more compact. The routine expects the lines in the staging
610  *  buffer to be separated with '\n' and '\0' characters.
611  *
612  *  The longest sequences must be checked first.
613  */
614 
615 static void
stgopt(char * start,char * end)616 stgopt(char *start, char *end)
617 {
618    char                symbols[_maxoptvars][_aliasmax + 1];
619    int                 seq, match_length, repl_length;
620 
621    assert(sequences != NULL);
622    while (start < end)
623      {
624 	if ((sc_debug & sNOOPTIMIZE) != 0 || sc_status != statWRITE)
625 	  {
626 	     /* do not match anything if debug-level is maximum */
627 	     filewrite(start);
628 	  }
629 	else
630 	  {
631 	     seq = 0;
632 	     while (sequences[seq].find)
633 	       {
634 		  assert(seq >= 0);
635 		  if (matchsequence
636 		      (start, end, sequences[seq].find, symbols, &match_length))
637 		    {
638 		       char               *replace =
639 			  replacesequence(sequences[seq].replace, symbols,
640 					  &repl_length);
641 		       /* If the replacement is bigger than the original section, we may need
642 		        * to "grow" the staging buffer. This is quite complex, due to the
643 		        * re-ordering of expressions that can also happen in the staging
644 		        * buffer. In addition, it should not happen: the peephole optimizer
645 		        * must replace sequences with *shorter* sequences, not longer ones.
646 		        * So, I simply forbid sequences that are longer than the ones they
647 		        * are meant to replace.
648 		        */
649 		       assert(match_length >= repl_length);
650 		       if (match_length >= repl_length)
651 			 {
652 			    strreplace(start, replace, match_length,
653 				       repl_length, (int)(end - start));
654 			    end -= match_length - repl_length;
655 			    free(replace);
656 			    code_idx -= sequences[seq].savesize;
657 			    seq = 0;	/* restart search for matches */
658 			 }
659 		       else
660 			 {
661 			    /* actually, we should never get here (match_length<repl_length) */
662 			    assert(0);
663 			    seq++;
664 			 }	/* if */
665 		    }
666 		  else
667 		    {
668 		       seq++;
669 		    }		/* if */
670 	       }		/* while */
671 	     assert(sequences[seq].find == NULL);
672 	     filewrite(start);
673 	  }			/* if */
674 	assert(start < end);
675 	start += strlen(start) + 1;	/* to next string */
676      }				/* while (start<end) */
677 }
678 
679 #undef SCPACK_TABLE
680