1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  * vim: set ts=8 sts=4 et sw=4 tw=99:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include <assert.h>
8 #include <ctype.h>
9 #include <stdarg.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include "vm/Keywords.h"
15 
16 static const char * const keyword_list[] = {
17 #define KEYWORD_STRING(keyword, name, type) #keyword,
18     FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_STRING)
19 #undef KEYWORD_STRING
20 };
21 
22 struct gen_opt {
23     FILE* output;                       /* output file for generated source */
24     unsigned use_if_threshold;          /* max number of choices to generate
25                                            "if" selector instead of "switch" */
26     unsigned char_tail_test_threshold;  /* max number of unprocessed columns
27                                            to use inlined char compare
28                                            for remaining chars and not generic
29                                            string compare code */
30     unsigned indent_level;              /* current source identation level */
31 };
32 
33 static unsigned column_to_compare;
34 
35 static int
length_comparator(const void * a,const void * b)36 length_comparator(const void* a, const void* b)
37 {
38     const char* str1 = keyword_list[*(unsigned*)a];
39     const char* str2 = keyword_list[*(unsigned*)b];
40     return (int)strlen(str1) - (int)strlen(str2);
41 }
42 
43 static int
column_comparator(const void * a,const void * b)44 column_comparator(const void* a, const void* b)
45 {
46     const char* str1 = keyword_list[*(unsigned*)a];
47     const char* str2 = keyword_list[*(unsigned*)b];
48     return (int)str1[column_to_compare] - (int)str2[column_to_compare];
49 }
50 
51 static unsigned
count_different_lengths(unsigned indexes[],unsigned nelem)52 count_different_lengths(unsigned indexes[], unsigned nelem)
53 {
54     unsigned nlength, current_length, i, l;
55 
56     current_length = 0;
57     nlength = 0;
58     for (i = 0; i != nelem; ++i) {
59         l = (unsigned)strlen(keyword_list[indexes[i]]);
60         assert(l != 0);
61         if (current_length != l) {
62             ++nlength;
63             current_length = l;
64         }
65     }
66     return nlength;
67 }
68 
69 static void
find_char_span_and_count(unsigned indexes[],unsigned nelem,unsigned column,unsigned * span_result,unsigned * count_result)70 find_char_span_and_count(unsigned indexes[], unsigned nelem, unsigned column,
71                          unsigned* span_result, unsigned* count_result)
72 {
73     unsigned i, count;
74     unsigned char c, prev, minc, maxc;
75 
76     assert(nelem != 0);
77     minc = maxc = prev = (unsigned char)keyword_list[indexes[0]][column];
78     count = 1;
79     for (i = 1; i != nelem; ++i) {
80         c = (unsigned char)keyword_list[indexes[i]][column];
81         if (prev != c) {
82             prev = c;
83             ++count;
84             if (minc > c) {
85                 minc = c;
86             } else if (maxc < c) {
87                 maxc = c;
88             }
89         }
90     }
91 
92     *span_result = maxc - minc + 1;
93     *count_result = count;
94 }
95 
96 static unsigned
find_optimal_switch_column(struct gen_opt * opt,unsigned indexes[],unsigned nelem,unsigned columns[],unsigned unprocessed_columns,int * use_if_result)97 find_optimal_switch_column(struct gen_opt* opt,
98                            unsigned indexes[], unsigned nelem,
99                            unsigned columns[], unsigned unprocessed_columns,
100                            int* use_if_result)
101 {
102     unsigned i;
103     unsigned span, min_span, min_span_index;
104     unsigned nchar, min_nchar, min_nchar_index;
105 
106     assert(unprocessed_columns != 0);
107     i = 0;
108     min_nchar = min_span = (unsigned)-1;
109     min_nchar_index = min_span_index = 0;
110     do {
111         column_to_compare = columns[i];
112         qsort(indexes, nelem, sizeof(indexes[0]), column_comparator);
113         find_char_span_and_count(indexes, nelem, column_to_compare,
114                                  &span, &nchar);
115         assert(span != 0);
116         if (span == 1) {
117             assert(nchar == 1);
118             *use_if_result = 1;
119             return 1;
120         }
121         assert(nchar != 1);
122         if (min_span > span) {
123             min_span = span;
124             min_span_index = i;
125         }
126         if (min_nchar > nchar) {
127             min_nchar = nchar;
128             min_nchar_index = i;
129         }
130     } while (++i != unprocessed_columns);
131 
132     if (min_nchar <= opt->use_if_threshold) {
133         *use_if_result = 1;
134         i = min_nchar_index;
135     } else {
136         *use_if_result = 0;
137         i = min_span_index;
138     }
139 
140     /*
141      * Restore order corresponding to i if it was destroyed by
142      * subsequent sort.
143      */
144     if (i != unprocessed_columns - 1) {
145         column_to_compare = columns[i];
146         qsort(indexes, nelem, sizeof(indexes[0]), column_comparator);
147     }
148 
149     return i;
150 }
151 
152 
153 static void
p(struct gen_opt * opt,const char * format,...)154 p(struct gen_opt* opt, const char* format, ...)
155 {
156     va_list ap;
157 
158     va_start(ap, format);
159     vfprintf(opt->output, format, ap);
160     va_end(ap);
161 }
162 
163 /* Size for '\xxx' where xxx is octal escape */
164 #define MIN_QUOTED_CHAR_BUFFER 7
165 
166 static char*
qchar(char c,char * quoted_buffer)167 qchar(char c, char* quoted_buffer)
168 {
169     char* s;
170 
171     s = quoted_buffer;
172     *s++ = '\'';
173     switch (c) {
174       case '\n': c = 'n'; goto one_char_escape;
175       case '\r': c = 'r'; goto one_char_escape;
176       case '\t': c = 't'; goto one_char_escape;
177       case '\f': c = 't'; goto one_char_escape;
178       case '\0': c = '0'; goto one_char_escape;
179       case '\'': goto one_char_escape;
180       one_char_escape:
181         *s++ = '\\';
182         break;
183       default:
184         if (!isprint(c)) {
185             *s++ = '\\';
186             *s++ = (char)('0' + (0x3 & (((unsigned char)c) >> 6)));
187             *s++ = (char)('0' + (0x7 & (((unsigned char)c) >> 3)));
188             c = (char)('0' + (0x7 & ((unsigned char)c)));
189         }
190     }
191     *s++ = c;
192     *s++ = '\'';
193     *s = '\0';
194     assert(s + 1 <= quoted_buffer + MIN_QUOTED_CHAR_BUFFER);
195     return quoted_buffer;
196 }
197 
198 static void
nl(struct gen_opt * opt)199 nl(struct gen_opt* opt)
200 {
201     putc('\n', opt->output);
202 }
203 
204 static void
indent(struct gen_opt * opt)205 indent(struct gen_opt* opt)
206 {
207     unsigned n = opt->indent_level;
208     while (n != 0) {
209         --n;
210         fputs("    ", opt->output);
211     }
212 }
213 
214 static void
line(struct gen_opt * opt,const char * format,...)215 line(struct gen_opt* opt, const char* format, ...)
216 {
217     va_list ap;
218 
219     indent(opt);
220     va_start(ap, format);
221     vfprintf(opt->output, format, ap);
222     va_end(ap);
223     nl(opt);
224 }
225 
226 static void
generate_letter_switch_r(struct gen_opt * opt,unsigned indexes[],unsigned nelem,unsigned columns[],unsigned unprocessed_columns)227 generate_letter_switch_r(struct gen_opt* opt,
228                          unsigned indexes[], unsigned nelem,
229                          unsigned columns[], unsigned unprocessed_columns)
230 {
231     char qbuf[MIN_QUOTED_CHAR_BUFFER];
232 
233     assert(nelem != 0);
234     if (nelem == 1) {
235         unsigned kw_index = indexes[0];
236         const char* keyword = keyword_list[kw_index];
237 
238         if (unprocessed_columns == 0) {
239             line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword);
240         } else if (unprocessed_columns > opt->char_tail_test_threshold) {
241             line(opt, "JSKW_TEST_GUESS(%u) /* %s */", kw_index, keyword);
242         } else {
243             unsigned i, column;
244 
245             indent(opt); p(opt, "if (");
246             for (i = 0; i != unprocessed_columns; ++i) {
247                 column = columns[i];
248                 qchar(keyword[column], qbuf);
249                 p(opt, "%sJSKW_AT(%u)==%s", (i == 0) ? "" : " && ",
250                   column, qbuf);
251             }
252             p(opt, ") {"); nl(opt);
253             ++opt->indent_level;
254             line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword);
255             --opt->indent_level;
256             line(opt, "}");
257             line(opt, "JSKW_NO_MATCH()");
258         }
259     } else {
260         unsigned optimal_column_index, optimal_column;
261         unsigned i;
262         int use_if;
263         char current;
264 
265         assert(unprocessed_columns != 0);
266         optimal_column_index = find_optimal_switch_column(opt, indexes, nelem,
267                                                           columns,
268                                                           unprocessed_columns,
269                                                           &use_if);
270         optimal_column = columns[optimal_column_index];
271         columns[optimal_column_index] = columns[unprocessed_columns - 1];
272 
273         if (!use_if)
274             line(opt, "switch (JSKW_AT(%u)) {", optimal_column);
275 
276         current = keyword_list[indexes[0]][optimal_column];
277         for (i = 0; i != nelem;) {
278             unsigned same_char_begin = i;
279             char next = current;
280 
281             for (++i; i != nelem; ++i) {
282                 next = keyword_list[indexes[i]][optimal_column];
283                 if (next != current)
284                     break;
285             }
286             qchar(current, qbuf);
287             if (use_if) {
288                 line(opt, "if (JSKW_AT(%u) == %s) {", optimal_column, qbuf);
289             } else {
290                 line(opt, "  case %s:", qbuf);
291             }
292             ++opt->indent_level;
293             generate_letter_switch_r(opt, indexes + same_char_begin,
294                                      i - same_char_begin,
295                                      columns, unprocessed_columns - 1);
296             --opt->indent_level;
297             if (use_if) {
298                 line(opt, "}");
299             }
300             current = next;
301         }
302 
303         if (!use_if) {
304             line(opt, "}");
305         }
306 
307         columns[optimal_column_index] = optimal_column;
308 
309         line(opt, "JSKW_NO_MATCH()");
310     }
311 }
312 
313 static void
generate_letter_switch(struct gen_opt * opt,unsigned indexes[],unsigned nelem,unsigned current_length)314 generate_letter_switch(struct gen_opt* opt,
315                        unsigned indexes[], unsigned nelem,
316                        unsigned current_length)
317 {
318     unsigned* columns;
319     unsigned i;
320 
321     columns = (unsigned*) malloc(sizeof(columns[0]) * current_length);
322     if (!columns) {
323         perror("malloc");
324         exit(EXIT_FAILURE);
325     }
326     for (i = 0; i != current_length; ++i) {
327         columns[i] = i;
328     }
329     generate_letter_switch_r(opt, indexes, nelem, columns, current_length);
330     free(columns);
331 }
332 
333 
334 static void
generate_switch(struct gen_opt * opt)335 generate_switch(struct gen_opt* opt)
336 {
337     unsigned* indexes;
338     unsigned nlength;
339     unsigned i, current;
340     int use_if;
341     unsigned nelem;
342 
343     nelem = sizeof(keyword_list)/sizeof(keyword_list[0]);
344 
345     line(opt, "/*");
346     line(opt, " * Generating switch for the list of %u entries:", nelem);
347     for (i = 0; i != nelem; ++i) {
348         line(opt, " * %s", keyword_list[i]);
349     }
350     line(opt, " */");
351 
352     indexes = (unsigned*) malloc(sizeof(indexes[0]) * nelem);
353     if (!indexes) {
354         perror("malloc");
355         exit(EXIT_FAILURE);
356     }
357     for (i = 0; i != nelem; ++i)
358         indexes[i] = i;
359     qsort(indexes, nelem, sizeof(indexes[i]), length_comparator);
360     nlength = count_different_lengths(indexes, nelem);
361 
362     use_if = (nlength <= opt->use_if_threshold);
363 
364     if (!use_if)
365         line(opt, "switch (JSKW_LENGTH()) {");
366 
367     current = (unsigned)strlen(keyword_list[indexes[0]]);
368     for (i = 0; i != nelem;) {
369         unsigned same_length_begin = i;
370         unsigned next = current;
371 
372         for (++i; i != nelem; ++i) {
373             next = (unsigned)strlen(keyword_list[indexes[i]]);
374             if (next != current)
375                 break;
376         }
377         if (use_if) {
378             line(opt, "if (JSKW_LENGTH() == %u) {", current);
379         } else {
380             line(opt, "  case %u:", current);
381         }
382         ++opt->indent_level;
383         generate_letter_switch(opt, indexes + same_length_begin,
384                                i - same_length_begin,
385                                current);
386         --opt->indent_level;
387         if (use_if) {
388             line(opt, "}");
389         }
390         current = next;
391     }
392     if (!use_if)
393         line(opt, "}");
394     line(opt, "JSKW_NO_MATCH()");
395     free(indexes);
396 }
397 
main(int argc,char ** argv)398 int main(int argc, char** argv)
399 {
400     struct gen_opt opt;
401 
402     if (argc < 2) {
403         opt.output = stdout;
404     } else {
405         opt.output = fopen(argv[1], "w");
406         if (!opt.output) {
407             perror("fopen");
408             exit(EXIT_FAILURE);
409         }
410     }
411     opt.indent_level = 1;
412     opt.use_if_threshold = 3;
413     opt.char_tail_test_threshold = 4;
414 
415     generate_switch(&opt);
416 
417     if (opt.output != stdout) {
418         if (fclose(opt.output)) {
419             perror("fclose");
420             exit(EXIT_FAILURE);
421         }
422     }
423 
424     return EXIT_SUCCESS;
425 }
426