1 /*
2 ** Compile and run this standalone program in order to generate code that
3 ** implements a function that will translate alphabetic identifiers into
4 ** parser token codes.
5 */
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <assert.h>
10
11 /* if TEST_RESERVED_WORDS is defined, then the test_keywords() function is created
12 * which displays errors for any keyword
13 */
14 #ifndef TEST_RESERVED_WORDS
15 #ifdef GDA_DEBUG
16 #define TEST_RESERVED_WORDS
17 #endif
18 #endif
19
20 /*
21 ** A header comment placed at the beginning of generated code.
22 */
23 static const char zHdr[] =
24 "/* file contains automatically generated code, DO NOT MODIFY *\n"
25 " *\n"
26 " * The code in this file implements a function that determines whether\n"
27 " * or not a given identifier is really an SQL keyword. The same thing\n"
28 " * might be implemented more directly using a hand-written hash table.\n"
29 " * But by using this automatically generated code, the size of the code\n"
30 " * is substantially reduced. This is important for embedded applications\n"
31 " * on platforms with limited memory.\n"
32 " *\n"
33 " * This code has been copied from SQLite's mkkeywordhash.c file and modified.\n"
34 " * to read the SQL keywords from a file instead of static ones\n"
35 " */\n";
36
37 /*
38 ** All the keywords of the SQL language are stored in a hash
39 ** table composed of instances of the following structure.
40 */
41 typedef struct Keyword Keyword;
42 struct Keyword {
43 char *zName; /* The keyword name */
44 int id; /* Unique ID for this record */
45 int hash; /* Hash on the keyword */
46 int offset; /* Offset to start of name string */
47 int len; /* Length of this keyword, not counting final \000 */
48 int prefix; /* Number of characters in prefix */
49 int longestSuffix; /* Longest suffix that is a prefix on another word */
50 int iNext; /* Index in aKeywordTable[] of next with same hash */
51 int substrId; /* Id to another keyword this keyword is embedded in */
52 int substrOffset; /* Offset into substrId for start of this keyword */
53 char zOrigName[40]; /* Original keyword name before processing */
54 };
55
56 Keyword *aKeywordTable = NULL;
57 int nKeyword = 0;
58 char *prefix = NULL;
59
60 /* An array to map all upper-case characters into their corresponding
61 ** lower-case character.
62 */
63 const unsigned char UpperToLower[] = {
64 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
65 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
66 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
67 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
68 104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
69 122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107,
70 108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
71 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
72 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,
73 162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,
74 180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,
75 198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,
76 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,
77 234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,
78 252,253,254,255
79 };
80
81 /*
82 ** Comparision function for two Keyword records
83 */
keywordCompare1(const void * a,const void * b)84 static int keywordCompare1(const void *a, const void *b){
85 const Keyword *pA = (Keyword*)a;
86 const Keyword *pB = (Keyword*)b;
87 int n = pA->len - pB->len;
88 if( n==0 ){
89 n = strcmp(pA->zName, pB->zName);
90 }
91 return n;
92 }
keywordCompare2(const void * a,const void * b)93 static int keywordCompare2(const void *a, const void *b){
94 const Keyword *pA = (Keyword*)a;
95 const Keyword *pB = (Keyword*)b;
96 int n = pB->longestSuffix - pA->longestSuffix;
97 if( n==0 ){
98 n = strcmp(pA->zName, pB->zName);
99 }
100 return n;
101 }
keywordCompare3(const void * a,const void * b)102 static int keywordCompare3(const void *a, const void *b){
103 const Keyword *pA = (Keyword*)a;
104 const Keyword *pB = (Keyword*)b;
105 int n = pA->offset - pB->offset;
106 return n;
107 }
108
109 /*
110 ** Return a KeywordTable entry with the given id
111 */
findById(int id)112 static Keyword *findById(int id){
113 int i;
114 for(i=0; i<nKeyword; i++){
115 if( aKeywordTable[i].id==id ) break;
116 }
117 return &aKeywordTable[i];
118 }
119
120 /* @len is the number of chars, NOT including the \0 at the end */
121 static void
add_keywork(const char * keyword,int len)122 add_keywork (const char *keyword, int len)
123 {
124 #define MAXKEYWORDS 1000
125 const char *ptr;
126
127 if (len == 0)
128 return;
129 for (ptr = keyword; *ptr && (ptr - keyword) < len; ptr++) {
130 if (((*ptr < 'A') || (*ptr > 'Z')) &&
131 ((*ptr < 'a') || (*ptr > 'z')) &&
132 ((*ptr < '0') || (*ptr > '9')) &&
133 (*ptr != '_')) {
134 /* ignore this keyword */
135 return;
136 }
137 }
138
139 aKeywordTable [nKeyword].zName = malloc (sizeof (char) * (len + 1));
140 memcpy (aKeywordTable [nKeyword].zName, keyword, sizeof (char) * len);
141 aKeywordTable [nKeyword].zName [len] = 0;
142
143 printf (" * KEYWORD: '%s'\n", aKeywordTable [nKeyword].zName);
144
145 nKeyword ++;
146 if (nKeyword > MAXKEYWORDS) {
147 fprintf (stderr, "Maximum number of SQL keywords exceeded, "
148 "change the MAXKEYWORDS constant and recompile\n");
149 exit (1);
150 }
151 }
152
153 static void
parse_input(const char * filename)154 parse_input (const char *filename)
155 {
156 #define BUFSIZE 500
157 FILE *stream;
158 char buffer[BUFSIZE+1];
159 int read;
160 char *end;
161
162 stream = fopen (filename, "r");
163 if (!stream)
164 return;
165
166 printf ("\n/* Parsed keywords\n");
167
168 aKeywordTable = malloc (sizeof (Keyword) * MAXKEYWORDS);
169 memset (aKeywordTable, 0, sizeof (Keyword) * MAXKEYWORDS);
170 read = fread (buffer, 1, BUFSIZE, stream);
171 end = buffer + read;
172 *end = 0;
173 while (read > 0) {
174 char *ptr;
175
176 /* read all complete lines in buffer */
177 while (end > buffer) {
178 char *hold = NULL;
179 for (ptr = buffer; (ptr < end) && *ptr && (*ptr != '\n'); ptr++);
180 if (ptr == end)
181 break;
182 if (*ptr)
183 hold = ptr+1;
184 *ptr = 0;
185
186 /* treat the line */
187 if (*buffer && *buffer != '#') {
188 char *tmp1, *tmp2;
189 printf (" *\n * From line: %s\n", buffer);
190 for (tmp1 = tmp2 = buffer; *tmp2; tmp2++) {
191 if (((*tmp2 < 'A') || (*tmp2 > 'Z')) &&
192 ((*tmp2 < 'a') || (*tmp2 > 'z')) &&
193 ((*tmp2 < '0') || (*tmp2 > '9')) &&
194 (*tmp2 != '_')) {
195 /* keyword found */
196 add_keywork (tmp1, tmp2 - tmp1);
197
198 tmp1 = tmp2 + 1;
199 }
200 }
201 if ((tmp1 != tmp2) && *tmp1)
202 add_keywork (tmp1, tmp2 - tmp1);
203 }
204
205 if (hold) {
206 int l = end - hold;
207 end -= hold - buffer;
208 memmove (buffer, hold, l);
209 }
210 else
211 break;
212 }
213
214 read = fread (end, 1, BUFSIZE - (end - buffer), stream);
215 end += read;
216 }
217
218 printf (" */\n");
219
220 #ifdef TEST_RESERVED_WORDS
221 int i;
222 printf ("static char *%skeywords[] = {\n", prefix ? prefix : "");
223 for (i = 0; i < nKeyword; i++)
224 printf ("\t\"%s\",\n", aKeywordTable [i].zName);
225 printf ("};\n");
226 #endif
227
228 fclose (stream);
229 }
230
231 /*
232 ** This routine does the work. The generated code is printed on standard
233 ** output.
234 */
235 int
main(int argc,char ** argv)236 main (int argc, char **argv)
237 {
238 int i, j, k, h;
239 int bestSize, bestCount;
240 int count;
241 int nChar;
242 int totalLen = 0;
243 int aHash[5000]; /* 2000 is much bigger than nKeyword */
244 char zText[10000];
245
246 if ((argc < 2) || (argc > 3)) {
247 printf ("Usage: %s <file with SQL keywords> [<prefix>]\n", argv[0]);
248 return 1;
249 }
250 if (argc == 3)
251 prefix = argv[2];
252
253 printf("%s", zHdr);
254 parse_input (argv[1]);
255 if (!aKeywordTable) {
256 printf ("Error parsing file '%s', aborting\n", argv[1]);
257 return 1;
258 }
259
260 /* Fill in the lengths of strings and hashes for all entries. */
261 for(i=0; i<nKeyword; i++){
262 Keyword *p = &aKeywordTable[i];
263 p->len = strlen(p->zName);
264 assert( p->len<sizeof(p->zOrigName) );
265 strcpy(p->zOrigName, p->zName);
266 totalLen += p->len;
267 p->hash = (UpperToLower[(int)p->zName[0]]*4) ^
268 (UpperToLower[(int)p->zName[p->len-1]] *3) ^ p->len;
269 p->id = i+1;
270 }
271
272 /* Sort the table from shortest to longest keyword */
273 qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1);
274
275 /* Look for short keywords embedded in longer keywords */
276 for(i=nKeyword-2; i>=0; i--){
277 Keyword *p = &aKeywordTable[i];
278 for(j=nKeyword-1; j>i && p->substrId==0; j--){
279 Keyword *pOther = &aKeywordTable[j];
280 if( pOther->substrId ) continue;
281 if( pOther->len<=p->len ) continue;
282 for(k=0; k<=pOther->len-p->len; k++){
283 if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){
284 p->substrId = pOther->id;
285 p->substrOffset = k;
286 break;
287 }
288 }
289 }
290 }
291
292 /* Compute the longestSuffix value for every word */
293 for(i=0; i<nKeyword; i++){
294 Keyword *p = &aKeywordTable[i];
295 if( p->substrId ) continue;
296 for(j=0; j<nKeyword; j++){
297 Keyword *pOther;
298 if( j==i ) continue;
299 pOther = &aKeywordTable[j];
300 if( pOther->substrId ) continue;
301 for(k=p->longestSuffix+1; k<p->len && k<pOther->len; k++){
302 if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
303 p->longestSuffix = k;
304 }
305 }
306 }
307 }
308
309 /* Sort the table into reverse order by length */
310 qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare2);
311
312 /* Fill in the offset for all entries */
313 nChar = 0;
314 for(i=0; i<nKeyword; i++){
315 Keyword *p = &aKeywordTable[i];
316 if( p->offset>0 || p->substrId ) continue;
317 p->offset = nChar;
318 nChar += p->len;
319 for(k=p->len-1; k>=1; k--){
320 for(j=i+1; j<nKeyword; j++){
321 Keyword *pOther = &aKeywordTable[j];
322 if( pOther->offset>0 || pOther->substrId ) continue;
323 if( pOther->len<=k ) continue;
324 if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
325 p = pOther;
326 p->offset = nChar - k;
327 nChar = p->offset + p->len;
328 p->zName += k;
329 p->len -= k;
330 p->prefix = k;
331 j = i;
332 k = p->len;
333 }
334 }
335 }
336 }
337 for(i=0; i<nKeyword; i++){
338 Keyword *p = &aKeywordTable[i];
339 if( p->substrId ){
340 p->offset = findById(p->substrId)->offset + p->substrOffset;
341 }
342 }
343
344 /* Sort the table by offset */
345 qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3);
346
347 /* Figure out how big to make the hash table in order to minimize the
348 ** number of collisions */
349 bestSize = nKeyword;
350 bestCount = nKeyword*nKeyword;
351 for(i=nKeyword/2; i<=2*nKeyword; i++){
352 for(j=0; j<i; j++) aHash[j] = 0;
353 for(j=0; j<nKeyword; j++){
354 h = aKeywordTable[j].hash % i;
355 aHash[h] *= 2;
356 aHash[h]++;
357 }
358 for(j=count=0; j<i; j++) count += aHash[j];
359 if( count<bestCount ){
360 bestCount = count;
361 bestSize = i;
362 }
363 }
364
365 /* Compute the hash */
366 for(i=0; i<bestSize; i++) aHash[i] = 0;
367 for(i=0; i<nKeyword; i++){
368 h = aKeywordTable[i].hash % bestSize;
369 aKeywordTable[i].iNext = aHash[h];
370 aHash[h] = i+1;
371 }
372
373 /* Begin generating code */
374 printf("/* Hash score: %d */\n", bestCount);
375 printf("static int %skeywordCode(const char *z, int n){\n", prefix ? prefix : "");
376 printf(" /* zText[] encodes %d bytes of keywords in %d bytes */\n",
377 totalLen + nKeyword, nChar+1 );
378 for(i=j=k=0; i<nKeyword; i++){
379 Keyword *p = &aKeywordTable[i];
380 if( p->substrId ) continue;
381 memcpy(&zText[k], p->zName, p->len);
382 k += p->len;
383 if( j+p->len>70 ){
384 printf("%*s */\n", 74-j, "");
385 j = 0;
386 }
387 if( j==0 ){
388 printf(" /* ");
389 j = 8;
390 }
391 printf("%s", p->zName);
392 j += p->len;
393 }
394 if( j>0 ){
395 printf("%*s */\n", 74-j, "");
396 }
397 printf(" static const char %szText[%d] = {\n", prefix ? prefix : "", nChar);
398 zText[nChar] = 0;
399 for(i=j=0; i<k; i++){
400 if( j==0 ){
401 printf(" ");
402 }
403 if( zText[i]==0 ){
404 printf("0");
405 }else{
406 printf("'%c',", zText[i]);
407 }
408 j += 4;
409 if( j>68 ){
410 printf("\n");
411 j = 0;
412 }
413 }
414 if( j>0 ) printf("\n");
415 printf(" };\n");
416
417 printf(" static const unsigned int %saHash[%d] = {\n", prefix ? prefix : "", bestSize);
418 for(i=j=0; i<bestSize; i++){
419 if( j==0 ) printf(" ");
420 printf(" %d,", aHash[i]);
421 j++;
422 if( j>12 ){
423 printf("\n");
424 j = 0;
425 }
426 }
427 printf("%s };\n", j==0 ? "" : "\n");
428
429 printf(" static const unsigned int %saNext[%d] = {\n", prefix ? prefix : "", nKeyword);
430 for(i=j=0; i<nKeyword; i++){
431 if( j==0 ) printf(" ");
432 printf(" %3d,", aKeywordTable[i].iNext);
433 j++;
434 if( j>12 ){
435 printf("\n");
436 j = 0;
437 }
438 }
439 printf("%s };\n", j==0 ? "" : "\n");
440
441 printf(" static const unsigned char %saLen[%d] = {\n", prefix ? prefix : "", nKeyword);
442 for(i=j=0; i<nKeyword; i++){
443 if( j==0 ) printf(" ");
444 printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix);
445 j++;
446 if( j>12 ){
447 printf("\n");
448 j = 0;
449 }
450 }
451 printf("%s };\n", j==0 ? "" : "\n");
452
453 printf(" static const unsigned short int %saOffset[%d] = {\n", prefix ? prefix : "", nKeyword);
454 for(i=j=0; i<nKeyword; i++){
455 if( j==0 ) printf(" ");
456 printf(" %3d,", aKeywordTable[i].offset);
457 j++;
458 if( j>12 ){
459 printf("\n");
460 j = 0;
461 }
462 }
463 printf("%s };\n", j==0 ? "" : "\n");
464
465
466 printf(" int h, i;\n");
467 printf(" if( n<2 ) return 0;\n");
468 printf(" h = ((charMap(z[0])*4) ^\n"
469 " (charMap(z[n-1])*3) ^\n"
470 " n) %% %d;\n", bestSize);
471 printf(" for(i=((int)%saHash[h])-1; i>=0; i=((int)%saNext[i])-1){\n",
472 prefix ? prefix : "", prefix ? prefix : "");
473 printf(" if( %saLen[i]==n &&"
474 " casecmp(&%szText[%saOffset[i]],z,n)==0 ){\n",
475 prefix ? prefix : "",
476 prefix ? prefix : "",
477 prefix ? prefix : "");
478 printf(" return 1;\n");
479 printf(" }\n");
480 printf(" }\n");
481 printf(" return 0;\n");
482 printf("}\n");
483 printf("\nstatic gboolean\n%sis_keyword (const char *z)\n{\n", prefix ? prefix : "");
484 printf("\treturn %skeywordCode(z, strlen (z));\n", prefix ? prefix : "");
485 printf("}\n\n");
486
487 #ifdef TEST_RESERVED_WORDS
488 printf("\nstatic void\n%stest_keywords (void)\n{\n", prefix ? prefix : "");
489 printf("\tint i;\n");
490 printf("\tfor (i = 0; i < %d; i++) {\n", nKeyword);
491 printf("\t\tif (! %sis_keyword (%skeywords[i]))\n", prefix ? prefix : "", prefix ? prefix : "");
492 printf("\t\t\tg_print (\"KEYWORK %%s ignored!\\n\", %skeywords[i]);\n", prefix ? prefix : "");
493 printf("\t}\n");
494 printf("}\n\n");
495 #endif
496
497
498 return 0;
499 }
500