1 /*
2 ** Compile and run this standalone program in order to generate code that
3 ** implements a function that will translate alphabetic identifiers into
4 ** parser token codes.
5 */
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <assert.h>
10 
11 /* if TEST_RESERVED_WORDS is defined, then the test_keywords() function is created
12  * which displays errors for any keyword
13  */
14 #ifndef TEST_RESERVED_WORDS
15 #ifdef GDA_DEBUG
16 #define TEST_RESERVED_WORDS
17 #endif
18 #endif
19 
20 /*
21 ** A header comment placed at the beginning of generated code.
22 */
23 static const char zHdr[] =
24 	"/* file contains automatically generated code, DO NOT MODIFY *\n"
25 	" *\n"
26 	" * The code in this file implements a function that determines whether\n"
27 	" * or not a given identifier is really an SQL keyword.  The same thing\n"
28 	" * might be implemented more directly using a hand-written hash table.\n"
29 	" * But by using this automatically generated code, the size of the code\n"
30 	" * is substantially reduced.  This is important for embedded applications\n"
31 	" * on platforms with limited memory.\n"
32 	" *\n"
33 	" * This code has been copied from SQLite's mkkeywordhash.c file and modified.\n"
34 	" * to read the SQL keywords from a file instead of static ones\n"
35 	" */\n";
36 
37 /*
38 ** All the keywords of the SQL language are stored in a hash
39 ** table composed of instances of the following structure.
40 */
41 typedef struct Keyword Keyword;
42 struct Keyword {
43 	char *zName;         /* The keyword name */
44 	int id;              /* Unique ID for this record */
45 	int hash;            /* Hash on the keyword */
46 	int offset;          /* Offset to start of name string */
47 	int len;             /* Length of this keyword, not counting final \000 */
48 	int prefix;          /* Number of characters in prefix */
49 	int longestSuffix;   /* Longest suffix that is a prefix on another word */
50 	int iNext;           /* Index in aKeywordTable[] of next with same hash */
51 	int substrId;        /* Id to another keyword this keyword is embedded in */
52 	int substrOffset;    /* Offset into substrId for start of this keyword */
53 	char zOrigName[40];  /* Original keyword name before processing */
54 };
55 
56 Keyword *aKeywordTable = NULL;
57 int nKeyword = 0;
58 char *prefix = NULL;
59 
60 /* An array to map all upper-case characters into their corresponding
61 ** lower-case character.
62 */
63 const unsigned char UpperToLower[] = {
64 	0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
65 	18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
66 	36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
67 	54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
68 	104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
69 	122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107,
70 	108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
71 	126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
72 	144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,
73 	162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,
74 	180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,
75 	198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,
76 	216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,
77 	234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,
78 	252,253,254,255
79 };
80 
81 /*
82 ** Comparision function for two Keyword records
83 */
keywordCompare1(const void * a,const void * b)84 static int keywordCompare1(const void *a, const void *b){
85 	const Keyword *pA = (Keyword*)a;
86 	const Keyword *pB = (Keyword*)b;
87 	int n = pA->len - pB->len;
88 	if( n==0 ){
89 		n = strcmp(pA->zName, pB->zName);
90 	}
91 	return n;
92 }
keywordCompare2(const void * a,const void * b)93 static int keywordCompare2(const void *a, const void *b){
94 	const Keyword *pA = (Keyword*)a;
95 	const Keyword *pB = (Keyword*)b;
96 	int n = pB->longestSuffix - pA->longestSuffix;
97 	if( n==0 ){
98 		n = strcmp(pA->zName, pB->zName);
99 	}
100 	return n;
101 }
keywordCompare3(const void * a,const void * b)102 static int keywordCompare3(const void *a, const void *b){
103 	const Keyword *pA = (Keyword*)a;
104 	const Keyword *pB = (Keyword*)b;
105 	int n = pA->offset - pB->offset;
106 	return n;
107 }
108 
109 /*
110 ** Return a KeywordTable entry with the given id
111 */
findById(int id)112 static Keyword *findById(int id){
113 	int i;
114 	for(i=0; i<nKeyword; i++){
115 		if( aKeywordTable[i].id==id ) break;
116 	}
117 	return &aKeywordTable[i];
118 }
119 
120 /* @len is the number of chars, NOT including the \0 at the end */
121 static void
add_keywork(const char * keyword,int len)122 add_keywork (const char *keyword, int len)
123 {
124 #define MAXKEYWORDS 1000
125 	const char *ptr;
126 
127 	if (len == 0)
128 		return;
129 	for (ptr = keyword; *ptr && (ptr - keyword) < len; ptr++) {
130 		if (((*ptr < 'A') || (*ptr > 'Z')) &&
131 		    ((*ptr < 'a') || (*ptr > 'z')) &&
132 		    ((*ptr < '0') || (*ptr > '9')) &&
133 		    (*ptr != '_')) {
134 			/* ignore this keyword */
135 			return;
136 		}
137 	}
138 
139 	aKeywordTable [nKeyword].zName = malloc (sizeof (char) * (len + 1));
140 	memcpy (aKeywordTable [nKeyword].zName, keyword, sizeof (char) * len);
141 	aKeywordTable [nKeyword].zName [len] = 0;
142 
143 	printf (" * KEYWORD: '%s'\n", aKeywordTable [nKeyword].zName);
144 
145 	nKeyword ++;
146 	if (nKeyword > MAXKEYWORDS) {
147 		fprintf (stderr, "Maximum number of SQL keywords exceeded, "
148 			 "change the MAXKEYWORDS constant and recompile\n");
149 		exit (1);
150 	}
151 }
152 
153 static void
parse_input(const char * filename)154 parse_input (const char *filename)
155 {
156 #define BUFSIZE 500
157 	FILE *stream;
158         char buffer[BUFSIZE+1];
159         int read;
160         char *end;
161 
162 	stream = fopen (filename, "r");
163 	if (!stream)
164 		return;
165 
166 	printf ("\n/* Parsed keywords\n");
167 
168 	aKeywordTable = malloc (sizeof (Keyword) * MAXKEYWORDS);
169 	memset (aKeywordTable, 0, sizeof (Keyword) * MAXKEYWORDS);
170         read = fread (buffer, 1, BUFSIZE, stream);
171         end = buffer + read;
172 	*end = 0;
173         while (read > 0) {
174 		char *ptr;
175 
176                 /* read all complete lines in buffer */
177                 while (end > buffer) {
178                         char *hold = NULL;
179                         for (ptr = buffer; (ptr < end) && *ptr && (*ptr != '\n'); ptr++);
180                         if (ptr == end)
181                                 break;
182                         if (*ptr)
183                                 hold = ptr+1;
184                         *ptr = 0;
185 
186                         /* treat the line */
187 			if (*buffer && *buffer != '#') {
188 				char *tmp1, *tmp2;
189 				printf (" *\n * From line: %s\n", buffer);
190 				for (tmp1 = tmp2 = buffer; *tmp2; tmp2++) {
191 					if (((*tmp2 < 'A') || (*tmp2 > 'Z')) &&
192 					    ((*tmp2 < 'a') || (*tmp2 > 'z')) &&
193 					    ((*tmp2 < '0') || (*tmp2 > '9')) &&
194 					    (*tmp2 != '_')) {
195 						/* keyword found */
196 						add_keywork (tmp1, tmp2 - tmp1);
197 
198 						tmp1 = tmp2 + 1;
199 					}
200 				}
201 				if ((tmp1 != tmp2) && *tmp1)
202 					add_keywork (tmp1, tmp2 - tmp1);
203 			}
204 
205                         if (hold) {
206                                 int l = end - hold;
207                                 end -= hold - buffer;
208                                 memmove (buffer, hold, l);
209                         }
210                         else
211                                 break;
212                 }
213 
214 		read = fread (end, 1, BUFSIZE - (end - buffer), stream);
215                 end += read;
216 	}
217 
218 	printf (" */\n");
219 
220 #ifdef TEST_RESERVED_WORDS
221 	int i;
222 	printf ("static char *%skeywords[] = {\n", prefix ? prefix : "");
223 	for (i = 0; i < nKeyword; i++)
224 		printf ("\t\"%s\",\n", aKeywordTable [i].zName);
225 	printf ("};\n");
226 #endif
227 
228 	fclose (stream);
229 }
230 
231 /*
232 ** This routine does the work.  The generated code is printed on standard
233 ** output.
234 */
235 int
main(int argc,char ** argv)236 main (int argc, char **argv)
237 {
238 	int i, j, k, h;
239 	int bestSize, bestCount;
240 	int count;
241 	int nChar;
242 	int totalLen = 0;
243 	int aHash[5000];  /* 2000 is much bigger than nKeyword */
244 	char zText[10000];
245 
246 	if ((argc < 2)  || (argc > 3)) {
247 		printf ("Usage: %s <file with SQL keywords> [<prefix>]\n", argv[0]);
248 		return 1;
249 	}
250 	if (argc == 3)
251 		prefix = argv[2];
252 
253 	printf("%s", zHdr);
254 	parse_input (argv[1]);
255 	if (!aKeywordTable) {
256 		printf ("Error parsing file '%s', aborting\n", argv[1]);
257 		return 1;
258 	}
259 
260 	/* Fill in the lengths of strings and hashes for all entries. */
261 	for(i=0; i<nKeyword; i++){
262 		Keyword *p = &aKeywordTable[i];
263 		p->len = strlen(p->zName);
264 		assert( p->len<sizeof(p->zOrigName) );
265 		strcpy(p->zOrigName, p->zName);
266 		totalLen += p->len;
267 		p->hash = (UpperToLower[(int)p->zName[0]]*4) ^
268 			(UpperToLower[(int)p->zName[p->len-1]] *3) ^ p->len;
269 		p->id = i+1;
270 	}
271 
272 	/* Sort the table from shortest to longest keyword */
273 	qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1);
274 
275 	/* Look for short keywords embedded in longer keywords */
276 	for(i=nKeyword-2; i>=0; i--){
277 		Keyword *p = &aKeywordTable[i];
278 		for(j=nKeyword-1; j>i && p->substrId==0; j--){
279 			Keyword *pOther = &aKeywordTable[j];
280 			if( pOther->substrId ) continue;
281 			if( pOther->len<=p->len ) continue;
282 			for(k=0; k<=pOther->len-p->len; k++){
283 				if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){
284 					p->substrId = pOther->id;
285 					p->substrOffset = k;
286 					break;
287 				}
288 			}
289 		}
290 	}
291 
292 	/* Compute the longestSuffix value for every word */
293 	for(i=0; i<nKeyword; i++){
294 		Keyword *p = &aKeywordTable[i];
295 		if( p->substrId ) continue;
296 		for(j=0; j<nKeyword; j++){
297 			Keyword *pOther;
298 			if( j==i ) continue;
299 			pOther = &aKeywordTable[j];
300 			if( pOther->substrId ) continue;
301 			for(k=p->longestSuffix+1; k<p->len && k<pOther->len; k++){
302 				if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
303 					p->longestSuffix = k;
304 				}
305 			}
306 		}
307 	}
308 
309 	/* Sort the table into reverse order by length */
310 	qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare2);
311 
312 	/* Fill in the offset for all entries */
313 	nChar = 0;
314 	for(i=0; i<nKeyword; i++){
315 		Keyword *p = &aKeywordTable[i];
316 		if( p->offset>0 || p->substrId ) continue;
317 		p->offset = nChar;
318 		nChar += p->len;
319 		for(k=p->len-1; k>=1; k--){
320 			for(j=i+1; j<nKeyword; j++){
321 				Keyword *pOther = &aKeywordTable[j];
322 				if( pOther->offset>0 || pOther->substrId ) continue;
323 				if( pOther->len<=k ) continue;
324 				if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
325 					p = pOther;
326 					p->offset = nChar - k;
327 					nChar = p->offset + p->len;
328 					p->zName += k;
329 					p->len -= k;
330 					p->prefix = k;
331 					j = i;
332 					k = p->len;
333 				}
334 			}
335 		}
336 	}
337 	for(i=0; i<nKeyword; i++){
338 		Keyword *p = &aKeywordTable[i];
339 		if( p->substrId ){
340 			p->offset = findById(p->substrId)->offset + p->substrOffset;
341 		}
342 	}
343 
344 	/* Sort the table by offset */
345 	qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3);
346 
347 	/* Figure out how big to make the hash table in order to minimize the
348 	** number of collisions */
349 	bestSize = nKeyword;
350 	bestCount = nKeyword*nKeyword;
351 	for(i=nKeyword/2; i<=2*nKeyword; i++){
352 		for(j=0; j<i; j++) aHash[j] = 0;
353 		for(j=0; j<nKeyword; j++){
354 			h = aKeywordTable[j].hash % i;
355 			aHash[h] *= 2;
356 			aHash[h]++;
357 		}
358 		for(j=count=0; j<i; j++) count += aHash[j];
359 		if( count<bestCount ){
360 			bestCount = count;
361 			bestSize = i;
362 		}
363 	}
364 
365 	/* Compute the hash */
366 	for(i=0; i<bestSize; i++) aHash[i] = 0;
367 	for(i=0; i<nKeyword; i++){
368 		h = aKeywordTable[i].hash % bestSize;
369 		aKeywordTable[i].iNext = aHash[h];
370 		aHash[h] = i+1;
371 	}
372 
373 	/* Begin generating code */
374 	printf("/* Hash score: %d */\n", bestCount);
375 	printf("static int %skeywordCode(const char *z, int n){\n", prefix ? prefix : "");
376 	printf("  /* zText[] encodes %d bytes of keywords in %d bytes */\n",
377 	       totalLen + nKeyword, nChar+1 );
378 	for(i=j=k=0; i<nKeyword; i++){
379 		Keyword *p = &aKeywordTable[i];
380 		if( p->substrId ) continue;
381 		memcpy(&zText[k], p->zName, p->len);
382 		k += p->len;
383 		if( j+p->len>70 ){
384 			printf("%*s */\n", 74-j, "");
385 			j = 0;
386 		}
387 		if( j==0 ){
388 			printf("  /*   ");
389 			j = 8;
390 		}
391 		printf("%s", p->zName);
392 		j += p->len;
393 	}
394 	if( j>0 ){
395 		printf("%*s */\n", 74-j, "");
396 	}
397 	printf("  static const char %szText[%d] = {\n", prefix ? prefix : "", nChar);
398 	zText[nChar] = 0;
399 	for(i=j=0; i<k; i++){
400 		if( j==0 ){
401 			printf("    ");
402 		}
403 		if( zText[i]==0 ){
404 			printf("0");
405 		}else{
406 			printf("'%c',", zText[i]);
407 		}
408 		j += 4;
409 		if( j>68 ){
410 			printf("\n");
411 			j = 0;
412 		}
413 	}
414 	if( j>0 ) printf("\n");
415 	printf("  };\n");
416 
417 	printf("  static const unsigned int %saHash[%d] = {\n", prefix ? prefix : "", bestSize);
418 	for(i=j=0; i<bestSize; i++){
419 		if( j==0 ) printf("    ");
420 		printf(" %d,", aHash[i]);
421 		j++;
422 		if( j>12 ){
423 			printf("\n");
424 			j = 0;
425 		}
426 	}
427 	printf("%s  };\n", j==0 ? "" : "\n");
428 
429 	printf("  static const unsigned int %saNext[%d] = {\n", prefix ? prefix : "", nKeyword);
430 	for(i=j=0; i<nKeyword; i++){
431 		if( j==0 ) printf("    ");
432 		printf(" %3d,", aKeywordTable[i].iNext);
433 		j++;
434 		if( j>12 ){
435 			printf("\n");
436 			j = 0;
437 		}
438 	}
439 	printf("%s  };\n", j==0 ? "" : "\n");
440 
441 	printf("  static const unsigned char %saLen[%d] = {\n", prefix ? prefix : "", nKeyword);
442 	for(i=j=0; i<nKeyword; i++){
443 		if( j==0 ) printf("    ");
444 		printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix);
445 		j++;
446 		if( j>12 ){
447 			printf("\n");
448 			j = 0;
449 		}
450 	}
451 	printf("%s  };\n", j==0 ? "" : "\n");
452 
453 	printf("  static const unsigned short int %saOffset[%d] = {\n", prefix ? prefix : "", nKeyword);
454 	for(i=j=0; i<nKeyword; i++){
455 		if( j==0 ) printf("    ");
456 		printf(" %3d,", aKeywordTable[i].offset);
457 		j++;
458 		if( j>12 ){
459 			printf("\n");
460 			j = 0;
461 		}
462 	}
463 	printf("%s  };\n", j==0 ? "" : "\n");
464 
465 
466 	printf("  int h, i;\n");
467 	printf("  if( n<2 ) return 0;\n");
468 	printf("  h = ((charMap(z[0])*4) ^\n"
469 	       "      (charMap(z[n-1])*3) ^\n"
470 	       "      n) %% %d;\n", bestSize);
471 	printf("  for(i=((int)%saHash[h])-1; i>=0; i=((int)%saNext[i])-1){\n",
472 	       prefix ? prefix : "", prefix ? prefix : "");
473 	printf("    if( %saLen[i]==n &&"
474 	       " casecmp(&%szText[%saOffset[i]],z,n)==0 ){\n",
475 	       prefix ? prefix : "",
476 	       prefix ? prefix : "",
477 	       prefix ? prefix : "");
478 	printf("      return 1;\n");
479 	printf("    }\n");
480 	printf("  }\n");
481 	printf("  return 0;\n");
482 	printf("}\n");
483 	printf("\nstatic gboolean\n%sis_keyword (const char *z)\n{\n", prefix ? prefix : "");
484 	printf("\treturn %skeywordCode(z, strlen (z));\n", prefix ? prefix : "");
485 	printf("}\n\n");
486 
487 #ifdef TEST_RESERVED_WORDS
488 	printf("\nstatic void\n%stest_keywords (void)\n{\n", prefix ? prefix : "");
489 	printf("\tint i;\n");
490 	printf("\tfor (i = 0; i < %d; i++) {\n", nKeyword);
491 	printf("\t\tif (! %sis_keyword (%skeywords[i]))\n", prefix ? prefix : "", prefix ? prefix : "");
492 	printf("\t\t\tg_print (\"KEYWORK %%s ignored!\\n\", %skeywords[i]);\n", prefix ? prefix : "");
493 	printf("\t}\n");
494 	printf("}\n\n");
495 #endif
496 
497 
498 	return 0;
499 }
500