1 /*
2 ** Compile and run this standalone program in order to generate code that
3 ** implements a function that will translate alphabetic identifiers into
4 ** parser token codes.
5 */
6 #include <stdio.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <assert.h>
10 
11 /*
12 ** A header comment placed at the beginning of generated code.
13 */
14 static const char zHdr[] =
15   "/***** This file contains automatically generated code ******\n"
16   "**\n"
17   "** The code in this file has been automatically generated by\n"
18   "**\n"
19   "**   sqlite/tool/mkkeywordhash.c\n"
20   "**\n"
21   "** The code in this file implements a function that determines whether\n"
22   "** or not a given identifier is really an SQL keyword.  The same thing\n"
23   "** might be implemented more directly using a hand-written hash table.\n"
24   "** But by using this automatically generated code, the size of the code\n"
25   "** is substantially reduced.  This is important for embedded applications\n"
26   "** on platforms with limited memory.\n"
27   "*/\n"
28 ;
29 
30 /*
31 ** All the keywords of the SQL language are stored in a hash
32 ** table composed of instances of the following structure.
33 */
34 typedef struct Keyword Keyword;
35 struct Keyword {
36   char *zName;         /* The keyword name */
37   char *zTokenType;    /* Token value for this keyword */
38   int mask;            /* Code this keyword if non-zero */
39   int priority;        /* Put higher priorities earlier in the hash chain */
40   int id;              /* Unique ID for this record */
41   int hash;            /* Hash on the keyword */
42   int offset;          /* Offset to start of name string */
43   int len;             /* Length of this keyword, not counting final \000 */
44   int prefix;          /* Number of characters in prefix */
45   int longestSuffix;   /* Longest suffix that is a prefix on another word */
46   int iNext;           /* Index in aKeywordTable[] of next with same hash */
47   int substrId;        /* Id to another keyword this keyword is embedded in */
48   int substrOffset;    /* Offset into substrId for start of this keyword */
49   char zOrigName[20];  /* Original keyword name before processing */
50 };
51 
52 /*
53 ** Define masks used to determine which keywords are allowed
54 */
55 #ifdef SQLITE_OMIT_ALTERTABLE
56 #  define ALTER      0
57 #else
58 #  define ALTER      0x00000001
59 #endif
60 #define ALWAYS       0x00000002
61 #ifdef SQLITE_OMIT_ANALYZE
62 #  define ANALYZE    0
63 #else
64 #  define ANALYZE    0x00000004
65 #endif
66 #ifdef SQLITE_OMIT_ATTACH
67 #  define ATTACH     0
68 #else
69 #  define ATTACH     0x00000008
70 #endif
71 #ifdef SQLITE_OMIT_AUTOINCREMENT
72 #  define AUTOINCR   0
73 #else
74 #  define AUTOINCR   0x00000010
75 #endif
76 #ifdef SQLITE_OMIT_CAST
77 #  define CAST       0
78 #else
79 #  define CAST       0x00000020
80 #endif
81 #ifdef SQLITE_OMIT_COMPOUND_SELECT
82 #  define COMPOUND   0
83 #else
84 #  define COMPOUND   0x00000040
85 #endif
86 #ifdef SQLITE_OMIT_CONFLICT_CLAUSE
87 #  define CONFLICT   0
88 #else
89 #  define CONFLICT   0x00000080
90 #endif
91 #ifdef SQLITE_OMIT_EXPLAIN
92 #  define EXPLAIN    0
93 #else
94 #  define EXPLAIN    0x00000100
95 #endif
96 #ifdef SQLITE_OMIT_FOREIGN_KEY
97 #  define FKEY       0
98 #else
99 #  define FKEY       0x00000200
100 #endif
101 #ifdef SQLITE_OMIT_PRAGMA
102 #  define PRAGMA     0
103 #else
104 #  define PRAGMA     0x00000400
105 #endif
106 #ifdef SQLITE_OMIT_REINDEX
107 #  define REINDEX    0
108 #else
109 #  define REINDEX    0x00000800
110 #endif
111 #ifdef SQLITE_OMIT_SUBQUERY
112 #  define SUBQUERY   0
113 #else
114 #  define SUBQUERY   0x00001000
115 #endif
116 #ifdef SQLITE_OMIT_TRIGGER
117 #  define TRIGGER    0
118 #else
119 #  define TRIGGER    0x00002000
120 #endif
121 #if defined(SQLITE_OMIT_AUTOVACUUM) && \
122     (defined(SQLITE_OMIT_VACUUM) || defined(SQLITE_OMIT_ATTACH))
123 #  define VACUUM     0
124 #else
125 #  define VACUUM     0x00004000
126 #endif
127 #ifdef SQLITE_OMIT_VIEW
128 #  define VIEW       0
129 #else
130 #  define VIEW       0x00008000
131 #endif
132 #ifdef SQLITE_OMIT_VIRTUALTABLE
133 #  define VTAB       0
134 #else
135 #  define VTAB       0x00010000
136 #endif
137 #ifdef SQLITE_OMIT_AUTOVACUUM
138 #  define AUTOVACUUM 0
139 #else
140 #  define AUTOVACUUM 0x00020000
141 #endif
142 #ifdef SQLITE_OMIT_CTE
143 #  define CTE        0
144 #else
145 #  define CTE        0x00040000
146 #endif
147 #ifdef SQLITE_OMIT_UPSERT
148 #  define UPSERT     0
149 #else
150 #  define UPSERT     0x00080000
151 #endif
152 #ifdef SQLITE_OMIT_WINDOWFUNC
153 #  define WINDOWFUNC 0
154 #else
155 #  define WINDOWFUNC 0x00100000
156 #endif
157 #ifdef SQLITE_OMIT_GENERATED_COLUMNS
158 #  define GENCOL     0
159 #else
160 #  define GENCOL     0x00200000
161 #endif
162 #ifdef SQLITE_OMIT_RETURNING
163 #  define RETURNING  0
164 #else
165 #  define RETURNING  0x00400000
166 #endif
167 
168 
169 /*
170 ** These are the keywords
171 */
172 static Keyword aKeywordTable[] = {
173   { "ABORT",            "TK_ABORT",        CONFLICT|TRIGGER, 0      },
174   { "ACTION",           "TK_ACTION",       FKEY,             0      },
175   { "ADD",              "TK_ADD",          ALTER,            1      },
176   { "AFTER",            "TK_AFTER",        TRIGGER,          0      },
177   { "ALL",              "TK_ALL",          ALWAYS,           0      },
178   { "ALTER",            "TK_ALTER",        ALTER,            0      },
179   { "ALWAYS",           "TK_ALWAYS",       GENCOL,           0      },
180   { "ANALYZE",          "TK_ANALYZE",      ANALYZE,          0      },
181   { "AND",              "TK_AND",          ALWAYS,           10     },
182   { "AS",               "TK_AS",           ALWAYS,           10     },
183   { "ASC",              "TK_ASC",          ALWAYS,           0      },
184   { "ATTACH",           "TK_ATTACH",       ATTACH,           1      },
185   { "AUTOINCREMENT",    "TK_AUTOINCR",     AUTOINCR,         0      },
186   { "BEFORE",           "TK_BEFORE",       TRIGGER,          0      },
187   { "BEGIN",            "TK_BEGIN",        ALWAYS,           1      },
188   { "BETWEEN",          "TK_BETWEEN",      ALWAYS,           5      },
189   { "BY",               "TK_BY",           ALWAYS,           10     },
190   { "CASCADE",          "TK_CASCADE",      FKEY,             1      },
191   { "CASE",             "TK_CASE",         ALWAYS,           5      },
192   { "CAST",             "TK_CAST",         CAST,             5      },
193   { "CHECK",            "TK_CHECK",        ALWAYS,           1      },
194   { "COLLATE",          "TK_COLLATE",      ALWAYS,           1      },
195   { "COLUMN",           "TK_COLUMNKW",     ALTER,            1      },
196   { "COMMIT",           "TK_COMMIT",       ALWAYS,           1      },
197   { "CONFLICT",         "TK_CONFLICT",     CONFLICT,         0      },
198   { "CONSTRAINT",       "TK_CONSTRAINT",   ALWAYS,           1      },
199   { "CREATE",           "TK_CREATE",       ALWAYS,           2      },
200   { "CROSS",            "TK_JOIN_KW",      ALWAYS,           3      },
201   { "CURRENT",          "TK_CURRENT",      WINDOWFUNC,       1      },
202   { "CURRENT_DATE",     "TK_CTIME_KW",     ALWAYS,           1      },
203   { "CURRENT_TIME",     "TK_CTIME_KW",     ALWAYS,           1      },
204   { "CURRENT_TIMESTAMP","TK_CTIME_KW",     ALWAYS,           1      },
205   { "DATABASE",         "TK_DATABASE",     ATTACH,           0      },
206   { "DEFAULT",          "TK_DEFAULT",      ALWAYS,           1      },
207   { "DEFERRED",         "TK_DEFERRED",     ALWAYS,           1      },
208   { "DEFERRABLE",       "TK_DEFERRABLE",   FKEY,             1      },
209   { "DELETE",           "TK_DELETE",       ALWAYS,           10     },
210   { "DESC",             "TK_DESC",         ALWAYS,           3      },
211   { "DETACH",           "TK_DETACH",       ATTACH,           0      },
212   { "DISTINCT",         "TK_DISTINCT",     ALWAYS,           5      },
213   { "DO",               "TK_DO",           UPSERT,           2      },
214   { "DROP",             "TK_DROP",         ALWAYS,           1      },
215   { "END",              "TK_END",          ALWAYS,           1      },
216   { "EACH",             "TK_EACH",         TRIGGER,          1      },
217   { "ELSE",             "TK_ELSE",         ALWAYS,           2      },
218   { "ESCAPE",           "TK_ESCAPE",       ALWAYS,           4      },
219   { "EXCEPT",           "TK_EXCEPT",       COMPOUND,         4      },
220   { "EXCLUSIVE",        "TK_EXCLUSIVE",    ALWAYS,           1      },
221   { "EXCLUDE",          "TK_EXCLUDE",      WINDOWFUNC,       1      },
222   { "EXISTS",           "TK_EXISTS",       ALWAYS,           4      },
223   { "EXPLAIN",          "TK_EXPLAIN",      EXPLAIN,          1      },
224   { "FAIL",             "TK_FAIL",         CONFLICT|TRIGGER, 1      },
225   { "FILTER",           "TK_FILTER",       WINDOWFUNC,       4      },
226   { "FIRST",            "TK_FIRST",        ALWAYS,           4      },
227   { "FOLLOWING",        "TK_FOLLOWING",    WINDOWFUNC,       4      },
228   { "FOR",              "TK_FOR",          TRIGGER,          2      },
229   { "FOREIGN",          "TK_FOREIGN",      FKEY,             1      },
230   { "FROM",             "TK_FROM",         ALWAYS,           10     },
231   { "FULL",             "TK_JOIN_KW",      ALWAYS,           3      },
232   { "GENERATED",        "TK_GENERATED",    ALWAYS,           1      },
233   { "GLOB",             "TK_LIKE_KW",      ALWAYS,           3      },
234   { "GROUP",            "TK_GROUP",        ALWAYS,           5      },
235   { "GROUPS",           "TK_GROUPS",       WINDOWFUNC,       2      },
236   { "HAVING",           "TK_HAVING",       ALWAYS,           5      },
237   { "IF",               "TK_IF",           ALWAYS,           2      },
238   { "IGNORE",           "TK_IGNORE",       CONFLICT|TRIGGER, 1      },
239   { "IMMEDIATE",        "TK_IMMEDIATE",    ALWAYS,           1      },
240   { "IN",               "TK_IN",           ALWAYS,           10     },
241   { "INDEX",            "TK_INDEX",        ALWAYS,           1      },
242   { "INDEXED",          "TK_INDEXED",      ALWAYS,           0      },
243   { "INITIALLY",        "TK_INITIALLY",    FKEY,             1      },
244   { "INNER",            "TK_JOIN_KW",      ALWAYS,           1      },
245   { "INSERT",           "TK_INSERT",       ALWAYS,           10     },
246   { "INSTEAD",          "TK_INSTEAD",      TRIGGER,          1      },
247   { "INTERSECT",        "TK_INTERSECT",    COMPOUND,         5      },
248   { "INTO",             "TK_INTO",         ALWAYS,           10     },
249   { "IS",               "TK_IS",           ALWAYS,           5      },
250   { "ISNULL",           "TK_ISNULL",       ALWAYS,           5      },
251   { "JOIN",             "TK_JOIN",         ALWAYS,           5      },
252   { "KEY",              "TK_KEY",          ALWAYS,           1      },
253   { "LAST",             "TK_LAST",         ALWAYS,           4      },
254   { "LEFT",             "TK_JOIN_KW",      ALWAYS,           5      },
255   { "LIKE",             "TK_LIKE_KW",      ALWAYS,           5      },
256   { "LIMIT",            "TK_LIMIT",        ALWAYS,           3      },
257   { "MATCH",            "TK_MATCH",        ALWAYS,           2      },
258   { "MATERIALIZED",     "TK_MATERIALIZED", CTE,              12     },
259   { "NATURAL",          "TK_JOIN_KW",      ALWAYS,           3      },
260   { "NO",               "TK_NO",           FKEY|WINDOWFUNC,  2      },
261   { "NOT",              "TK_NOT",          ALWAYS,           10     },
262   { "NOTHING",          "TK_NOTHING",      UPSERT,           1      },
263   { "NOTNULL",          "TK_NOTNULL",      ALWAYS,           3      },
264   { "NULL",             "TK_NULL",         ALWAYS,           10     },
265   { "NULLS",            "TK_NULLS",        ALWAYS,           3      },
266   { "OF",               "TK_OF",           ALWAYS,           3      },
267   { "OFFSET",           "TK_OFFSET",       ALWAYS,           1      },
268   { "ON",               "TK_ON",           ALWAYS,           1      },
269   { "OR",               "TK_OR",           ALWAYS,           9      },
270   { "ORDER",            "TK_ORDER",        ALWAYS,           10     },
271   { "OTHERS",           "TK_OTHERS",       WINDOWFUNC,       3      },
272   { "OUTER",            "TK_JOIN_KW",      ALWAYS,           5      },
273   { "OVER",             "TK_OVER",         WINDOWFUNC,       3      },
274   { "PARTITION",        "TK_PARTITION",    WINDOWFUNC,       3      },
275   { "PLAN",             "TK_PLAN",         EXPLAIN,          0      },
276   { "PRAGMA",           "TK_PRAGMA",       PRAGMA,           0      },
277   { "PRECEDING",        "TK_PRECEDING",    WINDOWFUNC,       3      },
278   { "PRIMARY",          "TK_PRIMARY",      ALWAYS,           1      },
279   { "QUERY",            "TK_QUERY",        EXPLAIN,          0      },
280   { "RAISE",            "TK_RAISE",        TRIGGER,          1      },
281   { "RANGE",            "TK_RANGE",        WINDOWFUNC,       3      },
282   { "RECURSIVE",        "TK_RECURSIVE",    CTE,              3      },
283   { "REFERENCES",       "TK_REFERENCES",   FKEY,             1      },
284   { "REGEXP",           "TK_LIKE_KW",      ALWAYS,           3      },
285   { "REINDEX",          "TK_REINDEX",      REINDEX,          1      },
286   { "RELEASE",          "TK_RELEASE",      ALWAYS,           1      },
287   { "RENAME",           "TK_RENAME",       ALTER,            1      },
288   { "REPLACE",          "TK_REPLACE",      CONFLICT,         10     },
289   { "RESTRICT",         "TK_RESTRICT",     FKEY,             1      },
290   { "RETURNING",        "TK_RETURNING",    RETURNING,        10     },
291   { "RIGHT",            "TK_JOIN_KW",      ALWAYS,           0      },
292   { "ROLLBACK",         "TK_ROLLBACK",     ALWAYS,           1      },
293   { "ROW",              "TK_ROW",          TRIGGER,          1      },
294   { "ROWS",             "TK_ROWS",         ALWAYS,           1      },
295   { "SAVEPOINT",        "TK_SAVEPOINT",    ALWAYS,           1      },
296   { "SELECT",           "TK_SELECT",       ALWAYS,           10     },
297   { "SET",              "TK_SET",          ALWAYS,           10     },
298   { "TABLE",            "TK_TABLE",        ALWAYS,           1      },
299   { "TEMP",             "TK_TEMP",         ALWAYS,           1      },
300   { "TEMPORARY",        "TK_TEMP",         ALWAYS,           1      },
301   { "THEN",             "TK_THEN",         ALWAYS,           3      },
302   { "TIES",             "TK_TIES",         WINDOWFUNC,       3      },
303   { "TO",               "TK_TO",           ALWAYS,           3      },
304   { "TRANSACTION",      "TK_TRANSACTION",  ALWAYS,           1      },
305   { "TRIGGER",          "TK_TRIGGER",      TRIGGER,          1      },
306   { "UNBOUNDED",        "TK_UNBOUNDED",    WINDOWFUNC,       3      },
307   { "UNION",            "TK_UNION",        COMPOUND,         3      },
308   { "UNIQUE",           "TK_UNIQUE",       ALWAYS,           1      },
309   { "UPDATE",           "TK_UPDATE",       ALWAYS,           10     },
310   { "USING",            "TK_USING",        ALWAYS,           8      },
311   { "VACUUM",           "TK_VACUUM",       VACUUM,           1      },
312   { "VALUES",           "TK_VALUES",       ALWAYS,           10     },
313   { "VIEW",             "TK_VIEW",         VIEW,             1      },
314   { "VIRTUAL",          "TK_VIRTUAL",      VTAB,             1      },
315   { "WHEN",             "TK_WHEN",         ALWAYS,           1      },
316   { "WHERE",            "TK_WHERE",        ALWAYS,           10     },
317   { "WINDOW",           "TK_WINDOW",       WINDOWFUNC,       3      },
318   { "WITH",             "TK_WITH",         CTE,              4      },
319   { "WITHOUT",          "TK_WITHOUT",      ALWAYS,           1      },
320 };
321 
322 /* Number of keywords */
323 static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0]));
324 
325 /* Map all alphabetic characters into lower-case for hashing.  This is
326 ** only valid for alphabetics.  In particular it does not work for '_'
327 ** and so the hash cannot be on a keyword position that might be an '_'.
328 */
329 #define charMap(X)   (0x20|(X))
330 
331 /*
332 ** Comparision function for two Keyword records
333 */
keywordCompare1(const void * a,const void * b)334 static int keywordCompare1(const void *a, const void *b){
335   const Keyword *pA = (Keyword*)a;
336   const Keyword *pB = (Keyword*)b;
337   int n = pA->len - pB->len;
338   if( n==0 ){
339     n = strcmp(pA->zName, pB->zName);
340   }
341   assert( n!=0 );
342   return n;
343 }
keywordCompare2(const void * a,const void * b)344 static int keywordCompare2(const void *a, const void *b){
345   const Keyword *pA = (Keyword*)a;
346   const Keyword *pB = (Keyword*)b;
347   int n = pB->longestSuffix - pA->longestSuffix;
348   if( n==0 ){
349     n = strcmp(pA->zName, pB->zName);
350   }
351   assert( n!=0 );
352   return n;
353 }
keywordCompare3(const void * a,const void * b)354 static int keywordCompare3(const void *a, const void *b){
355   const Keyword *pA = (Keyword*)a;
356   const Keyword *pB = (Keyword*)b;
357   int n = pA->offset - pB->offset;
358   if( n==0 ) n = pB->id - pA->id;
359   assert( n!=0 );
360   return n;
361 }
362 
363 /*
364 ** Return a KeywordTable entry with the given id
365 */
findById(int id)366 static Keyword *findById(int id){
367   int i;
368   for(i=0; i<nKeyword; i++){
369     if( aKeywordTable[i].id==id ) break;
370   }
371   return &aKeywordTable[i];
372 }
373 
374 /*
375 ** If aKeyword[*pFrom-1].iNext has a higher priority that aKeyword[*pFrom-1]
376 ** itself, then swap them.
377 */
reorder(int * pFrom)378 static void reorder(int *pFrom){
379   int i = *pFrom - 1;
380   int j;
381   if( i<0 ) return;
382   j = aKeywordTable[i].iNext;
383   if( j==0 ) return;
384   j--;
385   if( aKeywordTable[i].priority >= aKeywordTable[j].priority ) return;
386   aKeywordTable[i].iNext = aKeywordTable[j].iNext;
387   aKeywordTable[j].iNext = i+1;
388   *pFrom = j+1;
389   reorder(&aKeywordTable[i].iNext);
390 }
391 
392 /* Parameter to the hash function
393 */
394 #define HASH_OP ^
395 #define HASH_CC '^'
396 #define HASH_C0 4
397 #define HASH_C1 3
398 #define HASH_C2 1
399 
400 /*
401 ** This routine does the work.  The generated code is printed on standard
402 ** output.
403 */
main(int argc,char ** argv)404 int main(int argc, char **argv){
405   int i, j, k, h;
406   int bestSize, bestCount;
407   int count;
408   int nChar;
409   int totalLen = 0;
410   int aKWHash[1000];  /* 1000 is much bigger than nKeyword */
411   char zKWText[2000];
412 
413   /* Remove entries from the list of keywords that have mask==0 */
414   for(i=j=0; i<nKeyword; i++){
415     if( aKeywordTable[i].mask==0 ) continue;
416     if( j<i ){
417       aKeywordTable[j] = aKeywordTable[i];
418     }
419     j++;
420   }
421   nKeyword = j;
422 
423   /* Fill in the lengths of strings and hashes for all entries. */
424   for(i=0; i<nKeyword; i++){
425     Keyword *p = &aKeywordTable[i];
426     p->len = (int)strlen(p->zName);
427     assert( p->len<sizeof(p->zOrigName) );
428     memcpy(p->zOrigName, p->zName, p->len+1);
429     totalLen += p->len;
430     p->hash = (charMap(p->zName[0])*HASH_C0) HASH_OP
431               (charMap(p->zName[p->len-1])*HASH_C1) HASH_OP
432               (p->len*HASH_C2);
433     p->id = i+1;
434   }
435 
436   /* Sort the table from shortest to longest keyword */
437   qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1);
438 
439   /* Look for short keywords embedded in longer keywords */
440   for(i=nKeyword-2; i>=0; i--){
441     Keyword *p = &aKeywordTable[i];
442     for(j=nKeyword-1; j>i && p->substrId==0; j--){
443       Keyword *pOther = &aKeywordTable[j];
444       if( pOther->substrId ) continue;
445       if( pOther->len<=p->len ) continue;
446       for(k=0; k<=pOther->len-p->len; k++){
447         if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){
448           p->substrId = pOther->id;
449           p->substrOffset = k;
450           break;
451         }
452       }
453     }
454   }
455 
456   /* Compute the longestSuffix value for every word */
457   for(i=0; i<nKeyword; i++){
458     Keyword *p = &aKeywordTable[i];
459     if( p->substrId ) continue;
460     for(j=0; j<nKeyword; j++){
461       Keyword *pOther;
462       if( j==i ) continue;
463       pOther = &aKeywordTable[j];
464       if( pOther->substrId ) continue;
465       for(k=p->longestSuffix+1; k<p->len && k<pOther->len; k++){
466         if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
467           p->longestSuffix = k;
468         }
469       }
470     }
471   }
472 
473   /* Sort the table into reverse order by length */
474   qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare2);
475 
476   /* Fill in the offset for all entries */
477   nChar = 0;
478   for(i=0; i<nKeyword; i++){
479     Keyword *p = &aKeywordTable[i];
480     if( p->offset>0 || p->substrId ) continue;
481     p->offset = nChar;
482     nChar += p->len;
483     for(k=p->len-1; k>=1; k--){
484       for(j=i+1; j<nKeyword; j++){
485         Keyword *pOther = &aKeywordTable[j];
486         if( pOther->offset>0 || pOther->substrId ) continue;
487         if( pOther->len<=k ) continue;
488         if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
489           p = pOther;
490           p->offset = nChar - k;
491           nChar = p->offset + p->len;
492           p->zName += k;
493           p->len -= k;
494           p->prefix = k;
495           j = i;
496           k = p->len;
497         }
498       }
499     }
500   }
501   for(i=0; i<nKeyword; i++){
502     Keyword *p = &aKeywordTable[i];
503     if( p->substrId ){
504       p->offset = findById(p->substrId)->offset + p->substrOffset;
505     }
506   }
507 
508   /* Sort the table by offset */
509   qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3);
510 
511   /* Figure out how big to make the hash table in order to minimize the
512   ** number of collisions */
513   bestSize = nKeyword;
514   bestCount = nKeyword*nKeyword;
515   for(i=nKeyword/2; i<=2*nKeyword; i++){
516     for(j=0; j<i; j++) aKWHash[j] = 0;
517     for(j=0; j<nKeyword; j++){
518       h = aKeywordTable[j].hash % i;
519       aKWHash[h] *= 2;
520       aKWHash[h]++;
521     }
522     for(j=count=0; j<i; j++) count += aKWHash[j];
523     if( count<bestCount ){
524       bestCount = count;
525       bestSize = i;
526     }
527   }
528 
529   /* Compute the hash */
530   for(i=0; i<bestSize; i++) aKWHash[i] = 0;
531   for(i=0; i<nKeyword; i++){
532     h = aKeywordTable[i].hash % bestSize;
533     aKeywordTable[i].iNext = aKWHash[h];
534     aKWHash[h] = i+1;
535     reorder(&aKWHash[h]);
536   }
537 
538   /* Begin generating code */
539   printf("%s", zHdr);
540   printf("/* Hash score: %d */\n", bestCount);
541   printf("/* zKWText[] encodes %d bytes of keyword text in %d bytes */\n",
542           totalLen + nKeyword, nChar+1 );
543   for(i=j=k=0; i<nKeyword; i++){
544     Keyword *p = &aKeywordTable[i];
545     if( p->substrId ) continue;
546     memcpy(&zKWText[k], p->zName, p->len);
547     k += p->len;
548     if( j+p->len>70 ){
549       printf("%*s */\n", 74-j, "");
550       j = 0;
551     }
552     if( j==0 ){
553       printf("/*   ");
554       j = 8;
555     }
556     printf("%s", p->zName);
557     j += p->len;
558   }
559   if( j>0 ){
560     printf("%*s */\n", 74-j, "");
561   }
562   printf("static const char zKWText[%d] = {\n", nChar);
563   zKWText[nChar] = 0;
564   for(i=j=0; i<k; i++){
565     if( j==0 ){
566       printf("  ");
567     }
568     if( zKWText[i]==0 ){
569       printf("0");
570     }else{
571       printf("'%c',", zKWText[i]);
572     }
573     j += 4;
574     if( j>68 ){
575       printf("\n");
576       j = 0;
577     }
578   }
579   if( j>0 ) printf("\n");
580   printf("};\n");
581 
582   printf("/* aKWHash[i] is the hash value for the i-th keyword */\n");
583   printf("static const unsigned char aKWHash[%d] = {\n", bestSize);
584   for(i=j=0; i<bestSize; i++){
585     if( j==0 ) printf("  ");
586     printf(" %3d,", aKWHash[i]);
587     j++;
588     if( j>12 ){
589       printf("\n");
590       j = 0;
591     }
592   }
593   printf("%s};\n", j==0 ? "" : "\n");
594 
595   printf("/* aKWNext[] forms the hash collision chain.  If aKWHash[i]==0\n");
596   printf("** then the i-th keyword has no more hash collisions.  Otherwise,\n");
597   printf("** the next keyword with the same hash is aKWHash[i]-1. */\n");
598   printf("static const unsigned char aKWNext[%d] = {\n", nKeyword);
599   for(i=j=0; i<nKeyword; i++){
600     if( j==0 ) printf("  ");
601     printf(" %3d,", aKeywordTable[i].iNext);
602     j++;
603     if( j>12 ){
604       printf("\n");
605       j = 0;
606     }
607   }
608   printf("%s};\n", j==0 ? "" : "\n");
609 
610   printf("/* aKWLen[i] is the length (in bytes) of the i-th keyword */\n");
611   printf("static const unsigned char aKWLen[%d] = {\n", nKeyword);
612   for(i=j=0; i<nKeyword; i++){
613     if( j==0 ) printf("  ");
614     printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix);
615     j++;
616     if( j>12 ){
617       printf("\n");
618       j = 0;
619     }
620   }
621   printf("%s};\n", j==0 ? "" : "\n");
622 
623   printf("/* aKWOffset[i] is the index into zKWText[] of the start of\n");
624   printf("** the text for the i-th keyword. */\n");
625   printf("static const unsigned short int aKWOffset[%d] = {\n", nKeyword);
626   for(i=j=0; i<nKeyword; i++){
627     if( j==0 ) printf("  ");
628     printf(" %3d,", aKeywordTable[i].offset);
629     j++;
630     if( j>12 ){
631       printf("\n");
632       j = 0;
633     }
634   }
635   printf("%s};\n", j==0 ? "" : "\n");
636 
637   printf("/* aKWCode[i] is the parser symbol code for the i-th keyword */\n");
638   printf("static const unsigned char aKWCode[%d] = {\n", nKeyword);
639   for(i=j=0; i<nKeyword; i++){
640     char *zToken = aKeywordTable[i].zTokenType;
641     if( j==0 ) printf("  ");
642     printf("%s,%*s", zToken, (int)(14-strlen(zToken)), "");
643     j++;
644     if( j>=5 ){
645       printf("\n");
646       j = 0;
647     }
648   }
649   printf("%s};\n", j==0 ? "" : "\n");
650   printf("/* Hash table decoded:\n");
651   for(i=0; i<bestSize; i++){
652     j = aKWHash[i];
653     printf("** %3d:", i);
654     while( j ){
655       printf(" %s", aKeywordTable[j-1].zOrigName);
656       j = aKeywordTable[j-1].iNext;
657     }
658     printf("\n");
659   }
660   printf("*/\n");
661   printf("/* Check to see if z[0..n-1] is a keyword. If it is, write the\n");
662   printf("** parser symbol code for that keyword into *pType.  Always\n");
663   printf("** return the integer n (the length of the token). */\n");
664   printf("static int keywordCode(const char *z, int n, int *pType){\n");
665   printf("  int i, j;\n");
666   printf("  const char *zKW;\n");
667   printf("  if( n>=2 ){\n");
668   printf("    i = ((charMap(z[0])*%d) %c", HASH_C0, HASH_CC);
669   printf(" (charMap(z[n-1])*%d) %c", HASH_C1, HASH_CC);
670   printf(" n*%d) %% %d;\n", HASH_C2, bestSize);
671   printf("    for(i=((int)aKWHash[i])-1; i>=0; i=((int)aKWNext[i])-1){\n");
672   printf("      if( aKWLen[i]!=n ) continue;\n");
673   printf("      zKW = &zKWText[aKWOffset[i]];\n");
674   printf("#ifdef SQLITE_ASCII\n");
675   printf("      if( (z[0]&~0x20)!=zKW[0] ) continue;\n");
676   printf("      if( (z[1]&~0x20)!=zKW[1] ) continue;\n");
677   printf("      j = 2;\n");
678   printf("      while( j<n && (z[j]&~0x20)==zKW[j] ){ j++; }\n");
679   printf("#endif\n");
680   printf("#ifdef SQLITE_EBCDIC\n");
681   printf("      if( toupper(z[0])!=zKW[0] ) continue;\n");
682   printf("      if( toupper(z[1])!=zKW[1] ) continue;\n");
683   printf("      j = 2;\n");
684   printf("      while( j<n && toupper(z[j])==zKW[j] ){ j++; }\n");
685   printf("#endif\n");
686   printf("      if( j<n ) continue;\n");
687   for(i=0; i<nKeyword; i++){
688     printf("      testcase( i==%d ); /* %s */\n",
689            i, aKeywordTable[i].zOrigName);
690   }
691   printf("      *pType = aKWCode[i];\n");
692   printf("      break;\n");
693   printf("    }\n");
694   printf("  }\n");
695   printf("  return n;\n");
696   printf("}\n");
697   printf("int sqlite3KeywordCode(const unsigned char *z, int n){\n");
698   printf("  int id = TK_ID;\n");
699   printf("  keywordCode((char*)z, n, &id);\n");
700   printf("  return id;\n");
701   printf("}\n");
702   printf("#define SQLITE_N_KEYWORD %d\n", nKeyword);
703   printf("int sqlite3_keyword_name(int i,const char **pzName,int *pnName){\n");
704   printf("  if( i<0 || i>=SQLITE_N_KEYWORD ) return SQLITE_ERROR;\n");
705   printf("  *pzName = zKWText + aKWOffset[i];\n");
706   printf("  *pnName = aKWLen[i];\n");
707   printf("  return SQLITE_OK;\n");
708   printf("}\n");
709   printf("int sqlite3_keyword_count(void){ return SQLITE_N_KEYWORD; }\n");
710   printf("int sqlite3_keyword_check(const char *zName, int nName){\n");
711   printf("  return TK_ID!=sqlite3KeywordCode((const u8*)zName, nName);\n");
712   printf("}\n");
713 
714   return 0;
715 }
716