1 /*
2 ** 2013-02-28
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains code to implement the next_char(A,T,F,W,C) SQL function.
14 **
15 ** The next_char(A,T,F,W,C) function finds all valid "next" characters for
16 ** string A given the vocabulary in T.F.  If the W value exists and is a
17 ** non-empty string, then it is an SQL expression that limits the entries
18 ** in T.F that will be considered.  If C exists and is a non-empty string,
19 ** then it is the name of the collating sequence to use for comparison.  If
20 **
21 ** Only the first three arguments are required.  If the C parameter is
22 ** omitted or is NULL or is an empty string, then the default collating
23 ** sequence of T.F is used for comparision.  If the W parameter is omitted
24 ** or is NULL or is an empty string, then no filtering of the output is
25 ** done.
26 **
27 ** The T.F column should be indexed using collation C or else this routine
28 ** will be quite slow.
29 **
30 ** For example, suppose an application has a dictionary like this:
31 **
32 **   CREATE TABLE dictionary(word TEXT UNIQUE);
33 **
34 ** Further suppose that for user keypad entry, it is desired to disable
35 ** (gray out) keys that are not valid as the next character.  If the
36 ** the user has previously entered (say) 'cha' then to find all allowed
37 ** next characters (and thereby determine when keys should not be grayed
38 ** out) run the following query:
39 **
40 **   SELECT next_char('cha','dictionary','word');
41 **
42 ** IMPLEMENTATION NOTES:
43 **
44 ** The next_char function is implemented using recursive SQL that makes
45 ** use of the table name and column name as part of a query.  If either
46 ** the table name or column name are keywords or contain special characters,
47 ** then they should be escaped.  For example:
48 **
49 **   SELECT next_char('cha','[dictionary]','[word]');
50 **
51 ** This also means that the table name can be a subquery:
52 **
53 **   SELECT next_char('cha','(SELECT word AS w FROM dictionary)','w');
54 */
55 #include "sqlite3ext.h"
56 SQLITE_EXTENSION_INIT1
57 #include <string.h>
58 
59 /*
60 ** A structure to hold context of the next_char() computation across
61 ** nested function calls.
62 */
63 typedef struct nextCharContext nextCharContext;
64 struct nextCharContext {
65   sqlite3 *db;                      /* Database connection */
66   sqlite3_stmt *pStmt;              /* Prepared statement used to query */
67   const unsigned char *zPrefix;     /* Prefix to scan */
68   int nPrefix;                      /* Size of zPrefix in bytes */
69   int nAlloc;                       /* Space allocated to aResult */
70   int nUsed;                        /* Space used in aResult */
71   unsigned int *aResult;            /* Array of next characters */
72   int mallocFailed;                 /* True if malloc fails */
73   int otherError;                   /* True for any other failure */
74 };
75 
76 /*
77 ** Append a result character if the character is not already in the
78 ** result.
79 */
nextCharAppend(nextCharContext * p,unsigned c)80 static void nextCharAppend(nextCharContext *p, unsigned c){
81   int i;
82   for(i=0; i<p->nUsed; i++){
83     if( p->aResult[i]==c ) return;
84   }
85   if( p->nUsed+1 > p->nAlloc ){
86     unsigned int *aNew;
87     int n = p->nAlloc*2 + 30;
88     aNew = sqlite3_realloc64(p->aResult, n*sizeof(unsigned int));
89     if( aNew==0 ){
90       p->mallocFailed = 1;
91       return;
92     }else{
93       p->aResult = aNew;
94       p->nAlloc = n;
95     }
96   }
97   p->aResult[p->nUsed++] = c;
98 }
99 
100 /*
101 ** Write a character into z[] as UTF8.  Return the number of bytes needed
102 ** to hold the character
103 */
writeUtf8(unsigned char * z,unsigned c)104 static int writeUtf8(unsigned char *z, unsigned c){
105   if( c<0x00080 ){
106     z[0] = (unsigned char)(c&0xff);
107     return 1;
108   }
109   if( c<0x00800 ){
110     z[0] = 0xC0 + (unsigned char)((c>>6)&0x1F);
111     z[1] = 0x80 + (unsigned char)(c & 0x3F);
112     return 2;
113   }
114   if( c<0x10000 ){
115     z[0] = 0xE0 + (unsigned char)((c>>12)&0x0F);
116     z[1] = 0x80 + (unsigned char)((c>>6) & 0x3F);
117     z[2] = 0x80 + (unsigned char)(c & 0x3F);
118     return 3;
119   }
120   z[0] = 0xF0 + (unsigned char)((c>>18) & 0x07);
121   z[1] = 0x80 + (unsigned char)((c>>12) & 0x3F);
122   z[2] = 0x80 + (unsigned char)((c>>6) & 0x3F);
123   z[3] = 0x80 + (unsigned char)(c & 0x3F);
124   return 4;
125 }
126 
127 /*
128 ** Read a UTF8 character out of z[] and write it into *pOut.  Return
129 ** the number of bytes in z[] that were used to construct the character.
130 */
readUtf8(const unsigned char * z,unsigned * pOut)131 static int readUtf8(const unsigned char *z, unsigned *pOut){
132   static const unsigned char validBits[] = {
133     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
134     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
135     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
136     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
137     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
138     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
139     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
140     0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
141   };
142   unsigned c = z[0];
143   if( c<0xc0 ){
144     *pOut = c;
145     return 1;
146   }else{
147     int n = 1;
148     c = validBits[c-0xc0];
149     while( (z[n] & 0xc0)==0x80 ){
150       c = (c<<6) + (0x3f & z[n++]);
151     }
152     if( c<0x80 || (c&0xFFFFF800)==0xD800 || (c&0xFFFFFFFE)==0xFFFE ){
153       c = 0xFFFD;
154     }
155     *pOut = c;
156     return n;
157   }
158 }
159 
160 /*
161 ** The nextCharContext structure has been set up.  Add all "next" characters
162 ** to the result set.
163 */
findNextChars(nextCharContext * p)164 static void findNextChars(nextCharContext *p){
165   unsigned cPrev = 0;
166   unsigned char zPrev[8];
167   int n, rc;
168 
169   for(;;){
170     sqlite3_bind_text(p->pStmt, 1, (char*)p->zPrefix, p->nPrefix,
171                       SQLITE_STATIC);
172     n = writeUtf8(zPrev, cPrev+1);
173     sqlite3_bind_text(p->pStmt, 2, (char*)zPrev, n, SQLITE_STATIC);
174     rc = sqlite3_step(p->pStmt);
175     if( rc==SQLITE_DONE ){
176       sqlite3_reset(p->pStmt);
177       return;
178     }else if( rc!=SQLITE_ROW ){
179       p->otherError = rc;
180       return;
181     }else{
182       const unsigned char *zOut = sqlite3_column_text(p->pStmt, 0);
183       unsigned cNext;
184       n = readUtf8(zOut+p->nPrefix, &cNext);
185       sqlite3_reset(p->pStmt);
186       nextCharAppend(p, cNext);
187       cPrev = cNext;
188       if( p->mallocFailed ) return;
189     }
190   }
191 }
192 
193 
194 /*
195 ** next_character(A,T,F,W)
196 **
197 ** Return a string composted of all next possible characters after
198 ** A for elements of T.F.  If W is supplied, then it is an SQL expression
199 ** that limits the elements in T.F that are considered.
200 */
nextCharFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)201 static void nextCharFunc(
202   sqlite3_context *context,
203   int argc,
204   sqlite3_value **argv
205 ){
206   nextCharContext c;
207   const unsigned char *zTable = sqlite3_value_text(argv[1]);
208   const unsigned char *zField = sqlite3_value_text(argv[2]);
209   const unsigned char *zWhere;
210   const unsigned char *zCollName;
211   char *zWhereClause = 0;
212   char *zColl = 0;
213   char *zSql;
214   int rc;
215 
216   memset(&c, 0, sizeof(c));
217   c.db = sqlite3_context_db_handle(context);
218   c.zPrefix = sqlite3_value_text(argv[0]);
219   c.nPrefix = sqlite3_value_bytes(argv[0]);
220   if( zTable==0 || zField==0 || c.zPrefix==0 ) return;
221   if( argc>=4
222    && (zWhere = sqlite3_value_text(argv[3]))!=0
223    && zWhere[0]!=0
224   ){
225     zWhereClause = sqlite3_mprintf("AND (%s)", zWhere);
226     if( zWhereClause==0 ){
227       sqlite3_result_error_nomem(context);
228       return;
229     }
230   }else{
231     zWhereClause = "";
232   }
233   if( argc>=5
234    && (zCollName = sqlite3_value_text(argv[4]))!=0
235    && zCollName[0]!=0
236   ){
237     zColl = sqlite3_mprintf("collate \"%w\"", zCollName);
238     if( zColl==0 ){
239       sqlite3_result_error_nomem(context);
240       if( zWhereClause[0] ) sqlite3_free(zWhereClause);
241       return;
242     }
243   }else{
244     zColl = "";
245   }
246   zSql = sqlite3_mprintf(
247     "SELECT %s FROM %s"
248     " WHERE %s>=(?1 || ?2) %s"
249     "   AND %s<=(?1 || char(1114111)) %s" /* 1114111 == 0x10ffff */
250     "   %s"
251     " ORDER BY 1 %s ASC LIMIT 1",
252     zField, zTable, zField, zColl, zField, zColl, zWhereClause, zColl
253   );
254   if( zWhereClause[0] ) sqlite3_free(zWhereClause);
255   if( zColl[0] ) sqlite3_free(zColl);
256   if( zSql==0 ){
257     sqlite3_result_error_nomem(context);
258     return;
259   }
260 
261   rc = sqlite3_prepare_v2(c.db, zSql, -1, &c.pStmt, 0);
262   sqlite3_free(zSql);
263   if( rc ){
264     sqlite3_result_error(context, sqlite3_errmsg(c.db), -1);
265     return;
266   }
267   findNextChars(&c);
268   if( c.mallocFailed ){
269     sqlite3_result_error_nomem(context);
270   }else{
271     unsigned char *pRes;
272     pRes = sqlite3_malloc64( c.nUsed*4 + 1 );
273     if( pRes==0 ){
274       sqlite3_result_error_nomem(context);
275     }else{
276       int i;
277       int n = 0;
278       for(i=0; i<c.nUsed; i++){
279         n += writeUtf8(pRes+n, c.aResult[i]);
280       }
281       pRes[n] = 0;
282       sqlite3_result_text(context, (const char*)pRes, n, sqlite3_free);
283     }
284   }
285   sqlite3_finalize(c.pStmt);
286   sqlite3_free(c.aResult);
287 }
288 
289 #ifdef _WIN32
290 __declspec(dllexport)
291 #endif
sqlite3_nextchar_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)292 int sqlite3_nextchar_init(
293   sqlite3 *db,
294   char **pzErrMsg,
295   const sqlite3_api_routines *pApi
296 ){
297   int rc = SQLITE_OK;
298   SQLITE_EXTENSION_INIT2(pApi);
299   (void)pzErrMsg;  /* Unused parameter */
300   rc = sqlite3_create_function(db, "next_char", 3,
301                                SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
302                                nextCharFunc, 0, 0);
303   if( rc==SQLITE_OK ){
304     rc = sqlite3_create_function(db, "next_char", 4,
305                                  SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
306                                  nextCharFunc, 0, 0);
307   }
308   if( rc==SQLITE_OK ){
309     rc = sqlite3_create_function(db, "next_char", 5,
310                                  SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
311                                  nextCharFunc, 0, 0);
312   }
313   return rc;
314 }
315