1 /*
2 ** 2014 Jun 09
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This is an SQLite module implementing full-text search.
14 */
15 
16 
17 #include "fts5Int.h"
18 
/*
** Default values for the run-time configuration options. These are
** installed by sqlite3Fts5ConfigLoad() before the contents of the
** %_config table are read. FTS5_DEFAULT_AUTOMERGE and
** FTS5_DEFAULT_CRISISMERGE are also substituted by
** sqlite3Fts5ConfigSetValue() for certain small "automerge" and
** "crisismerge" settings.
*/
#define FTS5_DEFAULT_PAGE_SIZE   4050
#define FTS5_DEFAULT_AUTOMERGE      4
#define FTS5_DEFAULT_USERMERGE      4
#define FTS5_DEFAULT_CRISISMERGE   16
#define FTS5_DEFAULT_HASHSIZE    (1024*1024)

/* Maximum allowed page size. Larger "pgsz" settings are rejected by
** sqlite3Fts5ConfigSetValue(). */
#define FTS5_MAX_PAGE_SIZE (64*1024)
27 
/*
** Return 1 if character x counts as white-space for the purposes of
** parsing configuration arguments, or 0 otherwise. Only the ASCII space
** character qualifies.
*/
static int fts5_iswhitespace(char x){
  return x==' ' ? 1 : 0;
}
31 
/*
** Return 1 if character x may open a quoted word - one of ", ', [ or ` -
** or 0 otherwise.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
35 
/*
** Skip over any white-space found at the start of nul-terminated string
** pIn. The value returned points to the first character at or after pIn
** that is not a white-space character (this may be the nul-terminator).
**
** If pIn is NULL, NULL is returned. This allows the result of a failed
** parse step to be passed straight through this function.
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *z;
  if( pIn==0 ) return 0;
  for(z=pIn; fts5_iswhitespace(*z); z++);
  return z;
}
48 
/*
** Argument pIn points to a character that is part of a nul-terminated
** string. Skip over the run of "bareword" characters (as defined by
** sqlite3Fts5IsBareword()) that begins at pIn and return a pointer to
** the first character that follows it.
**
** If *pIn is not itself a bareword character, there is no bareword to
** skip and NULL is returned.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *zEnd;
  for(zEnd=pIn; sqlite3Fts5IsBareword(*zEnd); zEnd++);
  return (zEnd==pIn) ? 0 : zEnd;
}
60 
/*
** Return 1 if character a is an ASCII decimal digit ('0'..'9'), or 0
** otherwise.
*/
static int fts5_isdigit(char a){
  return !(a<'0' || a>'9');
}
64 
65 
66 
/*
** Argument pIn points to the first character of what is expected to be
** an SQL literal within a nul-terminated string. The forms recognized
** are:
**
**   + NULL (matched case-insensitively),
**   + a blob literal - X'...' enclosing an even number of hex digits,
**   + a string literal in single quotes, where '' is an escaped quote,
**   + a number - an optional sign, zero or more digits and an optional
**     fractional part (".digits"). Exponents are not recognized. Note
**     that a sign with no digits following it is accepted.
**
** Return a pointer to the character immediately following the literal,
** or NULL if there is no literal at pIn.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* (p-pIn) is the number of hex digits plus two (for the "X'"
        ** prefix). So it is even if and only if the number of hex digits
        ** is even. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      while( 1 ){
        if( *p=='\0' ){
          /* Reached the end of the input without finding the close-quote.
          ** This test is made before advancing so that an input consisting
          ** of a single "'" character does not cause a read past the
          ** nul-terminator. */
          p = 0;
          break;
        }
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;     /* That was the close-quote */
          /* Otherwise, the two characters form an escaped quote. */
        }
        p++;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it continues
      ** with a decimal point followed by at least one digit. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
129 
/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()).
**
** This function dequotes the string in place. The text between the
** open-quote and the corresponding close-quote is shifted to the start of
** the buffer, each doubled close-quote character within it is collapsed
** to a single character, and a new nul-terminator is appended. The
** close-quote matching '[' is ']'; every other quote character closes
** itself.
**
** The value returned is the byte offset, within the original text, of
** the character immediately following the close-quote. If the input ends
** before a close-quote is found, everything following the open-quote is
** treated as the quoted text and the offset of the original
** nul-terminator is returned.
*/
static int fts5Dequote(char *z){
  char cClose = z[0];             /* Close-quote character */
  int iRead = 1;                  /* Offset of next byte to read */
  int iWrite = 0;                 /* Offset of next byte to write */

  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ) cClose = ']';

  for(;;){
    const char c = z[iRead];
    if( c=='\0' ) break;
    if( c==cClose ){
      if( z[iRead+1]==cClose ){
        /* A doubled quote character. Consume both input bytes and emit
        ** a single quote character. */
        iRead += 2;
        z[iWrite++] = cClose;
        continue;
      }
      /* This is the close-quote. Step past it and stop. */
      iRead++;
      break;
    }
    z[iWrite++] = c;
    iRead++;
  }

  z[iWrite] = '\0';
  return iRead;
}
174 
/*
** Convert an SQL-style quoted string into a normal string by removing
** the quote characters. The conversion is done in-place. If the first
** character of the input is not one of the four recognized open-quote
** characters, this routine is a no-op.
**
** The input must not begin with a white-space character.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );
  switch( z[0] ){
    case '[':
    case '\'':
    case '"':
    case '`':
      fts5Dequote(z);
      break;
    default:
      /* Not quoted. Leave the buffer untouched. */
      break;
  }
}
197 
198 
/*
** One entry in a table that maps the text of an enumerated option value
** (zName) to the corresponding integer code (eVal). Tables of these are
** terminated by an entry with zName set to NULL. See fts5ConfigSetEnum().
*/
typedef struct Fts5Enum Fts5Enum;
struct Fts5Enum {
  const char *zName;              /* Name of the option value */
  int eVal;                       /* Integer code for zName */
};
204 
fts5ConfigSetEnum(const Fts5Enum * aEnum,const char * zEnum,int * peVal)205 static int fts5ConfigSetEnum(
206   const Fts5Enum *aEnum,
207   const char *zEnum,
208   int *peVal
209 ){
210   int nEnum = (int)strlen(zEnum);
211   int i;
212   int iVal = -1;
213 
214   for(i=0; aEnum[i].zName; i++){
215     if( sqlite3_strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){
216       if( iVal>=0 ) return SQLITE_ERROR;
217       iVal = aEnum[i].eVal;
218     }
219   }
220 
221   *peVal = iVal;
222   return iVal<0 ? SQLITE_ERROR : SQLITE_OK;
223 }
224 
225 /*
226 ** Parse a "special" CREATE VIRTUAL TABLE directive and update
227 ** configuration object pConfig as appropriate.
228 **
229 ** If successful, object pConfig is updated and SQLITE_OK returned. If
230 ** an error occurs, an SQLite error code is returned and an error message
231 ** may be left in *pzErr. It is the responsibility of the caller to
232 ** eventually free any such error message using sqlite3_free().
233 */
fts5ConfigParseSpecial(Fts5Global * pGlobal,Fts5Config * pConfig,const char * zCmd,const char * zArg,char ** pzErr)234 static int fts5ConfigParseSpecial(
235   Fts5Global *pGlobal,
236   Fts5Config *pConfig,            /* Configuration object to update */
237   const char *zCmd,               /* Special command to parse */
238   const char *zArg,               /* Argument to parse */
239   char **pzErr                    /* OUT: Error message */
240 ){
241   int rc = SQLITE_OK;
242   int nCmd = (int)strlen(zCmd);
243   if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
244     const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
245     const char *p;
246     int bFirst = 1;
247     if( pConfig->aPrefix==0 ){
248       pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
249       if( rc ) return rc;
250     }
251 
252     p = zArg;
253     while( 1 ){
254       int nPre = 0;
255 
256       while( p[0]==' ' ) p++;
257       if( bFirst==0 && p[0]==',' ){
258         p++;
259         while( p[0]==' ' ) p++;
260       }else if( p[0]=='\0' ){
261         break;
262       }
263       if( p[0]<'0' || p[0]>'9' ){
264         *pzErr = sqlite3_mprintf("malformed prefix=... directive");
265         rc = SQLITE_ERROR;
266         break;
267       }
268 
269       if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES ){
270         *pzErr = sqlite3_mprintf(
271             "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES
272         );
273         rc = SQLITE_ERROR;
274         break;
275       }
276 
277       while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
278         nPre = nPre*10 + (p[0] - '0');
279         p++;
280       }
281 
282       if( nPre<=0 || nPre>=1000 ){
283         *pzErr = sqlite3_mprintf("prefix length out of range (max 999)");
284         rc = SQLITE_ERROR;
285         break;
286       }
287 
288       pConfig->aPrefix[pConfig->nPrefix] = nPre;
289       pConfig->nPrefix++;
290       bFirst = 0;
291     }
292     assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES );
293     return rc;
294   }
295 
296   if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
297     const char *p = (const char*)zArg;
298     sqlite3_int64 nArg = strlen(zArg) + 1;
299     char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
300     char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
301     char *pSpace = pDel;
302 
303     if( azArg && pSpace ){
304       if( pConfig->pTok ){
305         *pzErr = sqlite3_mprintf("multiple tokenize=... directives");
306         rc = SQLITE_ERROR;
307       }else{
308         for(nArg=0; p && *p; nArg++){
309           const char *p2 = fts5ConfigSkipWhitespace(p);
310           if( *p2=='\'' ){
311             p = fts5ConfigSkipLiteral(p2);
312           }else{
313             p = fts5ConfigSkipBareword(p2);
314           }
315           if( p ){
316             memcpy(pSpace, p2, p-p2);
317             azArg[nArg] = pSpace;
318             sqlite3Fts5Dequote(pSpace);
319             pSpace += (p - p2) + 1;
320             p = fts5ConfigSkipWhitespace(p);
321           }
322         }
323         if( p==0 ){
324           *pzErr = sqlite3_mprintf("parse error in tokenize directive");
325           rc = SQLITE_ERROR;
326         }else{
327           rc = sqlite3Fts5GetTokenizer(pGlobal,
328               (const char**)azArg, (int)nArg, &pConfig->pTok, &pConfig->pTokApi,
329               pzErr
330           );
331         }
332       }
333     }
334 
335     sqlite3_free(azArg);
336     sqlite3_free(pDel);
337     return rc;
338   }
339 
340   if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
341     if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
342       *pzErr = sqlite3_mprintf("multiple content=... directives");
343       rc = SQLITE_ERROR;
344     }else{
345       if( zArg[0] ){
346         pConfig->eContent = FTS5_CONTENT_EXTERNAL;
347         pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg);
348       }else{
349         pConfig->eContent = FTS5_CONTENT_NONE;
350       }
351     }
352     return rc;
353   }
354 
355   if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){
356     if( pConfig->zContentRowid ){
357       *pzErr = sqlite3_mprintf("multiple content_rowid=... directives");
358       rc = SQLITE_ERROR;
359     }else{
360       pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
361     }
362     return rc;
363   }
364 
365   if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){
366     if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
367       *pzErr = sqlite3_mprintf("malformed columnsize=... directive");
368       rc = SQLITE_ERROR;
369     }else{
370       pConfig->bColumnsize = (zArg[0]=='1');
371     }
372     return rc;
373   }
374 
375   if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){
376     const Fts5Enum aDetail[] = {
377       { "none", FTS5_DETAIL_NONE },
378       { "full", FTS5_DETAIL_FULL },
379       { "columns", FTS5_DETAIL_COLUMNS },
380       { 0, 0 }
381     };
382 
383     if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){
384       *pzErr = sqlite3_mprintf("malformed detail=... directive");
385     }
386     return rc;
387   }
388 
389   *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
390   return SQLITE_ERROR;
391 }
392 
393 /*
394 ** Allocate an instance of the default tokenizer ("simple") at
395 ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error
396 ** code if an error occurs.
397 */
fts5ConfigDefaultTokenizer(Fts5Global * pGlobal,Fts5Config * pConfig)398 static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
399   assert( pConfig->pTok==0 && pConfig->pTokApi==0 );
400   return sqlite3Fts5GetTokenizer(
401       pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi, 0
402   );
403 }
404 
405 /*
406 ** Gobble up the first bareword or quoted word from the input buffer zIn.
407 ** Return a pointer to the character immediately following the last in
408 ** the gobbled word if successful, or a NULL pointer otherwise (failed
409 ** to find close-quote character).
410 **
411 ** Before returning, set pzOut to point to a new buffer containing a
412 ** nul-terminated, dequoted copy of the gobbled word. If the word was
413 ** quoted, *pbQuoted is also set to 1 before returning.
414 **
415 ** If *pRc is other than SQLITE_OK when this function is called, it is
416 ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this
417 ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not*
418 ** set if a parse error (failed to find close quote) occurs.
419 */
fts5ConfigGobbleWord(int * pRc,const char * zIn,char ** pzOut,int * pbQuoted)420 static const char *fts5ConfigGobbleWord(
421   int *pRc,                       /* IN/OUT: Error code */
422   const char *zIn,                /* Buffer to gobble string/bareword from */
423   char **pzOut,                   /* OUT: malloc'd buffer containing str/bw */
424   int *pbQuoted                   /* OUT: Set to true if dequoting required */
425 ){
426   const char *zRet = 0;
427 
428   sqlite3_int64 nIn = strlen(zIn);
429   char *zOut = sqlite3_malloc64(nIn+1);
430 
431   assert( *pRc==SQLITE_OK );
432   *pbQuoted = 0;
433   *pzOut = 0;
434 
435   if( zOut==0 ){
436     *pRc = SQLITE_NOMEM;
437   }else{
438     memcpy(zOut, zIn, (size_t)(nIn+1));
439     if( fts5_isopenquote(zOut[0]) ){
440       int ii = fts5Dequote(zOut);
441       zRet = &zIn[ii];
442       *pbQuoted = 1;
443     }else{
444       zRet = fts5ConfigSkipBareword(zIn);
445       if( zRet ){
446         zOut[zRet-zIn] = '\0';
447       }
448     }
449   }
450 
451   if( zRet==0 ){
452     sqlite3_free(zOut);
453   }else{
454     *pzOut = zOut;
455   }
456 
457   return zRet;
458 }
459 
/*
** Add a column named zCol to configuration object p. zArg is either NULL
** or the text of a column option. The only option recognized is
** "unindexed" (case-insensitive), which sets the abUnindexed[] flag for
** the new column.
**
** SQLITE_ERROR is returned, and an error message left in *pzErr, if zCol
** is one of the reserved names FTS5_RANK_NAME or FTS5_ROWID_NAME, or if
** zArg is an unrecognized option. Otherwise SQLITE_OK is returned.
**
** Whether or not an error occurs, pointer zCol is stored in p->azCol[]
** and p->nCol is incremented. sqlite3Fts5ConfigFree() passes each
** azCol[] entry to sqlite3_free(), so the caller must not free zCol
** after this function returns.
*/
static int fts5ConfigParseColumn(
  Fts5Config *p,
  char *zCol,
  char *zArg,
  char **pzErr
){
  int rc = SQLITE_OK;
  const int bReserved = (
      sqlite3_stricmp(zCol, FTS5_RANK_NAME)==0
   || sqlite3_stricmp(zCol, FTS5_ROWID_NAME)==0
  );

  if( bReserved ){
    *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
    rc = SQLITE_ERROR;
  }else if( zArg!=0 ){
    if( sqlite3_stricmp(zArg, "unindexed")!=0 ){
      *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
      rc = SQLITE_ERROR;
    }else{
      p->abUnindexed[p->nCol] = 1;
    }
  }

  p->azCol[p->nCol] = zCol;
  p->nCol++;
  return rc;
}
484 
485 /*
486 ** Populate the Fts5Config.zContentExprlist string.
487 */
fts5ConfigMakeExprlist(Fts5Config * p)488 static int fts5ConfigMakeExprlist(Fts5Config *p){
489   int i;
490   int rc = SQLITE_OK;
491   Fts5Buffer buf = {0, 0, 0};
492 
493   sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid);
494   if( p->eContent!=FTS5_CONTENT_NONE ){
495     for(i=0; i<p->nCol; i++){
496       if( p->eContent==FTS5_CONTENT_EXTERNAL ){
497         sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]);
498       }else{
499         sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i);
500       }
501     }
502   }
503 
504   assert( p->zContentExprlist==0 );
505   p->zContentExprlist = (char*)buf.p;
506   return rc;
507 }
508 
509 /*
510 ** Arguments nArg/azArg contain the string arguments passed to the xCreate
511 ** or xConnect method of the virtual table. This function attempts to
512 ** allocate an instance of Fts5Config containing the results of parsing
513 ** those arguments.
514 **
515 ** If successful, SQLITE_OK is returned and *ppOut is set to point to the
516 ** new Fts5Config object. If an error occurs, an SQLite error code is
517 ** returned, *ppOut is set to NULL and an error message may be left in
518 ** *pzErr. It is the responsibility of the caller to eventually free any
519 ** such error message using sqlite3_free().
520 */
sqlite3Fts5ConfigParse(Fts5Global * pGlobal,sqlite3 * db,int nArg,const char ** azArg,Fts5Config ** ppOut,char ** pzErr)521 int sqlite3Fts5ConfigParse(
522   Fts5Global *pGlobal,
523   sqlite3 *db,
524   int nArg,                       /* Number of arguments */
525   const char **azArg,             /* Array of nArg CREATE VIRTUAL TABLE args */
526   Fts5Config **ppOut,             /* OUT: Results of parse */
527   char **pzErr                    /* OUT: Error message */
528 ){
529   int rc = SQLITE_OK;             /* Return code */
530   Fts5Config *pRet;               /* New object to return */
531   int i;
532   sqlite3_int64 nByte;
533 
534   *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
535   if( pRet==0 ) return SQLITE_NOMEM;
536   memset(pRet, 0, sizeof(Fts5Config));
537   pRet->db = db;
538   pRet->iCookie = -1;
539 
540   nByte = nArg * (sizeof(char*) + sizeof(u8));
541   pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
542   pRet->abUnindexed = (u8*)&pRet->azCol[nArg];
543   pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
544   pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
545   pRet->bColumnsize = 1;
546   pRet->eDetail = FTS5_DETAIL_FULL;
547 #ifdef SQLITE_DEBUG
548   pRet->bPrefixIndex = 1;
549 #endif
550   if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
551     *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
552     rc = SQLITE_ERROR;
553   }
554 
555   for(i=3; rc==SQLITE_OK && i<nArg; i++){
556     const char *zOrig = azArg[i];
557     const char *z;
558     char *zOne = 0;
559     char *zTwo = 0;
560     int bOption = 0;
561     int bMustBeCol = 0;
562 
563     z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
564     z = fts5ConfigSkipWhitespace(z);
565     if( z && *z=='=' ){
566       bOption = 1;
567       z++;
568       if( bMustBeCol ) z = 0;
569     }
570     z = fts5ConfigSkipWhitespace(z);
571     if( z && z[0] ){
572       int bDummy;
573       z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
574       if( z && z[0] ) z = 0;
575     }
576 
577     if( rc==SQLITE_OK ){
578       if( z==0 ){
579         *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
580         rc = SQLITE_ERROR;
581       }else{
582         if( bOption ){
583           rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo?zTwo:"", pzErr);
584         }else{
585           rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr);
586           zOne = 0;
587         }
588       }
589     }
590 
591     sqlite3_free(zOne);
592     sqlite3_free(zTwo);
593   }
594 
595   /* If a tokenizer= option was successfully parsed, the tokenizer has
596   ** already been allocated. Otherwise, allocate an instance of the default
597   ** tokenizer (unicode61) now.  */
598   if( rc==SQLITE_OK && pRet->pTok==0 ){
599     rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
600   }
601 
602   /* If no zContent option was specified, fill in the default values. */
603   if( rc==SQLITE_OK && pRet->zContent==0 ){
604     const char *zTail = 0;
605     assert( pRet->eContent==FTS5_CONTENT_NORMAL
606          || pRet->eContent==FTS5_CONTENT_NONE
607     );
608     if( pRet->eContent==FTS5_CONTENT_NORMAL ){
609       zTail = "content";
610     }else if( pRet->bColumnsize ){
611       zTail = "docsize";
612     }
613 
614     if( zTail ){
615       pRet->zContent = sqlite3Fts5Mprintf(
616           &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail
617       );
618     }
619   }
620 
621   if( rc==SQLITE_OK && pRet->zContentRowid==0 ){
622     pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1);
623   }
624 
625   /* Formulate the zContentExprlist text */
626   if( rc==SQLITE_OK ){
627     rc = fts5ConfigMakeExprlist(pRet);
628   }
629 
630   if( rc!=SQLITE_OK ){
631     sqlite3Fts5ConfigFree(pRet);
632     *ppOut = 0;
633   }
634   return rc;
635 }
636 
637 /*
638 ** Free the configuration object passed as the only argument.
639 */
sqlite3Fts5ConfigFree(Fts5Config * pConfig)640 void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
641   if( pConfig ){
642     int i;
643     if( pConfig->pTok ){
644       pConfig->pTokApi->xDelete(pConfig->pTok);
645     }
646     sqlite3_free(pConfig->zDb);
647     sqlite3_free(pConfig->zName);
648     for(i=0; i<pConfig->nCol; i++){
649       sqlite3_free(pConfig->azCol[i]);
650     }
651     sqlite3_free(pConfig->azCol);
652     sqlite3_free(pConfig->aPrefix);
653     sqlite3_free(pConfig->zRank);
654     sqlite3_free(pConfig->zRankArgs);
655     sqlite3_free(pConfig->zContent);
656     sqlite3_free(pConfig->zContentRowid);
657     sqlite3_free(pConfig->zContentExprlist);
658     sqlite3_free(pConfig);
659   }
660 }
661 
662 /*
663 ** Call sqlite3_declare_vtab() based on the contents of the configuration
664 ** object passed as the only argument. Return SQLITE_OK if successful, or
665 ** an SQLite error code if an error occurs.
666 */
sqlite3Fts5ConfigDeclareVtab(Fts5Config * pConfig)667 int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
668   int i;
669   int rc = SQLITE_OK;
670   char *zSql;
671 
672   zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(");
673   for(i=0; zSql && i<pConfig->nCol; i++){
674     const char *zSep = (i==0?"":", ");
675     zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]);
676   }
677   zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)",
678       zSql, pConfig->zName, FTS5_RANK_NAME
679   );
680 
681   assert( zSql || rc==SQLITE_NOMEM );
682   if( zSql ){
683     rc = sqlite3_declare_vtab(pConfig->db, zSql);
684     sqlite3_free(zSql);
685   }
686 
687   return rc;
688 }
689 
690 /*
691 ** Tokenize the text passed via the second and third arguments.
692 **
693 ** The callback is invoked once for each token in the input text. The
694 ** arguments passed to it are, in order:
695 **
696 **     void *pCtx          // Copy of 4th argument to sqlite3Fts5Tokenize()
697 **     const char *pToken  // Pointer to buffer containing token
698 **     int nToken          // Size of token in bytes
699 **     int iStart          // Byte offset of start of token within input text
700 **     int iEnd            // Byte offset of end of token within input text
701 **     int iPos            // Position of token in input (first token is 0)
702 **
703 ** If the callback returns a non-zero value the tokenization is abandoned
704 ** and no further callbacks are issued.
705 **
706 ** This function returns SQLITE_OK if successful or an SQLite error code
707 ** if an error occurs. If the tokenization was abandoned early because
708 ** the callback returned SQLITE_DONE, this is not an error and this function
709 ** still returns SQLITE_OK. Or, if the tokenization was abandoned early
710 ** because the callback returned another non-zero value, it is assumed
711 ** to be an SQLite error code and returned to the caller.
712 */
sqlite3Fts5Tokenize(Fts5Config * pConfig,int flags,const char * pText,int nText,void * pCtx,int (* xToken)(void *,int,const char *,int,int,int))713 int sqlite3Fts5Tokenize(
714   Fts5Config *pConfig,            /* FTS5 Configuration object */
715   int flags,                      /* FTS5_TOKENIZE_* flags */
716   const char *pText, int nText,   /* Text to tokenize */
717   void *pCtx,                     /* Context passed to xToken() */
718   int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
719 ){
720   if( pText==0 ) return SQLITE_OK;
721   return pConfig->pTokApi->xTokenize(
722       pConfig->pTok, pCtx, flags, pText, nText, xToken
723   );
724 }
725 
/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of one or more SQL literals followed by a ')'
** character. White-space is permitted around each literal. If the input
** actually is this, return a pointer to the ')'. Otherwise, return NULL
** to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *z = pIn;

  for(;;){
    z = fts5ConfigSkipWhitespace(z);
    z = fts5ConfigSkipLiteral(z);
    z = fts5ConfigSkipWhitespace(z);
    if( z==0 ) return 0;          /* Not a literal */
    if( *z==')' ) return z;       /* End of list */
    if( *z!=',' ) return 0;       /* Junk following a literal */
    z++;
  }
}
749 
750 /*
751 ** Parameter zIn contains a rank() function specification. The format of
752 ** this is:
753 **
754 **   + Bareword (function name)
755 **   + Open parenthesis - "("
756 **   + Zero or more SQL literals in a comma separated list
757 **   + Close parenthesis - ")"
758 */
sqlite3Fts5ConfigParseRank(const char * zIn,char ** pzRank,char ** pzRankArgs)759 int sqlite3Fts5ConfigParseRank(
760   const char *zIn,                /* Input string */
761   char **pzRank,                  /* OUT: Rank function name */
762   char **pzRankArgs               /* OUT: Rank function arguments */
763 ){
764   const char *p = zIn;
765   const char *pRank;
766   char *zRank = 0;
767   char *zRankArgs = 0;
768   int rc = SQLITE_OK;
769 
770   *pzRank = 0;
771   *pzRankArgs = 0;
772 
773   if( p==0 ){
774     rc = SQLITE_ERROR;
775   }else{
776     p = fts5ConfigSkipWhitespace(p);
777     pRank = p;
778     p = fts5ConfigSkipBareword(p);
779 
780     if( p ){
781       zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank);
782       if( zRank ) memcpy(zRank, pRank, p-pRank);
783     }else{
784       rc = SQLITE_ERROR;
785     }
786 
787     if( rc==SQLITE_OK ){
788       p = fts5ConfigSkipWhitespace(p);
789       if( *p!='(' ) rc = SQLITE_ERROR;
790       p++;
791     }
792     if( rc==SQLITE_OK ){
793       const char *pArgs;
794       p = fts5ConfigSkipWhitespace(p);
795       pArgs = p;
796       if( *p!=')' ){
797         p = fts5ConfigSkipArgs(p);
798         if( p==0 ){
799           rc = SQLITE_ERROR;
800         }else{
801           zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs);
802           if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs);
803         }
804       }
805     }
806   }
807 
808   if( rc!=SQLITE_OK ){
809     sqlite3_free(zRank);
810     assert( zRankArgs==0 );
811   }else{
812     *pzRank = zRank;
813     *pzRankArgs = zRankArgs;
814   }
815   return rc;
816 }
817 
/*
** Set the run-time configuration option identified by zKey (compared
** case-insensitively) to the value pVal. The options recognized are:
**
**     pgsz         Integer between 32 and FTS5_MAX_PAGE_SIZE, inclusive.
**     hashsize     Integer greater than zero.
**     automerge    Integer between 0 and 64, inclusive. The value 1
**                  selects FTS5_DEFAULT_AUTOMERGE.
**     usermerge    Integer between 2 and 16, inclusive.
**     crisismerge  Non-negative integer. Values 0 and 1 select
**                  FTS5_DEFAULT_CRISISMERGE. Values of FTS5_MAX_SEGMENT
**                  or greater are clipped to FTS5_MAX_SEGMENT-1.
**     rank         Text, parsed by sqlite3Fts5ConfigParseRank().
**
** If zKey is not one of the above, or if pVal is not an acceptable value
** for it, *pbBadkey is set to 1 and pConfig is left unmodified. This
** function never clears *pbBadkey, so the caller should set it to 0
** before calling. An unrecognized key or unacceptable value is not
** considered an error - SQLITE_OK is still returned.
**
** The only error codes returned are those other than SQLITE_ERROR
** produced by sqlite3Fts5ConfigParseRank().
*/
int sqlite3Fts5ConfigSetValue(
  Fts5Config *pConfig,
  const char *zKey,
  sqlite3_value *pVal,
  int *pbBadkey
){
  int rc = SQLITE_OK;

  if( sqlite3_stricmp(zKey, "pgsz")==0 ){
    int nPgsz = 0;
    if( sqlite3_value_numeric_type(pVal)==SQLITE_INTEGER ){
      nPgsz = sqlite3_value_int(pVal);
    }
    if( nPgsz>=32 && nPgsz<=FTS5_MAX_PAGE_SIZE ){
      pConfig->pgsz = nPgsz;
    }else{
      *pbBadkey = 1;
    }
  }

  else if( sqlite3_stricmp(zKey, "hashsize")==0 ){
    int nHash = -1;
    if( sqlite3_value_numeric_type(pVal)==SQLITE_INTEGER ){
      nHash = sqlite3_value_int(pVal);
    }
    if( nHash>0 ){
      pConfig->nHashSize = nHash;
    }else{
      *pbBadkey = 1;
    }
  }

  else if( sqlite3_stricmp(zKey, "automerge")==0 ){
    int nAuto = -1;
    if( sqlite3_value_numeric_type(pVal)==SQLITE_INTEGER ){
      nAuto = sqlite3_value_int(pVal);
    }
    if( nAuto>=0 && nAuto<=64 ){
      pConfig->nAutomerge = (nAuto==1) ? FTS5_DEFAULT_AUTOMERGE : nAuto;
    }else{
      *pbBadkey = 1;
    }
  }

  else if( sqlite3_stricmp(zKey, "usermerge")==0 ){
    int nUser = -1;
    if( sqlite3_value_numeric_type(pVal)==SQLITE_INTEGER ){
      nUser = sqlite3_value_int(pVal);
    }
    if( nUser>=2 && nUser<=16 ){
      pConfig->nUsermerge = nUser;
    }else{
      *pbBadkey = 1;
    }
  }

  else if( sqlite3_stricmp(zKey, "crisismerge")==0 ){
    int nCrisis = -1;
    if( sqlite3_value_numeric_type(pVal)==SQLITE_INTEGER ){
      nCrisis = sqlite3_value_int(pVal);
    }
    if( nCrisis>=0 ){
      if( nCrisis<=1 ){
        nCrisis = FTS5_DEFAULT_CRISISMERGE;
      }
      if( nCrisis>=FTS5_MAX_SEGMENT ){
        nCrisis = FTS5_MAX_SEGMENT-1;
      }
      pConfig->nCrisisMerge = nCrisis;
    }else{
      *pbBadkey = 1;
    }
  }

  else if( sqlite3_stricmp(zKey, "rank")==0 ){
    const char *zSpec = (const char*)sqlite3_value_text(pVal);
    char *zNewRank;
    char *zNewArgs;
    rc = sqlite3Fts5ConfigParseRank(zSpec, &zNewRank, &zNewArgs);
    if( rc==SQLITE_OK ){
      /* Replace any existing rank function specification. */
      sqlite3_free(pConfig->zRank);
      sqlite3_free(pConfig->zRankArgs);
      pConfig->zRank = zNewRank;
      pConfig->zRankArgs = zNewArgs;
    }else if( rc==SQLITE_ERROR ){
      /* A parse error is reported as a bad value, not as an error. */
      rc = SQLITE_OK;
      *pbBadkey = 1;
    }
  }

  else{
    *pbBadkey = 1;
  }

  return rc;
}
908 
909 /*
910 ** Load the contents of the %_config table into memory.
911 */
sqlite3Fts5ConfigLoad(Fts5Config * pConfig,int iCookie)912 int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
913   const char *zSelect = "SELECT k, v FROM %Q.'%q_config'";
914   char *zSql;
915   sqlite3_stmt *p = 0;
916   int rc = SQLITE_OK;
917   int iVersion = 0;
918 
919   /* Set default values */
920   pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
921   pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
922   pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE;
923   pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
924   pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE;
925 
926   zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName);
927   if( zSql ){
928     rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0);
929     sqlite3_free(zSql);
930   }
931 
932   assert( rc==SQLITE_OK || p==0 );
933   if( rc==SQLITE_OK ){
934     while( SQLITE_ROW==sqlite3_step(p) ){
935       const char *zK = (const char*)sqlite3_column_text(p, 0);
936       sqlite3_value *pVal = sqlite3_column_value(p, 1);
937       if( 0==sqlite3_stricmp(zK, "version") ){
938         iVersion = sqlite3_value_int(pVal);
939       }else{
940         int bDummy = 0;
941         sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy);
942       }
943     }
944     rc = sqlite3_finalize(p);
945   }
946 
947   if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){
948     rc = SQLITE_ERROR;
949     if( pConfig->pzErrmsg ){
950       assert( 0==*pConfig->pzErrmsg );
951       *pConfig->pzErrmsg = sqlite3_mprintf(
952           "invalid fts5 file format (found %d, expected %d) - run 'rebuild'",
953           iVersion, FTS5_CURRENT_VERSION
954       );
955     }
956   }
957 
958   if( rc==SQLITE_OK ){
959     pConfig->iCookie = iCookie;
960   }
961   return rc;
962 }
963