1 /*
2 ** 2008 Nov 28
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This module contains code that implements a parser for fts3 query strings
14 ** (the right-hand argument to the MATCH operator). Because the supported
15 ** syntax is relatively simple, the whole tokenizer/parser system is
16 ** hand-coded.
17 */
18 #include "fts3Int.h"
19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
20 
21 /*
22 ** By default, this module parses the legacy syntax that has been
23 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
24 ** is defined, then it uses the new syntax. The differences between
25 ** the new and the old syntaxes are:
26 **
27 **  a) The new syntax supports parenthesis. The old does not.
28 **
29 **  b) The new syntax supports the AND and NOT operators. The old does not.
30 **
31 **  c) The old syntax supports the "-" token qualifier. This is not
32 **     supported by the new syntax (it is replaced by the NOT operator).
33 **
34 **  d) When using the old syntax, the OR operator has a greater precedence
35 **     than an implicit AND. When using the new, both implicity and explicit
36 **     AND operators have a higher precedence than OR.
37 **
38 ** If compiled with SQLITE_TEST defined, then this module exports the
39 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
40 ** to zero causes the module to use the old syntax. If it is set to
41 ** non-zero the new syntax is activated. This is so both syntaxes can
42 ** be tested using a single build of testfixture.
43 **
44 ** The following describes the syntax supported by the fts3 MATCH
45 ** operator in a similar format to that used by the lemon parser
46 ** generator. This module does not use actually lemon, it uses a
47 ** custom parser.
48 **
49 **   query ::= andexpr (OR andexpr)*.
50 **
51 **   andexpr ::= notexpr (AND? notexpr)*.
52 **
53 **   notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
54 **   notexpr ::= LP query RP.
55 **
56 **   nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
57 **
58 **   distance_opt ::= .
59 **   distance_opt ::= / INTEGER.
60 **
61 **   phrase ::= TOKEN.
62 **   phrase ::= COLUMN:TOKEN.
63 **   phrase ::= "TOKEN TOKEN TOKEN...".
64 */
65 
66 #ifdef SQLITE_TEST
67 int sqlite3_fts3_enable_parentheses = 0;
68 #else
69 # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
70 #  define sqlite3_fts3_enable_parentheses 1
71 # else
72 #  define sqlite3_fts3_enable_parentheses 0
73 # endif
74 #endif
75 
76 /*
77 ** Default span for NEAR operators.
78 */
79 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
80 
81 #include <string.h>
82 #include <assert.h>
83 
84 /*
85 ** isNot:
86 **   This variable is used by function getNextNode(). When getNextNode() is
87 **   called, it sets ParseContext.isNot to true if the 'next node' is a
88 **   FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
89 **   FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
90 **   zero.
91 */
92 typedef struct ParseContext ParseContext;
93 struct ParseContext {
94   sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
95   int iLangid;                        /* Language id used with tokenizer */
96   const char **azCol;                 /* Array of column names for fts3 table */
97   int bFts4;                          /* True to allow FTS4-only syntax */
98   int nCol;                           /* Number of entries in azCol[] */
99   int iDefaultCol;                    /* Default column to query */
100   int isNot;                          /* True if getNextNode() sees a unary - */
101   sqlite3_context *pCtx;              /* Write error message here */
102   int nNest;                          /* Number of nested brackets */
103 };
104 
105 /*
106 ** This function is equivalent to the standard isspace() function.
107 **
108 ** The standard isspace() can be awkward to use safely, because although it
109 ** is defined to accept an argument of type int, its behavior when passed
110 ** an integer that falls outside of the range of the unsigned char type
111 ** is undefined (and sometimes, "undefined" means segfault). This wrapper
112 ** is defined to accept an argument of type char, and always returns 0 for
113 ** any values that fall outside of the range of the unsigned char type (i.e.
114 ** negative values).
115 */
fts3isspace(char c)116 static int fts3isspace(char c){
117   return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
118 }
119 
120 /*
121 ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
122 ** zero the memory before returning a pointer to it. If unsuccessful,
123 ** return NULL.
124 */
fts3MallocZero(sqlite3_int64 nByte)125 static void *fts3MallocZero(sqlite3_int64 nByte){
126   void *pRet = sqlite3_malloc64(nByte);
127   if( pRet ) memset(pRet, 0, nByte);
128   return pRet;
129 }
130 
sqlite3Fts3OpenTokenizer(sqlite3_tokenizer * pTokenizer,int iLangid,const char * z,int n,sqlite3_tokenizer_cursor ** ppCsr)131 int sqlite3Fts3OpenTokenizer(
132   sqlite3_tokenizer *pTokenizer,
133   int iLangid,
134   const char *z,
135   int n,
136   sqlite3_tokenizer_cursor **ppCsr
137 ){
138   sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
139   sqlite3_tokenizer_cursor *pCsr = 0;
140   int rc;
141 
142   rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
143   assert( rc==SQLITE_OK || pCsr==0 );
144   if( rc==SQLITE_OK ){
145     pCsr->pTokenizer = pTokenizer;
146     if( pModule->iVersion>=1 ){
147       rc = pModule->xLanguageid(pCsr, iLangid);
148       if( rc!=SQLITE_OK ){
149         pModule->xClose(pCsr);
150         pCsr = 0;
151       }
152     }
153   }
154   *ppCsr = pCsr;
155   return rc;
156 }
157 
158 /*
159 ** Function getNextNode(), which is called by fts3ExprParse(), may itself
160 ** call fts3ExprParse(). So this forward declaration is required.
161 */
162 static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
163 
164 /*
165 ** Extract the next token from buffer z (length n) using the tokenizer
166 ** and other information (column names etc.) in pParse. Create an Fts3Expr
167 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
168 ** single token and set *ppExpr to point to it. If the end of the buffer is
169 ** reached before a token is found, set *ppExpr to zero. It is the
170 ** responsibility of the caller to eventually deallocate the allocated
171 ** Fts3Expr structure (if any) by passing it to sqlite3_free().
172 **
173 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
174 ** fails.
175 */
getNextToken(ParseContext * pParse,int iCol,const char * z,int n,Fts3Expr ** ppExpr,int * pnConsumed)176 static int getNextToken(
177   ParseContext *pParse,                   /* fts3 query parse context */
178   int iCol,                               /* Value for Fts3Phrase.iColumn */
179   const char *z, int n,                   /* Input string */
180   Fts3Expr **ppExpr,                      /* OUT: expression */
181   int *pnConsumed                         /* OUT: Number of bytes consumed */
182 ){
183   sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
184   sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
185   int rc;
186   sqlite3_tokenizer_cursor *pCursor;
187   Fts3Expr *pRet = 0;
188   int i = 0;
189 
190   /* Set variable i to the maximum number of bytes of input to tokenize. */
191   for(i=0; i<n; i++){
192     if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break;
193     if( z[i]=='"' ) break;
194   }
195 
196   *pnConsumed = i;
197   rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor);
198   if( rc==SQLITE_OK ){
199     const char *zToken;
200     int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
201     sqlite3_int64 nByte;                    /* total space to allocate */
202 
203     rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
204     if( rc==SQLITE_OK ){
205       nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
206       pRet = (Fts3Expr *)fts3MallocZero(nByte);
207       if( !pRet ){
208         rc = SQLITE_NOMEM;
209       }else{
210         pRet->eType = FTSQUERY_PHRASE;
211         pRet->pPhrase = (Fts3Phrase *)&pRet[1];
212         pRet->pPhrase->nToken = 1;
213         pRet->pPhrase->iColumn = iCol;
214         pRet->pPhrase->aToken[0].n = nToken;
215         pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
216         memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
217 
218         if( iEnd<n && z[iEnd]=='*' ){
219           pRet->pPhrase->aToken[0].isPrefix = 1;
220           iEnd++;
221         }
222 
223         while( 1 ){
224           if( !sqlite3_fts3_enable_parentheses
225            && iStart>0 && z[iStart-1]=='-'
226           ){
227             pParse->isNot = 1;
228             iStart--;
229           }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
230             pRet->pPhrase->aToken[0].bFirst = 1;
231             iStart--;
232           }else{
233             break;
234           }
235         }
236 
237       }
238       *pnConsumed = iEnd;
239     }else if( i && rc==SQLITE_DONE ){
240       rc = SQLITE_OK;
241     }
242 
243     pModule->xClose(pCursor);
244   }
245 
246   *ppExpr = pRet;
247   return rc;
248 }
249 
250 
251 /*
252 ** Enlarge a memory allocation.  If an out-of-memory allocation occurs,
253 ** then free the old allocation.
254 */
fts3ReallocOrFree(void * pOrig,sqlite3_int64 nNew)255 static void *fts3ReallocOrFree(void *pOrig, sqlite3_int64 nNew){
256   void *pRet = sqlite3_realloc64(pOrig, nNew);
257   if( !pRet ){
258     sqlite3_free(pOrig);
259   }
260   return pRet;
261 }
262 
263 /*
264 ** Buffer zInput, length nInput, contains the contents of a quoted string
265 ** that appeared as part of an fts3 query expression. Neither quote character
266 ** is included in the buffer. This function attempts to tokenize the entire
267 ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
268 ** containing the results.
269 **
270 ** If successful, SQLITE_OK is returned and *ppExpr set to point at the
271 ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
272 ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
273 ** to 0.
274 */
getNextString(ParseContext * pParse,const char * zInput,int nInput,Fts3Expr ** ppExpr)275 static int getNextString(
276   ParseContext *pParse,                   /* fts3 query parse context */
277   const char *zInput, int nInput,         /* Input string */
278   Fts3Expr **ppExpr                       /* OUT: expression */
279 ){
280   sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
281   sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
282   int rc;
283   Fts3Expr *p = 0;
284   sqlite3_tokenizer_cursor *pCursor = 0;
285   char *zTemp = 0;
286   int nTemp = 0;
287 
288   const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
289   int nToken = 0;
290 
291   /* The final Fts3Expr data structure, including the Fts3Phrase,
292   ** Fts3PhraseToken structures token buffers are all stored as a single
293   ** allocation so that the expression can be freed with a single call to
294   ** sqlite3_free(). Setting this up requires a two pass approach.
295   **
296   ** The first pass, in the block below, uses a tokenizer cursor to iterate
297   ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
298   ** to assemble data in two dynamic buffers:
299   **
300   **   Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
301   **             structure, followed by the array of Fts3PhraseToken
302   **             structures. This pass only populates the Fts3PhraseToken array.
303   **
304   **   Buffer zTemp: Contains copies of all tokens.
305   **
306   ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
307   ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
308   ** structures.
309   */
310   rc = sqlite3Fts3OpenTokenizer(
311       pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
312   if( rc==SQLITE_OK ){
313     int ii;
314     for(ii=0; rc==SQLITE_OK; ii++){
315       const char *zByte;
316       int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;
317       rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
318       if( rc==SQLITE_OK ){
319         Fts3PhraseToken *pToken;
320 
321         p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
322         if( !p ) goto no_mem;
323 
324         zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
325         if( !zTemp ) goto no_mem;
326 
327         assert( nToken==ii );
328         pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
329         memset(pToken, 0, sizeof(Fts3PhraseToken));
330 
331         memcpy(&zTemp[nTemp], zByte, nByte);
332         nTemp += nByte;
333 
334         pToken->n = nByte;
335         pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
336         pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
337         nToken = ii+1;
338       }
339     }
340 
341     pModule->xClose(pCursor);
342     pCursor = 0;
343   }
344 
345   if( rc==SQLITE_DONE ){
346     int jj;
347     char *zBuf = 0;
348 
349     p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
350     if( !p ) goto no_mem;
351     memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
352     p->eType = FTSQUERY_PHRASE;
353     p->pPhrase = (Fts3Phrase *)&p[1];
354     p->pPhrase->iColumn = pParse->iDefaultCol;
355     p->pPhrase->nToken = nToken;
356 
357     zBuf = (char *)&p->pPhrase->aToken[nToken];
358     if( zTemp ){
359       memcpy(zBuf, zTemp, nTemp);
360       sqlite3_free(zTemp);
361     }else{
362       assert( nTemp==0 );
363     }
364 
365     for(jj=0; jj<p->pPhrase->nToken; jj++){
366       p->pPhrase->aToken[jj].z = zBuf;
367       zBuf += p->pPhrase->aToken[jj].n;
368     }
369     rc = SQLITE_OK;
370   }
371 
372   *ppExpr = p;
373   return rc;
374 no_mem:
375 
376   if( pCursor ){
377     pModule->xClose(pCursor);
378   }
379   sqlite3_free(zTemp);
380   sqlite3_free(p);
381   *ppExpr = 0;
382   return SQLITE_NOMEM;
383 }
384 
385 /*
386 ** The output variable *ppExpr is populated with an allocated Fts3Expr
387 ** structure, or set to 0 if the end of the input buffer is reached.
388 **
389 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
390 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
391 ** If SQLITE_ERROR is returned, pContext is populated with an error message.
392 */
getNextNode(ParseContext * pParse,const char * z,int n,Fts3Expr ** ppExpr,int * pnConsumed)393 static int getNextNode(
394   ParseContext *pParse,                   /* fts3 query parse context */
395   const char *z, int n,                   /* Input string */
396   Fts3Expr **ppExpr,                      /* OUT: expression */
397   int *pnConsumed                         /* OUT: Number of bytes consumed */
398 ){
399   static const struct Fts3Keyword {
400     char *z;                              /* Keyword text */
401     unsigned char n;                      /* Length of the keyword */
402     unsigned char parenOnly;              /* Only valid in paren mode */
403     unsigned char eType;                  /* Keyword code */
404   } aKeyword[] = {
405     { "OR" ,  2, 0, FTSQUERY_OR   },
406     { "AND",  3, 1, FTSQUERY_AND  },
407     { "NOT",  3, 1, FTSQUERY_NOT  },
408     { "NEAR", 4, 0, FTSQUERY_NEAR }
409   };
410   int ii;
411   int iCol;
412   int iColLen;
413   int rc;
414   Fts3Expr *pRet = 0;
415 
416   const char *zInput = z;
417   int nInput = n;
418 
419   pParse->isNot = 0;
420 
421   /* Skip over any whitespace before checking for a keyword, an open or
422   ** close bracket, or a quoted string.
423   */
424   while( nInput>0 && fts3isspace(*zInput) ){
425     nInput--;
426     zInput++;
427   }
428   if( nInput==0 ){
429     return SQLITE_DONE;
430   }
431 
432   /* See if we are dealing with a keyword. */
433   for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
434     const struct Fts3Keyword *pKey = &aKeyword[ii];
435 
436     if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
437       continue;
438     }
439 
440     if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
441       int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
442       int nKey = pKey->n;
443       char cNext;
444 
445       /* If this is a "NEAR" keyword, check for an explicit nearness. */
446       if( pKey->eType==FTSQUERY_NEAR ){
447         assert( nKey==4 );
448         if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
449           nKey += 1+sqlite3Fts3ReadInt(&zInput[nKey+1], &nNear);
450         }
451       }
452 
453       /* At this point this is probably a keyword. But for that to be true,
454       ** the next byte must contain either whitespace, an open or close
455       ** parenthesis, a quote character, or EOF.
456       */
457       cNext = zInput[nKey];
458       if( fts3isspace(cNext)
459        || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
460       ){
461         pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr));
462         if( !pRet ){
463           return SQLITE_NOMEM;
464         }
465         pRet->eType = pKey->eType;
466         pRet->nNear = nNear;
467         *ppExpr = pRet;
468         *pnConsumed = (int)((zInput - z) + nKey);
469         return SQLITE_OK;
470       }
471 
472       /* Turns out that wasn't a keyword after all. This happens if the
473       ** user has supplied a token such as "ORacle". Continue.
474       */
475     }
476   }
477 
478   /* See if we are dealing with a quoted phrase. If this is the case, then
479   ** search for the closing quote and pass the whole string to getNextString()
480   ** for processing. This is easy to do, as fts3 has no syntax for escaping
481   ** a quote character embedded in a string.
482   */
483   if( *zInput=='"' ){
484     for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
485     *pnConsumed = (int)((zInput - z) + ii + 1);
486     if( ii==nInput ){
487       return SQLITE_ERROR;
488     }
489     return getNextString(pParse, &zInput[1], ii-1, ppExpr);
490   }
491 
492   if( sqlite3_fts3_enable_parentheses ){
493     if( *zInput=='(' ){
494       int nConsumed = 0;
495       pParse->nNest++;
496 #if !defined(SQLITE_MAX_EXPR_DEPTH)
497       if( pParse->nNest>1000 ) return SQLITE_ERROR;
498 #elif SQLITE_MAX_EXPR_DEPTH>0
499       if( pParse->nNest>SQLITE_MAX_EXPR_DEPTH ) return SQLITE_ERROR;
500 #endif
501       rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed);
502       *pnConsumed = (int)(zInput - z) + 1 + nConsumed;
503       return rc;
504     }else if( *zInput==')' ){
505       pParse->nNest--;
506       *pnConsumed = (int)((zInput - z) + 1);
507       *ppExpr = 0;
508       return SQLITE_DONE;
509     }
510   }
511 
512   /* If control flows to this point, this must be a regular token, or
513   ** the end of the input. Read a regular token using the sqlite3_tokenizer
514   ** interface. Before doing so, figure out if there is an explicit
515   ** column specifier for the token.
516   **
517   ** TODO: Strangely, it is not possible to associate a column specifier
518   ** with a quoted phrase, only with a single token. Not sure if this was
519   ** an implementation artifact or an intentional decision when fts3 was
520   ** first implemented. Whichever it was, this module duplicates the
521   ** limitation.
522   */
523   iCol = pParse->iDefaultCol;
524   iColLen = 0;
525   for(ii=0; ii<pParse->nCol; ii++){
526     const char *zStr = pParse->azCol[ii];
527     int nStr = (int)strlen(zStr);
528     if( nInput>nStr && zInput[nStr]==':'
529      && sqlite3_strnicmp(zStr, zInput, nStr)==0
530     ){
531       iCol = ii;
532       iColLen = (int)((zInput - z) + nStr + 1);
533       break;
534     }
535   }
536   rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
537   *pnConsumed += iColLen;
538   return rc;
539 }
540 
541 /*
542 ** The argument is an Fts3Expr structure for a binary operator (any type
543 ** except an FTSQUERY_PHRASE). Return an integer value representing the
544 ** precedence of the operator. Lower values have a higher precedence (i.e.
545 ** group more tightly). For example, in the C language, the == operator
546 ** groups more tightly than ||, and would therefore have a higher precedence.
547 **
548 ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
549 ** is defined), the order of the operators in precedence from highest to
550 ** lowest is:
551 **
552 **   NEAR
553 **   NOT
554 **   AND (including implicit ANDs)
555 **   OR
556 **
557 ** Note that when using the old query syntax, the OR operator has a higher
558 ** precedence than the AND operator.
559 */
opPrecedence(Fts3Expr * p)560 static int opPrecedence(Fts3Expr *p){
561   assert( p->eType!=FTSQUERY_PHRASE );
562   if( sqlite3_fts3_enable_parentheses ){
563     return p->eType;
564   }else if( p->eType==FTSQUERY_NEAR ){
565     return 1;
566   }else if( p->eType==FTSQUERY_OR ){
567     return 2;
568   }
569   assert( p->eType==FTSQUERY_AND );
570   return 3;
571 }
572 
573 /*
574 ** Argument ppHead contains a pointer to the current head of a query
575 ** expression tree being parsed. pPrev is the expression node most recently
576 ** inserted into the tree. This function adds pNew, which is always a binary
577 ** operator node, into the expression tree based on the relative precedence
578 ** of pNew and the existing nodes of the tree. This may result in the head
579 ** of the tree changing, in which case *ppHead is set to the new root node.
580 */
insertBinaryOperator(Fts3Expr ** ppHead,Fts3Expr * pPrev,Fts3Expr * pNew)581 static void insertBinaryOperator(
582   Fts3Expr **ppHead,       /* Pointer to the root node of a tree */
583   Fts3Expr *pPrev,         /* Node most recently inserted into the tree */
584   Fts3Expr *pNew           /* New binary node to insert into expression tree */
585 ){
586   Fts3Expr *pSplit = pPrev;
587   while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
588     pSplit = pSplit->pParent;
589   }
590 
591   if( pSplit->pParent ){
592     assert( pSplit->pParent->pRight==pSplit );
593     pSplit->pParent->pRight = pNew;
594     pNew->pParent = pSplit->pParent;
595   }else{
596     *ppHead = pNew;
597   }
598   pNew->pLeft = pSplit;
599   pSplit->pParent = pNew;
600 }
601 
602 /*
603 ** Parse the fts3 query expression found in buffer z, length n. This function
604 ** returns either when the end of the buffer is reached or an unmatched
605 ** closing bracket - ')' - is encountered.
606 **
607 ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
608 ** parsed form of the expression and *pnConsumed is set to the number of
609 ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
610 ** (out of memory error) or SQLITE_ERROR (parse error) is returned.
611 */
fts3ExprParse(ParseContext * pParse,const char * z,int n,Fts3Expr ** ppExpr,int * pnConsumed)612 static int fts3ExprParse(
613   ParseContext *pParse,                   /* fts3 query parse context */
614   const char *z, int n,                   /* Text of MATCH query */
615   Fts3Expr **ppExpr,                      /* OUT: Parsed query structure */
616   int *pnConsumed                         /* OUT: Number of bytes consumed */
617 ){
618   Fts3Expr *pRet = 0;
619   Fts3Expr *pPrev = 0;
620   Fts3Expr *pNotBranch = 0;               /* Only used in legacy parse mode */
621   int nIn = n;
622   const char *zIn = z;
623   int rc = SQLITE_OK;
624   int isRequirePhrase = 1;
625 
626   while( rc==SQLITE_OK ){
627     Fts3Expr *p = 0;
628     int nByte = 0;
629 
630     rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
631     assert( nByte>0 || (rc!=SQLITE_OK && p==0) );
632     if( rc==SQLITE_OK ){
633       if( p ){
634         int isPhrase;
635 
636         if( !sqlite3_fts3_enable_parentheses
637             && p->eType==FTSQUERY_PHRASE && pParse->isNot
638         ){
639           /* Create an implicit NOT operator. */
640           Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
641           if( !pNot ){
642             sqlite3Fts3ExprFree(p);
643             rc = SQLITE_NOMEM;
644             goto exprparse_out;
645           }
646           pNot->eType = FTSQUERY_NOT;
647           pNot->pRight = p;
648           p->pParent = pNot;
649           if( pNotBranch ){
650             pNot->pLeft = pNotBranch;
651             pNotBranch->pParent = pNot;
652           }
653           pNotBranch = pNot;
654           p = pPrev;
655         }else{
656           int eType = p->eType;
657           isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
658 
659           /* The isRequirePhrase variable is set to true if a phrase or
660           ** an expression contained in parenthesis is required. If a
661           ** binary operator (AND, OR, NOT or NEAR) is encounted when
662           ** isRequirePhrase is set, this is a syntax error.
663           */
664           if( !isPhrase && isRequirePhrase ){
665             sqlite3Fts3ExprFree(p);
666             rc = SQLITE_ERROR;
667             goto exprparse_out;
668           }
669 
670           if( isPhrase && !isRequirePhrase ){
671             /* Insert an implicit AND operator. */
672             Fts3Expr *pAnd;
673             assert( pRet && pPrev );
674             pAnd = fts3MallocZero(sizeof(Fts3Expr));
675             if( !pAnd ){
676               sqlite3Fts3ExprFree(p);
677               rc = SQLITE_NOMEM;
678               goto exprparse_out;
679             }
680             pAnd->eType = FTSQUERY_AND;
681             insertBinaryOperator(&pRet, pPrev, pAnd);
682             pPrev = pAnd;
683           }
684 
685           /* This test catches attempts to make either operand of a NEAR
686            ** operator something other than a phrase. For example, either of
687            ** the following:
688            **
689            **    (bracketed expression) NEAR phrase
690            **    phrase NEAR (bracketed expression)
691            **
692            ** Return an error in either case.
693            */
694           if( pPrev && (
695             (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
696          || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
697           )){
698             sqlite3Fts3ExprFree(p);
699             rc = SQLITE_ERROR;
700             goto exprparse_out;
701           }
702 
703           if( isPhrase ){
704             if( pRet ){
705               assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
706               pPrev->pRight = p;
707               p->pParent = pPrev;
708             }else{
709               pRet = p;
710             }
711           }else{
712             insertBinaryOperator(&pRet, pPrev, p);
713           }
714           isRequirePhrase = !isPhrase;
715         }
716         pPrev = p;
717       }
718       assert( nByte>0 );
719     }
720     assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
721     nIn -= nByte;
722     zIn += nByte;
723   }
724 
725   if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
726     rc = SQLITE_ERROR;
727   }
728 
729   if( rc==SQLITE_DONE ){
730     rc = SQLITE_OK;
731     if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
732       if( !pRet ){
733         rc = SQLITE_ERROR;
734       }else{
735         Fts3Expr *pIter = pNotBranch;
736         while( pIter->pLeft ){
737           pIter = pIter->pLeft;
738         }
739         pIter->pLeft = pRet;
740         pRet->pParent = pIter;
741         pRet = pNotBranch;
742       }
743     }
744   }
745   *pnConsumed = n - nIn;
746 
747 exprparse_out:
748   if( rc!=SQLITE_OK ){
749     sqlite3Fts3ExprFree(pRet);
750     sqlite3Fts3ExprFree(pNotBranch);
751     pRet = 0;
752   }
753   *ppExpr = pRet;
754   return rc;
755 }
756 
757 /*
758 ** Return SQLITE_ERROR if the maximum depth of the expression tree passed
759 ** as the only argument is more than nMaxDepth.
760 */
fts3ExprCheckDepth(Fts3Expr * p,int nMaxDepth)761 static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){
762   int rc = SQLITE_OK;
763   if( p ){
764     if( nMaxDepth<0 ){
765       rc = SQLITE_TOOBIG;
766     }else{
767       rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1);
768       if( rc==SQLITE_OK ){
769         rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1);
770       }
771     }
772   }
773   return rc;
774 }
775 
776 /*
777 ** This function attempts to transform the expression tree at (*pp) to
778 ** an equivalent but more balanced form. The tree is modified in place.
779 ** If successful, SQLITE_OK is returned and (*pp) set to point to the
780 ** new root expression node.
781 **
782 ** nMaxDepth is the maximum allowable depth of the balanced sub-tree.
783 **
784 ** Otherwise, if an error occurs, an SQLite error code is returned and
785 ** expression (*pp) freed.
786 */
fts3ExprBalance(Fts3Expr ** pp,int nMaxDepth)787 static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){
788   int rc = SQLITE_OK;             /* Return code */
789   Fts3Expr *pRoot = *pp;          /* Initial root node */
790   Fts3Expr *pFree = 0;            /* List of free nodes. Linked by pParent. */
791   int eType = pRoot->eType;       /* Type of node in this tree */
792 
793   if( nMaxDepth==0 ){
794     rc = SQLITE_ERROR;
795   }
796 
797   if( rc==SQLITE_OK ){
798     if( (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){
799       Fts3Expr **apLeaf;
800       apLeaf = (Fts3Expr **)sqlite3_malloc64(sizeof(Fts3Expr *) * nMaxDepth);
801       if( 0==apLeaf ){
802         rc = SQLITE_NOMEM;
803       }else{
804         memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth);
805       }
806 
807       if( rc==SQLITE_OK ){
808         int i;
809         Fts3Expr *p;
810 
811         /* Set $p to point to the left-most leaf in the tree of eType nodes. */
812         for(p=pRoot; p->eType==eType; p=p->pLeft){
813           assert( p->pParent==0 || p->pParent->pLeft==p );
814           assert( p->pLeft && p->pRight );
815         }
816 
817         /* This loop runs once for each leaf in the tree of eType nodes. */
818         while( 1 ){
819           int iLvl;
820           Fts3Expr *pParent = p->pParent;     /* Current parent of p */
821 
822           assert( pParent==0 || pParent->pLeft==p );
823           p->pParent = 0;
824           if( pParent ){
825             pParent->pLeft = 0;
826           }else{
827             pRoot = 0;
828           }
829           rc = fts3ExprBalance(&p, nMaxDepth-1);
830           if( rc!=SQLITE_OK ) break;
831 
832           for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){
833             if( apLeaf[iLvl]==0 ){
834               apLeaf[iLvl] = p;
835               p = 0;
836             }else{
837               assert( pFree );
838               pFree->pLeft = apLeaf[iLvl];
839               pFree->pRight = p;
840               pFree->pLeft->pParent = pFree;
841               pFree->pRight->pParent = pFree;
842 
843               p = pFree;
844               pFree = pFree->pParent;
845               p->pParent = 0;
846               apLeaf[iLvl] = 0;
847             }
848           }
849           if( p ){
850             sqlite3Fts3ExprFree(p);
851             rc = SQLITE_TOOBIG;
852             break;
853           }
854 
855           /* If that was the last leaf node, break out of the loop */
856           if( pParent==0 ) break;
857 
858           /* Set $p to point to the next leaf in the tree of eType nodes */
859           for(p=pParent->pRight; p->eType==eType; p=p->pLeft);
860 
861           /* Remove pParent from the original tree. */
862           assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent );
863           pParent->pRight->pParent = pParent->pParent;
864           if( pParent->pParent ){
865             pParent->pParent->pLeft = pParent->pRight;
866           }else{
867             assert( pParent==pRoot );
868             pRoot = pParent->pRight;
869           }
870 
871           /* Link pParent into the free node list. It will be used as an
872           ** internal node of the new tree.  */
873           pParent->pParent = pFree;
874           pFree = pParent;
875         }
876 
877         if( rc==SQLITE_OK ){
878           p = 0;
879           for(i=0; i<nMaxDepth; i++){
880             if( apLeaf[i] ){
881               if( p==0 ){
882                 p = apLeaf[i];
883                 p->pParent = 0;
884               }else{
885                 assert( pFree!=0 );
886                 pFree->pRight = p;
887                 pFree->pLeft = apLeaf[i];
888                 pFree->pLeft->pParent = pFree;
889                 pFree->pRight->pParent = pFree;
890 
891                 p = pFree;
892                 pFree = pFree->pParent;
893                 p->pParent = 0;
894               }
895             }
896           }
897           pRoot = p;
898         }else{
899           /* An error occurred. Delete the contents of the apLeaf[] array
900           ** and pFree list. Everything else is cleaned up by the call to
901           ** sqlite3Fts3ExprFree(pRoot) below.  */
902           Fts3Expr *pDel;
903           for(i=0; i<nMaxDepth; i++){
904             sqlite3Fts3ExprFree(apLeaf[i]);
905           }
906           while( (pDel=pFree)!=0 ){
907             pFree = pDel->pParent;
908             sqlite3_free(pDel);
909           }
910         }
911 
912         assert( pFree==0 );
913         sqlite3_free( apLeaf );
914       }
915     }else if( eType==FTSQUERY_NOT ){
916       Fts3Expr *pLeft = pRoot->pLeft;
917       Fts3Expr *pRight = pRoot->pRight;
918 
919       pRoot->pLeft = 0;
920       pRoot->pRight = 0;
921       pLeft->pParent = 0;
922       pRight->pParent = 0;
923 
924       rc = fts3ExprBalance(&pLeft, nMaxDepth-1);
925       if( rc==SQLITE_OK ){
926         rc = fts3ExprBalance(&pRight, nMaxDepth-1);
927       }
928 
929       if( rc!=SQLITE_OK ){
930         sqlite3Fts3ExprFree(pRight);
931         sqlite3Fts3ExprFree(pLeft);
932       }else{
933         assert( pLeft && pRight );
934         pRoot->pLeft = pLeft;
935         pLeft->pParent = pRoot;
936         pRoot->pRight = pRight;
937         pRight->pParent = pRoot;
938       }
939     }
940   }
941 
942   if( rc!=SQLITE_OK ){
943     sqlite3Fts3ExprFree(pRoot);
944     pRoot = 0;
945   }
946   *pp = pRoot;
947   return rc;
948 }
949 
950 /*
951 ** This function is similar to sqlite3Fts3ExprParse(), with the following
952 ** differences:
953 **
954 **   1. It does not do expression rebalancing.
955 **   2. It does not check that the expression does not exceed the
956 **      maximum allowable depth.
957 **   3. Even if it fails, *ppExpr may still be set to point to an
958 **      expression tree. It should be deleted using sqlite3Fts3ExprFree()
959 **      in this case.
960 */
fts3ExprParseUnbalanced(sqlite3_tokenizer * pTokenizer,int iLangid,char ** azCol,int bFts4,int nCol,int iDefaultCol,const char * z,int n,Fts3Expr ** ppExpr)961 static int fts3ExprParseUnbalanced(
962   sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
963   int iLangid,                        /* Language id for tokenizer */
964   char **azCol,                       /* Array of column names for fts3 table */
965   int bFts4,                          /* True to allow FTS4-only syntax */
966   int nCol,                           /* Number of entries in azCol[] */
967   int iDefaultCol,                    /* Default column to query */
968   const char *z, int n,               /* Text of MATCH query */
969   Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
970 ){
971   int nParsed;
972   int rc;
973   ParseContext sParse;
974 
975   memset(&sParse, 0, sizeof(ParseContext));
976   sParse.pTokenizer = pTokenizer;
977   sParse.iLangid = iLangid;
978   sParse.azCol = (const char **)azCol;
979   sParse.nCol = nCol;
980   sParse.iDefaultCol = iDefaultCol;
981   sParse.bFts4 = bFts4;
982   if( z==0 ){
983     *ppExpr = 0;
984     return SQLITE_OK;
985   }
986   if( n<0 ){
987     n = (int)strlen(z);
988   }
989   rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
990   assert( rc==SQLITE_OK || *ppExpr==0 );
991 
992   /* Check for mismatched parenthesis */
993   if( rc==SQLITE_OK && sParse.nNest ){
994     rc = SQLITE_ERROR;
995   }
996 
997   return rc;
998 }
999 
1000 /*
1001 ** Parameters z and n contain a pointer to and length of a buffer containing
1002 ** an fts3 query expression, respectively. This function attempts to parse the
1003 ** query expression and create a tree of Fts3Expr structures representing the
1004 ** parsed expression. If successful, *ppExpr is set to point to the head
1005 ** of the parsed expression tree and SQLITE_OK is returned. If an error
1006 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
1007 ** error) is returned and *ppExpr is set to 0.
1008 **
1009 ** If parameter n is a negative number, then z is assumed to point to a
1010 ** nul-terminated string and the length is determined using strlen().
1011 **
1012 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
1013 ** use to normalize query tokens while parsing the expression. The azCol[]
1014 ** array, which is assumed to contain nCol entries, should contain the names
1015 ** of each column in the target fts3 table, in order from left to right.
1016 ** Column names must be nul-terminated strings.
1017 **
1018 ** The iDefaultCol parameter should be passed the index of the table column
1019 ** that appears on the left-hand-side of the MATCH operator (the default
1020 ** column to match against for tokens for which a column name is not explicitly
1021 ** specified as part of the query string), or -1 if tokens may by default
1022 ** match any table column.
1023 */
sqlite3Fts3ExprParse(sqlite3_tokenizer * pTokenizer,int iLangid,char ** azCol,int bFts4,int nCol,int iDefaultCol,const char * z,int n,Fts3Expr ** ppExpr,char ** pzErr)1024 int sqlite3Fts3ExprParse(
1025   sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
1026   int iLangid,                        /* Language id for tokenizer */
1027   char **azCol,                       /* Array of column names for fts3 table */
1028   int bFts4,                          /* True to allow FTS4-only syntax */
1029   int nCol,                           /* Number of entries in azCol[] */
1030   int iDefaultCol,                    /* Default column to query */
1031   const char *z, int n,               /* Text of MATCH query */
1032   Fts3Expr **ppExpr,                  /* OUT: Parsed query structure */
1033   char **pzErr                        /* OUT: Error message (sqlite3_malloc) */
1034 ){
1035   int rc = fts3ExprParseUnbalanced(
1036       pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
1037   );
1038 
1039   /* Rebalance the expression. And check that its depth does not exceed
1040   ** SQLITE_FTS3_MAX_EXPR_DEPTH.  */
1041   if( rc==SQLITE_OK && *ppExpr ){
1042     rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
1043     if( rc==SQLITE_OK ){
1044       rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
1045     }
1046   }
1047 
1048   if( rc!=SQLITE_OK ){
1049     sqlite3Fts3ExprFree(*ppExpr);
1050     *ppExpr = 0;
1051     if( rc==SQLITE_TOOBIG ){
1052       sqlite3Fts3ErrMsg(pzErr,
1053           "FTS expression tree is too large (maximum depth %d)",
1054           SQLITE_FTS3_MAX_EXPR_DEPTH
1055       );
1056       rc = SQLITE_ERROR;
1057     }else if( rc==SQLITE_ERROR ){
1058       sqlite3Fts3ErrMsg(pzErr, "malformed MATCH expression: [%s]", z);
1059     }
1060   }
1061 
1062   return rc;
1063 }
1064 
1065 /*
1066 ** Free a single node of an expression tree.
1067 */
fts3FreeExprNode(Fts3Expr * p)1068 static void fts3FreeExprNode(Fts3Expr *p){
1069   assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
1070   sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
1071   sqlite3_free(p->aMI);
1072   sqlite3_free(p);
1073 }
1074 
1075 /*
1076 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
1077 **
1078 ** This function would be simpler if it recursively called itself. But
1079 ** that would mean passing a sufficiently large expression to ExprParse()
1080 ** could cause a stack overflow.
1081 */
sqlite3Fts3ExprFree(Fts3Expr * pDel)1082 void sqlite3Fts3ExprFree(Fts3Expr *pDel){
1083   Fts3Expr *p;
1084   assert( pDel==0 || pDel->pParent==0 );
1085   for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){
1086     assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft );
1087   }
1088   while( p ){
1089     Fts3Expr *pParent = p->pParent;
1090     fts3FreeExprNode(p);
1091     if( pParent && p==pParent->pLeft && pParent->pRight ){
1092       p = pParent->pRight;
1093       while( p && (p->pLeft || p->pRight) ){
1094         assert( p==p->pParent->pRight || p==p->pParent->pLeft );
1095         p = (p->pLeft ? p->pLeft : p->pRight);
1096       }
1097     }else{
1098       p = pParent;
1099     }
1100   }
1101 }
1102 
1103 /****************************************************************************
1104 *****************************************************************************
1105 ** Everything after this point is just test code.
1106 */
1107 
1108 #ifdef SQLITE_TEST
1109 
1110 #include <stdio.h>
1111 
1112 /*
1113 ** Return a pointer to a buffer containing a text representation of the
1114 ** expression passed as the first argument. The buffer is obtained from
1115 ** sqlite3_malloc(). It is the responsibility of the caller to use
1116 ** sqlite3_free() to release the memory. If an OOM condition is encountered,
1117 ** NULL is returned.
1118 **
1119 ** If the second argument is not NULL, then its contents are prepended to
1120 ** the returned expression text and then freed using sqlite3_free().
1121 */
exprToString(Fts3Expr * pExpr,char * zBuf)1122 static char *exprToString(Fts3Expr *pExpr, char *zBuf){
1123   if( pExpr==0 ){
1124     return sqlite3_mprintf("");
1125   }
1126   switch( pExpr->eType ){
1127     case FTSQUERY_PHRASE: {
1128       Fts3Phrase *pPhrase = pExpr->pPhrase;
1129       int i;
1130       zBuf = sqlite3_mprintf(
1131           "%zPHRASE %d 0", zBuf, pPhrase->iColumn);
1132       for(i=0; zBuf && i<pPhrase->nToken; i++){
1133         zBuf = sqlite3_mprintf("%z %.*s%s", zBuf,
1134             pPhrase->aToken[i].n, pPhrase->aToken[i].z,
1135             (pPhrase->aToken[i].isPrefix?"+":"")
1136         );
1137       }
1138       return zBuf;
1139     }
1140 
1141     case FTSQUERY_NEAR:
1142       zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear);
1143       break;
1144     case FTSQUERY_NOT:
1145       zBuf = sqlite3_mprintf("%zNOT ", zBuf);
1146       break;
1147     case FTSQUERY_AND:
1148       zBuf = sqlite3_mprintf("%zAND ", zBuf);
1149       break;
1150     case FTSQUERY_OR:
1151       zBuf = sqlite3_mprintf("%zOR ", zBuf);
1152       break;
1153   }
1154 
1155   if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf);
1156   if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf);
1157   if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf);
1158 
1159   if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf);
1160   if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf);
1161 
1162   return zBuf;
1163 }
1164 
1165 /*
1166 ** This is the implementation of a scalar SQL function used to test the
1167 ** expression parser. It should be called as follows:
1168 **
1169 **   fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
1170 **
1171 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
1172 ** to parse the query expression (see README.tokenizers). The second argument
1173 ** is the query expression to parse. Each subsequent argument is the name
1174 ** of a column of the fts3 table that the query expression may refer to.
1175 ** For example:
1176 **
1177 **   SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
1178 */
fts3ExprTestCommon(int bRebalance,sqlite3_context * context,int argc,sqlite3_value ** argv)1179 static void fts3ExprTestCommon(
1180   int bRebalance,
1181   sqlite3_context *context,
1182   int argc,
1183   sqlite3_value **argv
1184 ){
1185   sqlite3_tokenizer *pTokenizer = 0;
1186   int rc;
1187   char **azCol = 0;
1188   const char *zExpr;
1189   int nExpr;
1190   int nCol;
1191   int ii;
1192   Fts3Expr *pExpr;
1193   char *zBuf = 0;
1194   Fts3Hash *pHash = (Fts3Hash*)sqlite3_user_data(context);
1195   const char *zTokenizer = 0;
1196   char *zErr = 0;
1197 
1198   if( argc<3 ){
1199     sqlite3_result_error(context,
1200         "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
1201     );
1202     return;
1203   }
1204 
1205   zTokenizer = (const char*)sqlite3_value_text(argv[0]);
1206   rc = sqlite3Fts3InitTokenizer(pHash, zTokenizer, &pTokenizer, &zErr);
1207   if( rc!=SQLITE_OK ){
1208     if( rc==SQLITE_NOMEM ){
1209       sqlite3_result_error_nomem(context);
1210     }else{
1211       sqlite3_result_error(context, zErr, -1);
1212     }
1213     sqlite3_free(zErr);
1214     return;
1215   }
1216 
1217   zExpr = (const char *)sqlite3_value_text(argv[1]);
1218   nExpr = sqlite3_value_bytes(argv[1]);
1219   nCol = argc-2;
1220   azCol = (char **)sqlite3_malloc64(nCol*sizeof(char *));
1221   if( !azCol ){
1222     sqlite3_result_error_nomem(context);
1223     goto exprtest_out;
1224   }
1225   for(ii=0; ii<nCol; ii++){
1226     azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
1227   }
1228 
1229   if( bRebalance ){
1230     char *zDummy = 0;
1231     rc = sqlite3Fts3ExprParse(
1232         pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy
1233     );
1234     assert( rc==SQLITE_OK || pExpr==0 );
1235     sqlite3_free(zDummy);
1236   }else{
1237     rc = fts3ExprParseUnbalanced(
1238         pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
1239     );
1240   }
1241 
1242   if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
1243     sqlite3Fts3ExprFree(pExpr);
1244     sqlite3_result_error(context, "Error parsing expression", -1);
1245   }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
1246     sqlite3_result_error_nomem(context);
1247   }else{
1248     sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
1249     sqlite3_free(zBuf);
1250   }
1251 
1252   sqlite3Fts3ExprFree(pExpr);
1253 
1254 exprtest_out:
1255   if( pTokenizer ){
1256     rc = pTokenizer->pModule->xDestroy(pTokenizer);
1257   }
1258   sqlite3_free(azCol);
1259 }
1260 
fts3ExprTest(sqlite3_context * context,int argc,sqlite3_value ** argv)1261 static void fts3ExprTest(
1262   sqlite3_context *context,
1263   int argc,
1264   sqlite3_value **argv
1265 ){
1266   fts3ExprTestCommon(0, context, argc, argv);
1267 }
fts3ExprTestRebalance(sqlite3_context * context,int argc,sqlite3_value ** argv)1268 static void fts3ExprTestRebalance(
1269   sqlite3_context *context,
1270   int argc,
1271   sqlite3_value **argv
1272 ){
1273   fts3ExprTestCommon(1, context, argc, argv);
1274 }
1275 
1276 /*
1277 ** Register the query expression parser test function fts3_exprtest()
1278 ** with database connection db.
1279 */
sqlite3Fts3ExprInitTestInterface(sqlite3 * db,Fts3Hash * pHash)1280 int sqlite3Fts3ExprInitTestInterface(sqlite3 *db, Fts3Hash *pHash){
1281   int rc = sqlite3_create_function(
1282       db, "fts3_exprtest", -1, SQLITE_UTF8, (void*)pHash, fts3ExprTest, 0, 0
1283   );
1284   if( rc==SQLITE_OK ){
1285     rc = sqlite3_create_function(db, "fts3_exprtest_rebalance",
1286         -1, SQLITE_UTF8, (void*)pHash, fts3ExprTestRebalance, 0, 0
1287     );
1288   }
1289   return rc;
1290 }
1291 
1292 #endif
1293 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
1294