1 /*
2 ** 2009 Oct 23
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 */
13 
14 #include "fts3Int.h"
15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
16 
17 #include <string.h>
18 #include <assert.h>
19 
/*
** Characters that may appear in the second argument to matchinfo(). Each
** character is a directive requesting one kind of data. The comment beside
** each definition gives the number of values that the directive adds to
** the matchinfo() result, expressed in terms of the column count (nCol)
** and the phrase count (nPhrase).
*/
#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS     'y'        /* nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS_BM  'b'        /* nCol*nPhrase values */

/*
** The default value for the second argument to matchinfo(): the NPHRASE,
** NCOL and HITS directives, in that order.
*/
#define FTS3_MATCHINFO_DEFAULT   "pcx"
37 
38 
/*
** Context object passed through fts3ExprIterate() by
** fts3ExprLoadDoclists(). The callback fts3ExprLoadDoclistsCb() adds one
** to nPhrase and adds the phrase's token count to nToken for every phrase
** it is invoked on, so that after the iteration the structure holds the
** totals for the query.
*/
typedef struct LoadDoclistCtx LoadDoclistCtx;
struct LoadDoclistCtx {
  Fts3Cursor *pCsr;               /* FTS3 Cursor */
  int nPhrase;                    /* Number of phrases seen so far */
  int nToken;                     /* Number of tokens seen so far */
};
49 
/*
** The following types are used as part of the implementation of the
** fts3BestSnippet() routine.
**
** A SnippetIter walks over the candidate snippets of one column. It is
** advanced by fts3SnippetNextCandidate() and inspected by
** fts3SnippetDetails().
*/
typedef struct SnippetIter SnippetIter;
typedef struct SnippetPhrase SnippetPhrase;
typedef struct SnippetFragment SnippetFragment;

struct SnippetIter {
  Fts3Cursor *pCsr;               /* Cursor snippet is being generated from */
  int iCol;                       /* Extract snippet from this column */
  int nSnippet;                   /* Requested snippet length (in tokens) */
  int nPhrase;                    /* Number of phrases in query */
  SnippetPhrase *aPhrase;         /* Array of size nPhrase */
  int iCurrent;                   /* First token of current snippet. Negative
                                  ** until the first candidate is requested */
};
66 
/*
** Per-phrase state of a SnippetIter. Two independent cursors ("head" and
** "tail") are kept on the same position list. Each consists of the most
** recently decoded position (iHead/iTail) and a pointer to the encoded
** data that follows it (pHead/pTail). fts3SnippetAdvance() sets the pointer
** to NULL and the position to -1 once a cursor runs off the end of the
** list. All pointers remain NULL if the phrase has no position list for
** the column.
*/
struct SnippetPhrase {
  int nToken;                     /* Number of tokens in phrase */
  char *pList;                    /* Pointer to start of phrase position list */
  int iHead;                      /* Next value in position list */
  char *pHead;                    /* Position list data following iHead */
  int iTail;                      /* Next value in trailing position list */
  char *pTail;                    /* Position list data following iTail */
};
75 
/*
** Description of one selected snippet, as produced by fts3BestSnippet()
** and consumed by fts3SnippetText(). In hlmask, bit N corresponds to the
** token at position (iPos+N) of the column.
*/
struct SnippetFragment {
  int iCol;                       /* Column snippet is extracted from */
  int iPos;                       /* Index of first token in snippet */
  u64 covered;                    /* Mask of query phrases covered */
  u64 hlmask;                     /* Mask of snippet terms to highlight */
};
82 
/*
** This type is used as an fts3ExprIterate() context object while
** accumulating the data returned by the matchinfo() function.
*/
typedef struct MatchInfo MatchInfo;
struct MatchInfo {
  Fts3Cursor *pCursor;            /* FTS3 Cursor */
  int nCol;                       /* Number of columns in table */
  int nPhrase;                    /* Number of matchable phrases in query */
  sqlite3_int64 nDoc;             /* Number of docs in database */
  char flag;                      /* NOTE(review): presumably the matchinfo
                                  ** directive character currently being
                                  ** processed - confirm against the users
                                  ** of this field */
  u32 *aMatchinfo;                /* Pre-allocated buffer */
};
96 
/*
** An instance of this structure is used to manage a pair of buffers, each
** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below
** for details.
**
** The object, both buffers and a copy of the matchinfo format string are
** stored in a single allocation made by fts3MIBufferNew(). The aMatchinfo[]
** array is laid out as follows:
**
**     aMatchinfo[0]                 Byte offset of aMatchinfo[1] from the
**                                   start of the structure
**     aMatchinfo[1..nElem]          First buffer
**     aMatchinfo[1+nElem]           Byte offset of the second buffer from
**                                   the start of the structure
**     aMatchinfo[2+nElem..]         Second buffer (nElem entries)
**
** The offset stored immediately before each buffer allows
** fts3MIBufferFree() to recover the MatchinfoBuffer pointer when it is
** passed only a pointer to one of the buffers.
*/
struct MatchinfoBuffer {
  u8 aRef[3];                     /* aRef[0]: set while the object itself is
                                  ** referenced (cleared by
                                  ** sqlite3Fts3MIBufferFree()). aRef[1] and
                                  ** aRef[2]: set while the first and second
                                  ** buffer respectively are handed out */
  int nElem;                      /* Number of u32 entries in each buffer */
  int bGlobal;                    /* Set if global data is loaded */
  char *zMatchinfo;               /* Copy of format string, stored after the
                                  ** two buffers in the same allocation */
  u32 aMatchinfo[1];              /* Offsets and buffers - see above */
};
109 
110 
/*
** The snippet() and offsets() functions both return text values. An instance
** of the following structure is used to accumulate those values while the
** functions are running. See fts3StringAppend() for details.
**
** fts3StringAppend() keeps z nul-terminated and grows it with
** sqlite3_realloc64() as required.
*/
typedef struct StrBuffer StrBuffer;
struct StrBuffer {
  char *z;                        /* Pointer to buffer containing string */
  int n;                          /* Length of z in bytes (excl. nul-term) */
  int nAlloc;                     /* Allocated size of buffer z in bytes */
};
122 
123 
124 /*************************************************************************
125 ** Start of MatchinfoBuffer code.
126 */
127 
128 /*
129 ** Allocate a two-slot MatchinfoBuffer object.
130 */
fts3MIBufferNew(size_t nElem,const char * zMatchinfo)131 static MatchinfoBuffer *fts3MIBufferNew(size_t nElem, const char *zMatchinfo){
132   MatchinfoBuffer *pRet;
133   sqlite3_int64 nByte = sizeof(u32) * (2*(sqlite3_int64)nElem + 1)
134                            + sizeof(MatchinfoBuffer);
135   sqlite3_int64 nStr = strlen(zMatchinfo);
136 
137   pRet = sqlite3_malloc64(nByte + nStr+1);
138   if( pRet ){
139     memset(pRet, 0, nByte);
140     pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet;
141     pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0]
142                                       + sizeof(u32)*((int)nElem+1);
143     pRet->nElem = (int)nElem;
144     pRet->zMatchinfo = ((char*)pRet) + nByte;
145     memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1);
146     pRet->aRef[0] = 1;
147   }
148 
149   return pRet;
150 }
151 
fts3MIBufferFree(void * p)152 static void fts3MIBufferFree(void *p){
153   MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]);
154 
155   assert( (u32*)p==&pBuf->aMatchinfo[1]
156        || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2]
157   );
158   if( (u32*)p==&pBuf->aMatchinfo[1] ){
159     pBuf->aRef[1] = 0;
160   }else{
161     pBuf->aRef[2] = 0;
162   }
163 
164   if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){
165     sqlite3_free(pBuf);
166   }
167 }
168 
/*
** Obtain a buffer of p->nElem u32 values. *paOut is set to point to the
** buffer and the function that must be used to release it is returned.
**
** The first embedded buffer is used if it is free, otherwise the second.
** If both are in use, a separate allocation is made; in that case, if the
** bGlobal flag is set, the new buffer is initialized with a copy of the
** first embedded buffer. If that allocation fails, *paOut is set to NULL
** and NULL is returned.
*/
static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){
  u32 *aHeap;

  if( p->aRef[1]==0 ){
    p->aRef[1] = 1;
    *paOut = &p->aMatchinfo[1];
    return fts3MIBufferFree;
  }
  if( p->aRef[2]==0 ){
    p->aRef[2] = 1;
    *paOut = &p->aMatchinfo[p->nElem+2];
    return fts3MIBufferFree;
  }

  /* Both embedded buffers are taken. Fall back to the heap. */
  aHeap = (u32*)sqlite3_malloc64(p->nElem * sizeof(u32));
  *paOut = aHeap;
  if( aHeap==0 ){
    return 0;
  }
  if( p->bGlobal ){
    memcpy(aHeap, &p->aMatchinfo[1], p->nElem*sizeof(u32));
  }
  return sqlite3_free;
}
193 
/*
** Mark the buffer as having its global data loaded, and copy the current
** contents of the first embedded buffer into the second.
*/
static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){
  u32 *aFirst = &p->aMatchinfo[1];
  u32 *aSecond = &p->aMatchinfo[2+p->nElem];

  p->bGlobal = 1;
  memcpy(aSecond, aFirst, p->nElem*sizeof(u32));
}
198 
199 /*
200 ** Free a MatchinfoBuffer object allocated using fts3MIBufferNew()
201 */
sqlite3Fts3MIBufferFree(MatchinfoBuffer * p)202 void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){
203   if( p ){
204     assert( p->aRef[0]==1 );
205     p->aRef[0] = 0;
206     if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){
207       sqlite3_free(p);
208     }
209   }
210 }
211 
212 /*
213 ** End of MatchinfoBuffer code.
214 *************************************************************************/
215 
216 
/*
** Step a position-list cursor forward by one entry.
**
** A position list is a list of unique integers, sorted from smallest to
** largest. Each element is stored as an FTS3 varint whose value is the
** difference between the element and its predecessor, plus two. So the
** position-list:
**
**     4 9 113
**
** is stored as the three varints:
**
**     6 7 106
**
** On entry, *pp points to the start of an encoded element and *piPos holds
** the value of the previous element. On return, *piPos holds the value of
** the element just decoded and *pp points to the varint that follows it.
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  int iDelta;
  int nByte = fts3GetVarint32(*pp, &iDelta);

  *pp += nByte;
  *piPos += iDelta - 2;
}
242 
243 /*
244 ** Helper function for fts3ExprIterate() (see below).
245 */
fts3ExprIterate2(Fts3Expr * pExpr,int * piPhrase,int (* x)(Fts3Expr *,int,void *),void * pCtx)246 static int fts3ExprIterate2(
247   Fts3Expr *pExpr,                /* Expression to iterate phrases of */
248   int *piPhrase,                  /* Pointer to phrase counter */
249   int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
250   void *pCtx                      /* Second argument to pass to callback */
251 ){
252   int rc;                         /* Return code */
253   int eType = pExpr->eType;     /* Type of expression node pExpr */
254 
255   if( eType!=FTSQUERY_PHRASE ){
256     assert( pExpr->pLeft && pExpr->pRight );
257     rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
258     if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
259       rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
260     }
261   }else{
262     rc = x(pExpr, *piPhrase, pCtx);
263     (*piPhrase)++;
264   }
265   return rc;
266 }
267 
268 /*
269 ** Iterate through all phrase nodes in an FTS3 query, except those that
270 ** are part of a sub-tree that is the right-hand-side of a NOT operator.
271 ** For each phrase node found, the supplied callback function is invoked.
272 **
273 ** If the callback function returns anything other than SQLITE_OK,
274 ** the iteration is abandoned and the error code returned immediately.
275 ** Otherwise, SQLITE_OK is returned after a callback has been made for
276 ** all eligible phrase nodes.
277 */
fts3ExprIterate(Fts3Expr * pExpr,int (* x)(Fts3Expr *,int,void *),void * pCtx)278 static int fts3ExprIterate(
279   Fts3Expr *pExpr,                /* Expression to iterate phrases of */
280   int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
281   void *pCtx                      /* Second argument to pass to callback */
282 ){
283   int iPhrase = 0;                /* Variable used as the phrase counter */
284   return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
285 }
286 
287 
288 /*
289 ** This is an fts3ExprIterate() callback used while loading the doclists
290 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
291 ** fts3ExprLoadDoclists().
292 */
fts3ExprLoadDoclistsCb(Fts3Expr * pExpr,int iPhrase,void * ctx)293 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
294   int rc = SQLITE_OK;
295   Fts3Phrase *pPhrase = pExpr->pPhrase;
296   LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
297 
298   UNUSED_PARAMETER(iPhrase);
299 
300   p->nPhrase++;
301   p->nToken += pPhrase->nToken;
302 
303   return rc;
304 }
305 
306 /*
307 ** Load the doclists for each phrase in the query associated with FTS3 cursor
308 ** pCsr.
309 **
310 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
311 ** phrases in the expression (all phrases except those directly or
312 ** indirectly descended from the right-hand-side of a NOT operator). If
313 ** pnToken is not NULL, then it is set to the number of tokens in all
314 ** matchable phrases of the expression.
315 */
fts3ExprLoadDoclists(Fts3Cursor * pCsr,int * pnPhrase,int * pnToken)316 static int fts3ExprLoadDoclists(
317   Fts3Cursor *pCsr,               /* Fts3 cursor for current query */
318   int *pnPhrase,                  /* OUT: Number of phrases in query */
319   int *pnToken                    /* OUT: Number of tokens in query */
320 ){
321   int rc;                         /* Return Code */
322   LoadDoclistCtx sCtx = {0,0,0};  /* Context for fts3ExprIterate() */
323   sCtx.pCsr = pCsr;
324   rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
325   if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
326   if( pnToken ) *pnToken = sCtx.nToken;
327   return rc;
328 }
329 
/*
** fts3ExprIterate() callback used by fts3ExprPhraseCount(). The context is
** a pointer to an int counter, which is incremented. The phrase index is
** also recorded in pExpr->iPhrase.
*/
static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
  int *pnPhrase = (int *)ctx;

  *pnPhrase += 1;
  pExpr->iPhrase = iPhrase;
  return SQLITE_OK;
}
/*
** Return the number of phrases visited by fts3ExprIterate() for
** expression pExpr. As a side effect, the iPhrase field of each visited
** phrase node is set to its index.
*/
static int fts3ExprPhraseCount(Fts3Expr *pExpr){
  int nCount = 0;

  /* The callback never fails, so the return code carries no information. */
  (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nCount);
  return nCount;
}
340 
/*
** Advance the position-list cursor (*ppIter, *piIter) until it refers to
** the first position that is greater than or equal to iNext.
**
** If the end of the list is reached first, *ppIter is set to NULL and
** *piIter to -1. If *ppIter is already NULL this function is a no-op.
*/
static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
  char *pData = *ppIter;
  int iPos;

  if( pData==0 ) return;

  iPos = *piIter;
  while( iPos<iNext ){
    /* A byte of 0x00 or 0x01 marks the end of this column's positions. */
    if( (*pData & 0xFE)==0 ){
      *piIter = -1;
      *ppIter = 0;
      return;
    }
    fts3GetDeltaPosition(&pData, &iPos);
  }

  *piIter = iPos;
  *ppIter = pData;
}
364 
365 /*
366 ** Advance the snippet iterator to the next candidate snippet.
367 */
fts3SnippetNextCandidate(SnippetIter * pIter)368 static int fts3SnippetNextCandidate(SnippetIter *pIter){
369   int i;                          /* Loop counter */
370 
371   if( pIter->iCurrent<0 ){
372     /* The SnippetIter object has just been initialized. The first snippet
373     ** candidate always starts at offset 0 (even if this candidate has a
374     ** score of 0.0).
375     */
376     pIter->iCurrent = 0;
377 
378     /* Advance the 'head' iterator of each phrase to the first offset that
379     ** is greater than or equal to (iNext+nSnippet).
380     */
381     for(i=0; i<pIter->nPhrase; i++){
382       SnippetPhrase *pPhrase = &pIter->aPhrase[i];
383       fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
384     }
385   }else{
386     int iStart;
387     int iEnd = 0x7FFFFFFF;
388 
389     for(i=0; i<pIter->nPhrase; i++){
390       SnippetPhrase *pPhrase = &pIter->aPhrase[i];
391       if( pPhrase->pHead && pPhrase->iHead<iEnd ){
392         iEnd = pPhrase->iHead;
393       }
394     }
395     if( iEnd==0x7FFFFFFF ){
396       return 1;
397     }
398 
399     pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
400     for(i=0; i<pIter->nPhrase; i++){
401       SnippetPhrase *pPhrase = &pIter->aPhrase[i];
402       fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
403       fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
404     }
405   }
406 
407   return 0;
408 }
409 
410 /*
411 ** Retrieve information about the current candidate snippet of snippet
412 ** iterator pIter.
413 */
fts3SnippetDetails(SnippetIter * pIter,u64 mCovered,int * piToken,int * piScore,u64 * pmCover,u64 * pmHighlight)414 static void fts3SnippetDetails(
415   SnippetIter *pIter,             /* Snippet iterator */
416   u64 mCovered,                   /* Bitmask of phrases already covered */
417   int *piToken,                   /* OUT: First token of proposed snippet */
418   int *piScore,                   /* OUT: "Score" for this snippet */
419   u64 *pmCover,                   /* OUT: Bitmask of phrases covered */
420   u64 *pmHighlight                /* OUT: Bitmask of terms to highlight */
421 ){
422   int iStart = pIter->iCurrent;   /* First token of snippet */
423   int iScore = 0;                 /* Score of this snippet */
424   int i;                          /* Loop counter */
425   u64 mCover = 0;                 /* Mask of phrases covered by this snippet */
426   u64 mHighlight = 0;             /* Mask of tokens to highlight in snippet */
427 
428   for(i=0; i<pIter->nPhrase; i++){
429     SnippetPhrase *pPhrase = &pIter->aPhrase[i];
430     if( pPhrase->pTail ){
431       char *pCsr = pPhrase->pTail;
432       int iCsr = pPhrase->iTail;
433 
434       while( iCsr<(iStart+pIter->nSnippet) && iCsr>=iStart ){
435         int j;
436         u64 mPhrase = (u64)1 << (i%64);
437         u64 mPos = (u64)1 << (iCsr - iStart);
438         assert( iCsr>=iStart && (iCsr - iStart)<=64 );
439         assert( i>=0 );
440         if( (mCover|mCovered)&mPhrase ){
441           iScore++;
442         }else{
443           iScore += 1000;
444         }
445         mCover |= mPhrase;
446 
447         for(j=0; j<pPhrase->nToken; j++){
448           mHighlight |= (mPos>>j);
449         }
450 
451         if( 0==(*pCsr & 0x0FE) ) break;
452         fts3GetDeltaPosition(&pCsr, &iCsr);
453       }
454     }
455   }
456 
457   /* Set the output variables before returning. */
458   *piToken = iStart;
459   *piScore = iScore;
460   *pmCover = mCover;
461   *pmHighlight = mHighlight;
462 }
463 
464 /*
465 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
466 ** Each invocation populates an element of the SnippetIter.aPhrase[] array.
467 */
fts3SnippetFindPositions(Fts3Expr * pExpr,int iPhrase,void * ctx)468 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
469   SnippetIter *p = (SnippetIter *)ctx;
470   SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
471   char *pCsr;
472   int rc;
473 
474   pPhrase->nToken = pExpr->pPhrase->nToken;
475   rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr);
476   assert( rc==SQLITE_OK || pCsr==0 );
477   if( pCsr ){
478     int iFirst = 0;
479     pPhrase->pList = pCsr;
480     fts3GetDeltaPosition(&pCsr, &iFirst);
481     if( iFirst<0 ){
482       rc = FTS_CORRUPT_VTAB;
483     }else{
484       pPhrase->pHead = pCsr;
485       pPhrase->pTail = pCsr;
486       pPhrase->iHead = iFirst;
487       pPhrase->iTail = iFirst;
488     }
489   }else{
490     assert( rc!=SQLITE_OK || (
491        pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0
492     ));
493   }
494 
495   return rc;
496 }
497 
498 /*
499 ** Select the fragment of text consisting of nFragment contiguous tokens
500 ** from column iCol that represent the "best" snippet. The best snippet
501 ** is the snippet with the highest score, where scores are calculated
502 ** by adding:
503 **
504 **   (a) +1 point for each occurrence of a matchable phrase in the snippet.
505 **
506 **   (b) +1000 points for the first occurrence of each matchable phrase in
507 **       the snippet for which the corresponding mCovered bit is not set.
508 **
509 ** The selected snippet parameters are stored in structure *pFragment before
510 ** returning. The score of the selected snippet is stored in *piScore
511 ** before returning.
512 */
fts3BestSnippet(int nSnippet,Fts3Cursor * pCsr,int iCol,u64 mCovered,u64 * pmSeen,SnippetFragment * pFragment,int * piScore)513 static int fts3BestSnippet(
514   int nSnippet,                   /* Desired snippet length */
515   Fts3Cursor *pCsr,               /* Cursor to create snippet for */
516   int iCol,                       /* Index of column to create snippet from */
517   u64 mCovered,                   /* Mask of phrases already covered */
518   u64 *pmSeen,                    /* IN/OUT: Mask of phrases seen */
519   SnippetFragment *pFragment,     /* OUT: Best snippet found */
520   int *piScore                    /* OUT: Score of snippet pFragment */
521 ){
522   int rc;                         /* Return Code */
523   int nList;                      /* Number of phrases in expression */
524   SnippetIter sIter;              /* Iterates through snippet candidates */
525   sqlite3_int64 nByte;            /* Number of bytes of space to allocate */
526   int iBestScore = -1;            /* Best snippet score found so far */
527   int i;                          /* Loop counter */
528 
529   memset(&sIter, 0, sizeof(sIter));
530 
531   /* Iterate through the phrases in the expression to count them. The same
532   ** callback makes sure the doclists are loaded for each phrase.
533   */
534   rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
535   if( rc!=SQLITE_OK ){
536     return rc;
537   }
538 
539   /* Now that it is known how many phrases there are, allocate and zero
540   ** the required space using malloc().
541   */
542   nByte = sizeof(SnippetPhrase) * nList;
543   sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc64(nByte);
544   if( !sIter.aPhrase ){
545     return SQLITE_NOMEM;
546   }
547   memset(sIter.aPhrase, 0, nByte);
548 
549   /* Initialize the contents of the SnippetIter object. Then iterate through
550   ** the set of phrases in the expression to populate the aPhrase[] array.
551   */
552   sIter.pCsr = pCsr;
553   sIter.iCol = iCol;
554   sIter.nSnippet = nSnippet;
555   sIter.nPhrase = nList;
556   sIter.iCurrent = -1;
557   rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter);
558   if( rc==SQLITE_OK ){
559 
560     /* Set the *pmSeen output variable. */
561     for(i=0; i<nList; i++){
562       if( sIter.aPhrase[i].pHead ){
563         *pmSeen |= (u64)1 << (i%64);
564       }
565     }
566 
567     /* Loop through all candidate snippets. Store the best snippet in
568      ** *pFragment. Store its associated 'score' in iBestScore.
569      */
570     pFragment->iCol = iCol;
571     while( !fts3SnippetNextCandidate(&sIter) ){
572       int iPos;
573       int iScore;
574       u64 mCover;
575       u64 mHighlite;
576       fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite);
577       assert( iScore>=0 );
578       if( iScore>iBestScore ){
579         pFragment->iPos = iPos;
580         pFragment->hlmask = mHighlite;
581         pFragment->covered = mCover;
582         iBestScore = iScore;
583       }
584     }
585 
586     *piScore = iBestScore;
587   }
588   sqlite3_free(sIter.aPhrase);
589   return rc;
590 }
591 
592 
593 /*
594 ** Append a string to the string-buffer passed as the first argument.
595 **
596 ** If nAppend is negative, then the length of the string zAppend is
597 ** determined using strlen().
598 */
fts3StringAppend(StrBuffer * pStr,const char * zAppend,int nAppend)599 static int fts3StringAppend(
600   StrBuffer *pStr,                /* Buffer to append to */
601   const char *zAppend,            /* Pointer to data to append to buffer */
602   int nAppend                     /* Size of zAppend in bytes (or -1) */
603 ){
604   if( nAppend<0 ){
605     nAppend = (int)strlen(zAppend);
606   }
607 
608   /* If there is insufficient space allocated at StrBuffer.z, use realloc()
609   ** to grow the buffer until so that it is big enough to accomadate the
610   ** appended data.
611   */
612   if( pStr->n+nAppend+1>=pStr->nAlloc ){
613     sqlite3_int64 nAlloc = pStr->nAlloc+(sqlite3_int64)nAppend+100;
614     char *zNew = sqlite3_realloc64(pStr->z, nAlloc);
615     if( !zNew ){
616       return SQLITE_NOMEM;
617     }
618     pStr->z = zNew;
619     pStr->nAlloc = nAlloc;
620   }
621   assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) );
622 
623   /* Append the data to the string buffer. */
624   memcpy(&pStr->z[pStr->n], zAppend, nAppend);
625   pStr->n += nAppend;
626   pStr->z[pStr->n] = '\0';
627 
628   return SQLITE_OK;
629 }
630 
/*
** The fts3BestSnippet() function often selects snippets that end with a
** query term. That is, the final term of the snippet is always a term
** that requires highlighting. For example, if 'X' is a highlighted term
** and '.' is a non-highlighted term, BestSnippet() may select:
**
**     ........X.....X
**
** This function "shifts" the beginning of the snippet forward in the
** document so that there are approximately the same number of
** non-highlighted terms to the right of the final highlighted term as there
** are to the left of the first highlighted term. For example, to this:
**
**     ....X.....X....
**
** This is done as part of extracting the snippet text, not when selecting
** the snippet. Snippet selection is done based on doclists only, so there
** is no way for fts3BestSnippet() to know whether or not the document
** actually contains terms that follow the final highlighted term.
**
** The caller in this file, fts3SnippetText(), passes as zDoc/nDoc the part
** of the document that begins at the first token of the unshifted snippet.
**
** If the highlight mask is zero, or if no forward shift is called for,
** *piPos and *pHlmask are left unchanged. SQLITE_OK is returned on success,
** or an error code if the tokenizer cannot be opened or fails while
** reading tokens.
*/
static int fts3SnippetShift(
  Fts3Table *pTab,                /* FTS3 table snippet comes from */
  int iLangid,                    /* Language id to use in tokenizing */
  int nSnippet,                   /* Number of tokens desired for snippet */
  const char *zDoc,               /* Document text to extract snippet from */
  int nDoc,                       /* Size of buffer zDoc in bytes */
  int *piPos,                     /* IN/OUT: First token of snippet */
  u64 *pHlmask                    /* IN/OUT: Mask of tokens to highlight */
){
  u64 hlmask = *pHlmask;          /* Local copy of initial highlight-mask */

  if( hlmask ){
    int nLeft;                    /* Tokens to the left of first highlight */
    int nRight;                   /* Tokens to the right of last highlight */
    int nDesired;                 /* Ideal number of tokens to shift forward */

    /* Count the clear bits below the lowest set bit (nLeft), and the clear
    ** bits between the highest set bit and bit (nSnippet-1) (nRight). Both
    ** loops terminate because hlmask is non-zero. */
    for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
    for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
    assert( (nSnippet-1-nRight)<=63 && (nSnippet-1-nRight)>=0 );
    nDesired = (nLeft-nRight)/2;

    /* Ideally, the start of the snippet should be pushed forward in the
    ** document nDesired tokens. This block checks if there are actually
    ** nDesired tokens to the right of the snippet. If so, *piPos and
    ** *pHlMask are updated to shift the snippet nDesired tokens to the
    ** right. Otherwise, the snippet is shifted by the number of tokens
    ** available.
    */
    if( nDesired>0 ){
      int nShift;                 /* Number of tokens to shift snippet by */
      int iCurrent = 0;           /* Token counter */
      int rc;                     /* Return Code */
      sqlite3_tokenizer_module *pMod;
      sqlite3_tokenizer_cursor *pC;
      pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;

      /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
      ** or more tokens in zDoc/nDoc.
      */
      rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      /* Only the token position written to iCurrent is of interest. The
      ** token text and byte offsets go to dummy variables. */
      while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
        const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
        rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
      }
      pMod->xClose(pC);
      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }

      /* NOTE(review): when the tokenizer ran out of input (SQLITE_DONE),
      ** iCurrent presumably still holds the position of the final token, so
      ** one is added to turn it into a token count - confirm against the
      ** tokenizer interface. */
      nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
      assert( nShift<=nDesired );
      if( nShift>0 ){
        /* Moving the start forward by nShift tokens moves every highlighted
        ** token nShift bits lower in the mask. */
        *piPos += nShift;
        *pHlmask = hlmask >> nShift;
      }
    }
  }
  return SQLITE_OK;
}
711 
/*
** Extract the snippet text for fragment pFragment from cursor pCsr and
** append it to string buffer pOut.
**
** The column text is re-tokenized. Tokens before the start of the fragment
** are skipped. When the first token of the fragment is reached the
** fragment is shifted forward if appropriate (see fts3SnippetShift()).
** Then up to nSnippet tokens are appended to pOut together with the text
** that separates them. Each token whose bit is set in the highlight mask is
** wrapped in zOpen and zClose.
**
** zEllipsis is written before the fragment if the fragment does not start
** at token 0 or is not the first fragment (iFragment>0). It is written
** after the fragment if the column has more tokens than the fragment
** covers and isLast is true.
**
** If the column value is NULL nothing is appended and SQLITE_OK is
** returned. Otherwise SQLITE_OK is returned on success, or an error code
** (for example SQLITE_NOMEM) if an error occurs.
*/
static int fts3SnippetText(
  Fts3Cursor *pCsr,               /* FTS3 Cursor */
  SnippetFragment *pFragment,     /* Snippet to extract */
  int iFragment,                  /* Fragment number */
  int isLast,                     /* True for final fragment in snippet */
  int nSnippet,                   /* Number of tokens in extracted snippet */
  const char *zOpen,              /* String inserted before highlighted term */
  const char *zClose,             /* String inserted after highlighted term */
  const char *zEllipsis,          /* String inserted between snippets */
  StrBuffer *pOut                 /* Write output here */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc;                         /* Return code */
  const char *zDoc;               /* Document text to extract snippet from */
  int nDoc;                       /* Size of zDoc in bytes */
  int iCurrent = 0;               /* Current token number of document */
  int iEnd = 0;                   /* Byte offset of end of current token */
  int isShiftDone = 0;            /* True after snippet is shifted */
  int iPos = pFragment->iPos;     /* First token of snippet */
  u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
  int iCol = pFragment->iCol+1;   /* Query column to extract text from */
  sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
  sqlite3_tokenizer_cursor *pC;   /* Tokenizer cursor open on zDoc/nDoc */

  /* A NULL return from sqlite3_column_text() means either that the value
  ** is an SQL NULL, in which case there is nothing to do, or that an
  ** out-of-memory error occurred. */
  zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
  if( zDoc==0 ){
    if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
      return SQLITE_NOMEM;
    }
    return SQLITE_OK;
  }
  nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);

  /* Open a token cursor on the document. */
  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
  rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  while( rc==SQLITE_OK ){
    const char *ZDUMMY;           /* Dummy argument used with tokenizer */
    int DUMMY1 = -1;              /* Dummy argument used with tokenizer */
    int iBegin = 0;               /* Offset in zDoc of start of token */
    int iFin = 0;                 /* Offset in zDoc of end of token */
    int isHighlight = 0;          /* True for highlighted terms */

    /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
    ** in the FTS code the variable that the third argument to xNext points to
    ** is initialized to zero before the first (*but not necessarily
    ** subsequent*) call to xNext(). This is done for a particular application
    ** that needs to know whether or not the tokenizer is being used for
    ** snippet generation or for some other purpose.
    **
    ** Extreme care is required when writing code to depend on this
    ** initialization. It is not a documented part of the tokenizer interface.
    ** If a tokenizer is used directly by any code outside of FTS, this
    ** convention might not be respected.  */
    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_DONE ){
        /* Special case - the last token of the snippet is also the last token
        ** of the column. Append any punctuation that occurred between the end
        ** of the previous token and the end of the document to the output.
        ** Then break out of the loop. */
        rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
      }
      break;
    }
    /* Skip tokens that precede the snippet. */
    if( iCurrent<iPos ){ continue; }

    if( !isShiftDone ){
      /* First token of the unshifted snippet. Pass the remainder of the
      ** document, starting at this token, to fts3SnippetShift(). It may
      ** increase iPos and adjust hlmask to match. */
      int n = nDoc - iBegin;
      rc = fts3SnippetShift(
          pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
      );
      isShiftDone = 1;

      /* Now that the shift has been done, check if the initial "..." are
      ** required. They are required if (a) this is not the first fragment,
      ** or (b) this fragment does not begin at position 0 of its column.
      ** If they are not required, any text that precedes the first token
      ** of the column is copied to the output instead.
      */
      if( rc==SQLITE_OK ){
        if( iPos>0 || iFragment>0 ){
          rc = fts3StringAppend(pOut, zEllipsis, -1);
        }else if( iBegin ){
          rc = fts3StringAppend(pOut, zDoc, iBegin);
        }
      }
      /* On error the loop condition ends the loop. Otherwise, if the
      ** snippet was shifted, this token now precedes it and is skipped. */
      if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
    }

    /* Past the end of the snippet. Since a further token exists, the
    ** column continues beyond the snippet, so the final fragment is
    ** followed by the ellipsis text. */
    if( iCurrent>=(iPos+nSnippet) ){
      if( isLast ){
        rc = fts3StringAppend(pOut, zEllipsis, -1);
      }
      break;
    }

    /* Set isHighlight to true if this term should be highlighted. */
    isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;

    /* Append, in order: the text between the previous token and this one
    ** (omitted for the first token of the snippet), the opening markup if
    ** highlighting, the token itself, and the closing markup. Each step is
    ** skipped once an error has occurred. */
    if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
    if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);

    iEnd = iFin;
  }

  pMod->xClose(pC);
  return rc;
}
829 
830 
/*
** Count the entries in a column-list: the delta-encoded run of term-offset
** varints that belongs to a single column of a single row.
**
** On entry, *ppCollist must point at the first byte of the first varint of
** the column-list. On return, *ppCollist points at the byte that stopped
** the scan: the first byte that is not a continuation of a multi-byte
** varint and whose value is 0x00 or 0x01. (0x00 marks the end of the
** position-list; 0x01 precedes the column number of the next column.)
**
** The return value is the number of varints that were stepped over.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *z = *ppCollist;           /* Read cursor */
  int bMore = 0;                  /* True if the previous byte had 0x80 set */
  int nVarint = 0;                /* Number of complete varints seen so far */

  /* A 0x00 or 0x01 byte terminates the list, unless that byte is the
  ** continuation of a varint begun by an earlier byte. */
  for(;;){
    if( !bMore && ((*z) & 0xFE)==0 ) break;
    bMore = ((*z++) & 0x80)!=0;
    if( !bMore ) nVarint++;
  }

  *ppCollist = z;
  return nVarint;
}
858 
859 /*
860 ** This function gathers 'y' or 'b' data for a single phrase.
861 */
fts3ExprLHits(Fts3Expr * pExpr,MatchInfo * p)862 static int fts3ExprLHits(
863   Fts3Expr *pExpr,                /* Phrase expression node */
864   MatchInfo *p                    /* Matchinfo context */
865 ){
866   Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab;
867   int iStart;
868   Fts3Phrase *pPhrase = pExpr->pPhrase;
869   char *pIter = pPhrase->doclist.pList;
870   int iCol = 0;
871 
872   assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS );
873   if( p->flag==FTS3_MATCHINFO_LHITS ){
874     iStart = pExpr->iPhrase * p->nCol;
875   }else{
876     iStart = pExpr->iPhrase * ((p->nCol + 31) / 32);
877   }
878 
879   while( 1 ){
880     int nHit = fts3ColumnlistCount(&pIter);
881     if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){
882       if( p->flag==FTS3_MATCHINFO_LHITS ){
883         p->aMatchinfo[iStart + iCol] = (u32)nHit;
884       }else if( nHit ){
885         p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F));
886       }
887     }
888     assert( *pIter==0x00 || *pIter==0x01 );
889     if( *pIter!=0x01 ) break;
890     pIter++;
891     pIter += fts3GetVarint32(pIter, &iCol);
892     if( iCol>=p->nCol ) return FTS_CORRUPT_VTAB;
893   }
894   return SQLITE_OK;
895 }
896 
897 /*
898 ** Gather the results for matchinfo directives 'y' and 'b'.
899 */
fts3ExprLHitGather(Fts3Expr * pExpr,MatchInfo * p)900 static int fts3ExprLHitGather(
901   Fts3Expr *pExpr,
902   MatchInfo *p
903 ){
904   int rc = SQLITE_OK;
905   assert( (pExpr->pLeft==0)==(pExpr->pRight==0) );
906   if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){
907     if( pExpr->pLeft ){
908       rc = fts3ExprLHitGather(pExpr->pLeft, p);
909       if( rc==SQLITE_OK ) rc = fts3ExprLHitGather(pExpr->pRight, p);
910     }else{
911       rc = fts3ExprLHits(pExpr, p);
912     }
913   }
914   return rc;
915 }
916 
917 /*
918 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats
919 ** for a single query.
920 **
921 ** fts3ExprIterate() callback to load the 'global' elements of a
922 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
923 ** of the matchinfo array that are constant for all rows returned by the
924 ** current query.
925 **
926 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
927 ** function populates Matchinfo.aMatchinfo[] as follows:
928 **
929 **   for(iCol=0; iCol<nCol; iCol++){
930 **     aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
931 **     aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
932 **   }
933 **
934 ** where X is the number of matches for phrase iPhrase is column iCol of all
935 ** rows of the table. Y is the number of rows for which column iCol contains
936 ** at least one instance of phrase iPhrase.
937 **
938 ** If the phrase pExpr consists entirely of deferred tokens, then all X and
939 ** Y values are set to nDoc, where nDoc is the number of documents in the
940 ** file system. This is done because the full-text index doclist is required
941 ** to calculate these values properly, and the full-text index doclist is
942 ** not available for deferred tokens.
943 */
fts3ExprGlobalHitsCb(Fts3Expr * pExpr,int iPhrase,void * pCtx)944 static int fts3ExprGlobalHitsCb(
945   Fts3Expr *pExpr,                /* Phrase expression node */
946   int iPhrase,                    /* Phrase number (numbered from zero) */
947   void *pCtx                      /* Pointer to MatchInfo structure */
948 ){
949   MatchInfo *p = (MatchInfo *)pCtx;
950   return sqlite3Fts3EvalPhraseStats(
951       p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
952   );
953 }
954 
955 /*
956 ** fts3ExprIterate() callback used to collect the "local" part of the
957 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
958 ** array that are different for each row returned by the query.
959 */
fts3ExprLocalHitsCb(Fts3Expr * pExpr,int iPhrase,void * pCtx)960 static int fts3ExprLocalHitsCb(
961   Fts3Expr *pExpr,                /* Phrase expression node */
962   int iPhrase,                    /* Phrase number */
963   void *pCtx                      /* Pointer to MatchInfo structure */
964 ){
965   int rc = SQLITE_OK;
966   MatchInfo *p = (MatchInfo *)pCtx;
967   int iStart = iPhrase * p->nCol * 3;
968   int i;
969 
970   for(i=0; i<p->nCol && rc==SQLITE_OK; i++){
971     char *pCsr;
972     rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr);
973     if( pCsr ){
974       p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
975     }else{
976       p->aMatchinfo[iStart+i*3] = 0;
977     }
978   }
979 
980   return rc;
981 }
982 
/*
** Check that cArg is a matchinfo() request character that table pTab can
** satisfy.
**
** 'p', 'c', 's', 'x', 'y' and 'b' are always accepted. 'n' and 'a' are
** accepted only if pTab->bFts4 is set, and 'l' only if pTab->bHasDocsize
** is set.
**
** Returns SQLITE_OK if the request is acceptable. Otherwise an error
** message is written via sqlite3Fts3ErrMsg() into *pzErr and SQLITE_ERROR
** is returned.
*/
static int fts3MatchinfoCheck(
  Fts3Table *pTab,
  char cArg,
  char **pzErr
){
  int bOk;                        /* True if the request is acceptable */

  switch( cArg ){
    case FTS3_MATCHINFO_NPHRASE:
    case FTS3_MATCHINFO_NCOL:
    case FTS3_MATCHINFO_LCS:
    case FTS3_MATCHINFO_HITS:
    case FTS3_MATCHINFO_LHITS:
    case FTS3_MATCHINFO_LHITS_BM:
      bOk = 1;
      break;

    case FTS3_MATCHINFO_NDOC:
    case FTS3_MATCHINFO_AVGLENGTH:
      bOk = (pTab->bFts4!=0);
      break;

    case FTS3_MATCHINFO_LENGTH:
      bOk = (pTab->bHasDocsize!=0);
      break;

    default:
      bOk = 0;
      break;
  }

  if( bOk ) return SQLITE_OK;
  sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg);
  return SQLITE_ERROR;
}
1003 
/*
** Return the number of u32 values that matchinfo() request character cArg
** contributes to the output, given the column and phrase counts stored in
** pInfo->nCol and pInfo->nPhrase:
**
**   'n', 'p', 'c'   1
**   'a', 'l', 's'   nCol
**   'y'             nCol * nPhrase
**   'b'             nPhrase * ((nCol+31)/32)
**   'x' (default)   nCol * nPhrase * 3
**
** cArg must already have been validated; any character not listed above is
** asserted to be FTS3_MATCHINFO_HITS.
**
** Each product is computed in size_t (the first operand is cast before the
** multiplication) so that a large nCol*nPhrase cannot overflow int
** arithmetic before being widened.
*/
static size_t fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
  size_t nVal;                      /* Number of integers output by cArg */

  switch( cArg ){
    case FTS3_MATCHINFO_NDOC:
    case FTS3_MATCHINFO_NPHRASE:
    case FTS3_MATCHINFO_NCOL:
      nVal = 1;
      break;

    case FTS3_MATCHINFO_AVGLENGTH:
    case FTS3_MATCHINFO_LENGTH:
    case FTS3_MATCHINFO_LCS:
      nVal = (size_t)pInfo->nCol;
      break;

    case FTS3_MATCHINFO_LHITS:
      nVal = (size_t)pInfo->nCol * (size_t)pInfo->nPhrase;
      break;

    case FTS3_MATCHINFO_LHITS_BM:
      nVal = (size_t)pInfo->nPhrase * (size_t)((pInfo->nCol + 31) / 32);
      break;

    default:
      assert( cArg==FTS3_MATCHINFO_HITS );
      nVal = (size_t)pInfo->nCol * (size_t)pInfo->nPhrase * 3;
      break;
  }

  return nVal;
}
1036 
/*
** Read the document count from the "doctotal" record of table pTab.
**
** If *ppStmt is NULL on entry, a statement is obtained with
** sqlite3Fts3SelectDoctotal() and stored there; otherwise the statement
** already in *ppStmt is reused. The statement is expected to be positioned
** on a row with exactly one column, a blob that begins with a varint
** holding the number of documents.
**
** On success SQLITE_OK is returned and:
**
**   *pnDoc  is set to the document count,
**   *paLen  (if paLen!=0) points at the first byte after that varint,
**   *ppEnd  (if ppEnd!=0) points at the first byte past the end of the blob.
**
** FTS_CORRUPT_VTAB is returned if the blob is NULL, if the document count
** is not positive, or if the varint extends past the end of the blob. An
** error from sqlite3Fts3SelectDoctotal() is returned unchanged.
*/
static int fts3MatchinfoSelectDoctotal(
  Fts3Table *pTab,
  sqlite3_stmt **ppStmt,
  sqlite3_int64 *pnDoc,
  const char **paLen,
  const char **ppEnd
){
  sqlite3_stmt *pStmt;            /* Statement to read the record from */
  const char *aBlob;              /* Read cursor within the blob */
  const char *pBlobEnd;           /* First byte past the end of the blob */
  sqlite3_int64 nRow;             /* Decoded document count */
  int nByte;                      /* Size of the blob in bytes */

  if( *ppStmt==0 ){
    int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
    if( rc!=SQLITE_OK ) return rc;
  }
  pStmt = *ppStmt;
  assert( sqlite3_data_count(pStmt)==1 );

  nByte = sqlite3_column_bytes(pStmt, 0);
  aBlob = sqlite3_column_blob(pStmt, 0);
  if( aBlob==0 ) return FTS_CORRUPT_VTAB;

  pBlobEnd = &aBlob[nByte];
  aBlob += sqlite3Fts3GetVarintBounded(aBlob, pBlobEnd, &nRow);
  if( aBlob>pBlobEnd || nRow<=0 ) return FTS_CORRUPT_VTAB;

  *pnDoc = nRow;
  if( paLen ) *paLen = aBlob;
  if( ppEnd ) *ppEnd = pBlobEnd;
  return SQLITE_OK;
}
1074 
1075 /*
1076 ** An instance of the following structure is used to store state while
1077 ** iterating through a multi-column position-list corresponding to the
1078 ** hits for a single phrase on a single row in order to calculate the
1079 ** values for a matchinfo() FTS3_MATCHINFO_LCS request.
1080 */
1081 typedef struct LcsIterator LcsIterator;
1082 struct LcsIterator {
1083   Fts3Expr *pExpr;                /* Pointer to phrase expression */
1084   int iPosOffset;                 /* Tokens count up to end of this phrase */
1085   char *pRead;                    /* Cursor used to iterate through aDoclist */
1086   int iPos;                       /* Current position */
1087 };
1088 
/*
** Sentinel meaning that an LCS iterator has finished iterating through all
** offsets for all columns.
**
** NOTE(review): the original comment ties this value to an
** "LcsIterator.iCol" field, but struct LcsIterator has no such member and
** no use of the macro is visible nearby - confirm whether it is still
** needed.
**
** The expansion carries no trailing semicolon so that the macro can be used
** inside an expression.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
1094 
/*
** fts3ExprIterate() callback used by fts3MatchinfoLcs(). pCtx is an array
** of LcsIterator objects indexed by phrase number; this records the phrase
** expression pExpr in the entry for phrase iPhrase. Always returns
** SQLITE_OK.
*/
static int fts3MatchinfoLcsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Pointer to array of LcsIterator */
){
  LcsIterator *pSlot = &((LcsIterator *)pCtx)[iPhrase];
  pSlot->pExpr = pExpr;
  return SQLITE_OK;
}
1104 
1105 /*
1106 ** Advance the iterator passed as an argument to the next position. Return
1107 ** 1 if the iterator is at EOF or if it now points to the start of the
1108 ** position list for the next column.
1109 */
fts3LcsIteratorAdvance(LcsIterator * pIter)1110 static int fts3LcsIteratorAdvance(LcsIterator *pIter){
1111   char *pRead = pIter->pRead;
1112   sqlite3_int64 iRead;
1113   int rc = 0;
1114 
1115   pRead += sqlite3Fts3GetVarint(pRead, &iRead);
1116   if( iRead==0 || iRead==1 ){
1117     pRead = 0;
1118     rc = 1;
1119   }else{
1120     pIter->iPos += (int)(iRead-2);
1121   }
1122 
1123   pIter->pRead = pRead;
1124   return rc;
1125 }
1126 
/*
** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
**
** If the call is successful, the longest-common-substring lengths for each
** column are written into the first nCol elements of the pInfo->aMatchinfo[]
** array before returning. SQLITE_OK is returned in this case.
**
** Otherwise, if an error occurs, an SQLite error code is returned and the
** data written to the first nCol elements of pInfo->aMatchinfo[] is
** undefined. SQLITE_NOMEM is returned if the iterator array cannot be
** allocated, and FTS_CORRUPT_VTAB if a position-list that is present turns
** out to contain no positions.
**
** For each column the value stored is the length of the longest run of
** consecutive phrases (in phrase-number order) whose iterators are all live
** and report the same iPos at the same moment.
*/
static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
  LcsIterator *aIter;
  int i;
  int iCol;
  int nToken = 0;
  int rc = SQLITE_OK;

  /* Allocate and populate the array of LcsIterator objects. The array
  ** contains one element for each matchable phrase in the query.
  **/
  aIter = sqlite3_malloc64(sizeof(LcsIterator) * pCsr->nPhrase);
  if( !aIter ) return SQLITE_NOMEM;
  memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
  (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);

  /* Give each iterator a starting offset equal to minus the number of
  ** tokens in its own phrase and all earlier phrases. */
  for(i=0; i<pInfo->nPhrase; i++){
    LcsIterator *pIter = &aIter[i];
    nToken -= pIter->pExpr->pPhrase->nToken;
    pIter->iPosOffset = nToken;
  }

  for(iCol=0; iCol<pInfo->nCol; iCol++){
    int nLcs = 0;                 /* LCS value for this column */
    int nLive = 0;                /* Number of iterators in aIter not at EOF */

    /* Point every iterator at its phrase's position-list for column iCol
    ** and load the first position. An iterator whose phrase has no
    ** position-list in this column is left with pRead==0 and not counted
    ** as live. */
    for(i=0; i<pInfo->nPhrase; i++){
      LcsIterator *pIt = &aIter[i];
      rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead);
      if( rc!=SQLITE_OK ) goto matchinfo_lcs_out;
      if( pIt->pRead ){
        pIt->iPos = pIt->iPosOffset;
        fts3LcsIteratorAdvance(pIt);
        /* A position-list that exists but yields no position is treated
        ** as corruption. */
        if( pIt->pRead==0 ){
          rc = FTS_CORRUPT_VTAB;
          goto matchinfo_lcs_out;
        }
        nLive++;
      }
    }

    /* Each pass measures the runs at the current iterator positions and
    ** then advances the live iterator with the smallest iPos. The loop
    ** ends once every iterator has run out of positions for this column. */
    while( nLive>0 ){
      LcsIterator *pAdv = 0;      /* The iterator to advance by one position */
      int nThisLcs = 0;           /* LCS for the current iterator positions */

      for(i=0; i<pInfo->nPhrase; i++){
        LcsIterator *pIter = &aIter[i];
        if( pIter->pRead==0 ){
          /* This iterator is already at EOF for this column. */
          nThisLcs = 0;
        }else{
          if( pAdv==0 || pIter->iPos<pAdv->iPos ){
            pAdv = pIter;
          }
          /* pIter[-1] is only evaluated when nThisLcs!=0, which can only be
          ** the case if the previous iterator (i-1) exists and was live. */
          if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
            nThisLcs++;
          }else{
            nThisLcs = 1;
          }
          if( nThisLcs>nLcs ) nLcs = nThisLcs;
        }
      }
      /* nLive>0 means at least one iterator had pRead!=0 above, so pAdv
      ** is not NULL here. */
      if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
    }

    pInfo->aMatchinfo[iCol] = nLcs;
  }

 matchinfo_lcs_out:
  sqlite3_free(aIter);
  return rc;
}
1209 
/*
** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
** be returned by the matchinfo() function. Argument zArg contains the
** format string passed as the second argument to matchinfo (or the
** default value "pcx" if no second argument was specified). The format
** string has already been validated and the pInfo->aMatchinfo[] array
** is guaranteed to be large enough for the output.
**
** If bGlobal is true, then populate all fields of the matchinfo() output.
** If it is false, then assume that those fields that do not change between
** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
** have already been populated.
**
** The characters of zArg are processed left to right. After each one,
** pInfo->aMatchinfo is advanced past the values belonging to that character,
** so every case below writes starting at index 0 of its own region. As a
** side effect pInfo->aMatchinfo no longer points at the start of the buffer
** when this function returns. pInfo->flag is set to the character being
** processed.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs. If a value other than SQLITE_OK is returned, the state the
** pInfo->aMatchinfo[] buffer is left in is undefined.
*/
static int fts3MatchinfoValues(
  Fts3Cursor *pCsr,               /* FTS3 cursor object */
  int bGlobal,                    /* True to grab the global stats */
  MatchInfo *pInfo,               /* Matchinfo context object */
  const char *zArg                /* Matchinfo format string */
){
  int rc = SQLITE_OK;
  int i;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  sqlite3_stmt *pSelect = 0;      /* Doctotal statement; obtained on first
                                  ** use and shared by the 'n', 'a' and 'x'
                                  ** cases, reset before returning */

  for(i=0; rc==SQLITE_OK && zArg[i]; i++){
    pInfo->flag = zArg[i];
    switch( zArg[i] ){
      case FTS3_MATCHINFO_NPHRASE:
        if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
        break;

      case FTS3_MATCHINFO_NCOL:
        if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
        break;

      case FTS3_MATCHINFO_NDOC:
        if( bGlobal ){
          sqlite3_int64 nDoc = 0;
          rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0, 0);
          /* Written even on error; nDoc is still 0 in that case unless the
          ** callee set it. */
          pInfo->aMatchinfo[0] = (u32)nDoc;
        }
        break;

      case FTS3_MATCHINFO_AVGLENGTH:
        if( bGlobal ){
          sqlite3_int64 nDoc;     /* Number of rows in table */
          const char *a;          /* Aggregate column length array */
          const char *pEnd;       /* First byte past end of length array */

          rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a, &pEnd);
          if( rc==SQLITE_OK ){
            int iCol;
            for(iCol=0; iCol<pInfo->nCol; iCol++){
              u32 iVal;
              sqlite3_int64 nToken;
              /* NOTE(review): this reads with the unbounded
              ** sqlite3Fts3GetVarint() and checks a>pEnd afterwards, whereas
              ** the LENGTH case uses sqlite3Fts3GetVarintBounded() - confirm
              ** whether the bounded variant should be used here too. */
              a += sqlite3Fts3GetVarint(a, &nToken);
              if( a>pEnd ){
                rc = SQLITE_CORRUPT_VTAB;
                break;
              }
              /* Low 32 bits of the column's token total divided by nDoc,
              ** with nDoc/2 added first so the quotient is rounded to the
              ** nearest integer. nDoc is known to be positive here. */
              iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc);
              pInfo->aMatchinfo[iCol] = iVal;
            }
          }
        }
        break;

      case FTS3_MATCHINFO_LENGTH: {
        /* Per-row value: one varint per column is read from the docsize
        ** record of the current row (pCsr->iPrevId). */
        sqlite3_stmt *pSelectDocsize = 0;
        rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize);
        if( rc==SQLITE_OK ){
          int iCol;
          /* NOTE(review): the blob pointer is not checked for NULL before
          ** use - confirm that a NULL blob cannot occur here. */
          const char *a = sqlite3_column_blob(pSelectDocsize, 0);
          const char *pEnd = a + sqlite3_column_bytes(pSelectDocsize, 0);
          for(iCol=0; iCol<pInfo->nCol; iCol++){
            sqlite3_int64 nToken;
            a += sqlite3Fts3GetVarintBounded(a, pEnd, &nToken);
            if( a>pEnd ){
              rc = SQLITE_CORRUPT_VTAB;
              break;
            }
            pInfo->aMatchinfo[iCol] = (u32)nToken;
          }
        }
        sqlite3_reset(pSelectDocsize);
        break;
      }

      case FTS3_MATCHINFO_LCS:
        rc = fts3ExprLoadDoclists(pCsr, 0, 0);
        if( rc==SQLITE_OK ){
          rc = fts3MatchinfoLcs(pCsr, pInfo);
        }
        break;

      case FTS3_MATCHINFO_LHITS_BM:
      case FTS3_MATCHINFO_LHITS: {
        /* Zero the whole region first: fts3ExprLHitGather() only writes
        ** (or ORs bits into) entries for phrases on the current row. */
        size_t nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32);
        memset(pInfo->aMatchinfo, 0, nZero);
        rc = fts3ExprLHitGather(pCsr->pExpr, pInfo);
        break;
      }

      default: {
        Fts3Expr *pExpr;
        assert( zArg[i]==FTS3_MATCHINFO_HITS );
        pExpr = pCsr->pExpr;
        rc = fts3ExprLoadDoclists(pCsr, 0, 0);
        if( rc!=SQLITE_OK ) break;
        if( bGlobal ){
          /* When the cursor has deferred tokens, pInfo->nDoc is loaded
          ** before the global-hits callback runs. */
          if( pCsr->pDeferred ){
            rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc,0,0);
            if( rc!=SQLITE_OK ) break;
          }
          rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
          sqlite3Fts3EvalTestDeferred(pCsr, &rc);
          if( rc!=SQLITE_OK ) break;
        }
        /* The return code of the local-hits pass is deliberately ignored. */
        (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
        break;
      }
    }

    /* Step the output pointer past the values for this format character. */
    pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
  }

  sqlite3_reset(pSelect);
  return rc;
}
1343 
1344 
1345 /*
1346 ** Populate pCsr->aMatchinfo[] with data for the current row. The
1347 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
1348 */
fts3GetMatchinfo(sqlite3_context * pCtx,Fts3Cursor * pCsr,const char * zArg)1349 static void fts3GetMatchinfo(
1350   sqlite3_context *pCtx,        /* Return results here */
1351   Fts3Cursor *pCsr,               /* FTS3 Cursor object */
1352   const char *zArg                /* Second argument to matchinfo() function */
1353 ){
1354   MatchInfo sInfo;
1355   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1356   int rc = SQLITE_OK;
1357   int bGlobal = 0;                /* Collect 'global' stats as well as local */
1358 
1359   u32 *aOut = 0;
1360   void (*xDestroyOut)(void*) = 0;
1361 
1362   memset(&sInfo, 0, sizeof(MatchInfo));
1363   sInfo.pCursor = pCsr;
1364   sInfo.nCol = pTab->nColumn;
1365 
1366   /* If there is cached matchinfo() data, but the format string for the
1367   ** cache does not match the format string for this request, discard
1368   ** the cached data. */
1369   if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){
1370     sqlite3Fts3MIBufferFree(pCsr->pMIBuffer);
1371     pCsr->pMIBuffer = 0;
1372   }
1373 
1374   /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the
1375   ** matchinfo function has been called for this query. In this case
1376   ** allocate the array used to accumulate the matchinfo data and
1377   ** initialize those elements that are constant for every row.
1378   */
1379   if( pCsr->pMIBuffer==0 ){
1380     size_t nMatchinfo = 0;        /* Number of u32 elements in match-info */
1381     int i;                        /* Used to iterate through zArg */
1382 
1383     /* Determine the number of phrases in the query */
1384     pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
1385     sInfo.nPhrase = pCsr->nPhrase;
1386 
1387     /* Determine the number of integers in the buffer returned by this call. */
1388     for(i=0; zArg[i]; i++){
1389       char *zErr = 0;
1390       if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
1391         sqlite3_result_error(pCtx, zErr, -1);
1392         sqlite3_free(zErr);
1393         return;
1394       }
1395       nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
1396     }
1397 
1398     /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
1399     pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg);
1400     if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM;
1401 
1402     pCsr->isMatchinfoNeeded = 1;
1403     bGlobal = 1;
1404   }
1405 
1406   if( rc==SQLITE_OK ){
1407     xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut);
1408     if( xDestroyOut==0 ){
1409       rc = SQLITE_NOMEM;
1410     }
1411   }
1412 
1413   if( rc==SQLITE_OK ){
1414     sInfo.aMatchinfo = aOut;
1415     sInfo.nPhrase = pCsr->nPhrase;
1416     rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
1417     if( bGlobal ){
1418       fts3MIBufferSetGlobal(pCsr->pMIBuffer);
1419     }
1420   }
1421 
1422   if( rc!=SQLITE_OK ){
1423     sqlite3_result_error_code(pCtx, rc);
1424     if( xDestroyOut ) xDestroyOut(aOut);
1425   }else{
1426     int n = pCsr->pMIBuffer->nElem * sizeof(u32);
1427     sqlite3_result_blob(pCtx, aOut, n, xDestroyOut);
1428   }
1429 }
1430 
/*
** Implementation of snippet() function.
**
** Builds the snippet text for the current row of cursor pCsr and returns it
** through pCtx. If the cursor has no expression (pCsr->pExpr==0) the result
** is the empty string. On error, the error code is set on pCtx instead.
**
** nToken is clamped to the range -64..+64. When it is non-negative the
** token budget is divided between the fragments (rounded up); when it is
** negative each fragment gets -nToken tokens.
**
** NOTE(review): nToken==0 would make nFToken 0 and trip the
** assert( nFToken>0 ) below - presumably callers filter that case; confirm.
*/
void sqlite3Fts3Snippet(
  sqlite3_context *pCtx,          /* SQLite function call context */
  Fts3Cursor *pCsr,               /* Cursor object */
  const char *zStart,             /* Snippet start text - "<b>" */
  const char *zEnd,               /* Snippet end text - "</b>" */
  const char *zEllipsis,          /* Snippet ellipsis text - "<b>...</b>" */
  int iCol,                       /* Extract snippet from this column */
  int nToken                      /* Approximate number of tokens in snippet */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc = SQLITE_OK;
  int i;
  StrBuffer res = {0, 0, 0};      /* Accumulates the snippet text */

  /* The returned text includes up to four fragments of text extracted from
  ** the data in the current row. The first iteration of the for(...) loop
  ** below attempts to locate a single fragment of text nToken tokens in
  ** size that contains at least one instance of all phrases in the query
  ** expression that appear in the current row. If such a fragment of text
  ** cannot be found, the second iteration of the loop attempts to locate
  ** a pair of fragments, and so on.
  */
  int nSnippet = 0;               /* Number of fragments in this snippet */
  SnippetFragment aSnippet[4];    /* Maximum of 4 fragments per snippet */
  int nFToken = -1;               /* Number of tokens in each fragment */

  if( !pCsr->pExpr ){
    sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
    return;
  }

  /* Limit the snippet length to 64 tokens. */
  if( nToken<-64 ) nToken = -64;
  if( nToken>+64 ) nToken = +64;

  /* Try 1 fragment, then 2, ... The loop is left via the break at the
  ** bottom, or via goto on error. */
  for(nSnippet=1; 1; nSnippet++){

    int iSnip;                    /* Loop counter 0..nSnippet-1 */
    u64 mCovered = 0;             /* Bitmask of phrases covered by snippet */
    u64 mSeen = 0;                /* Bitmask of phrases seen by BestSnippet() */

    if( nToken>=0 ){
      /* Split the budget between the fragments, rounding up. */
      nFToken = (nToken+nSnippet-1) / nSnippet;
    }else{
      nFToken = -1 * nToken;
    }

    for(iSnip=0; iSnip<nSnippet; iSnip++){
      int iBestScore = -1;        /* Best score of columns checked so far */
      int iRead;                  /* Used to iterate through columns */
      SnippetFragment *pFragment = &aSnippet[iSnip];

      memset(pFragment, 0, sizeof(*pFragment));

      /* Loop through all columns of the table being considered for snippets.
      ** If the iCol argument to this function was negative, this means all
      ** columns of the FTS3 table. Otherwise, only column iCol is considered.
      */
      for(iRead=0; iRead<pTab->nColumn; iRead++){
        SnippetFragment sF = {0, 0, 0, 0};
        int iS = 0;
        if( iCol>=0 && iRead!=iCol ) continue;

        /* Find the best snippet of nFToken tokens in column iRead. */
        rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
        if( rc!=SQLITE_OK ){
          goto snippet_out;
        }
        /* Keep the highest-scoring candidate; on a tie the earlier column
        ** wins because the comparison is strict. */
        if( iS>iBestScore ){
          *pFragment = sF;
          iBestScore = iS;
        }
      }

      /* Phrases covered by this fragment are passed to fts3BestSnippet()
      ** as already covered when the next fragment is chosen. */
      mCovered |= pFragment->covered;
    }

    /* If all query phrases seen by fts3BestSnippet() are present in at least
    ** one of the nSnippet snippet fragments, break out of the loop.
    */
    assert( (mCovered&mSeen)==mCovered );
    if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break;
  }

  assert( nFToken>0 );

  /* Render each chosen fragment into res. The fourth argument tells
  ** fts3SnippetText() whether this is the last fragment. */
  for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
    rc = fts3SnippetText(pCsr, &aSnippet[i],
        i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
    );
  }

 snippet_out:
  sqlite3Fts3SegmentsClose(pTab);
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
    sqlite3_free(res.z);
  }else{
    /* res.z is handed over together with sqlite3_free as its destructor. */
    sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
  }
}
1535 
1536 
1537 typedef struct TermOffset TermOffset;
1538 typedef struct TermOffsetCtx TermOffsetCtx;
1539 
1540 struct TermOffset {
1541   char *pList;                    /* Position-list */
1542   int iPos;                       /* Position just read from pList */
1543   int iOff;                       /* Offset of this term from read positions */
1544 };
1545 
1546 struct TermOffsetCtx {
1547   Fts3Cursor *pCsr;
1548   int iCol;                       /* Column of table to populate aTerm for */
1549   int iTerm;
1550   sqlite3_int64 iDocid;
1551   TermOffset *aTerm;
1552 };
1553 
1554 /*
1555 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
1556 */
fts3ExprTermOffsetInit(Fts3Expr * pExpr,int iPhrase,void * ctx)1557 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
1558   TermOffsetCtx *p = (TermOffsetCtx *)ctx;
1559   int nTerm;                      /* Number of tokens in phrase */
1560   int iTerm;                      /* For looping through nTerm phrase terms */
1561   char *pList;                    /* Pointer to position list for phrase */
1562   int iPos = 0;                   /* First position in position-list */
1563   int rc;
1564 
1565   UNUSED_PARAMETER(iPhrase);
1566   rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList);
1567   nTerm = pExpr->pPhrase->nToken;
1568   if( pList ){
1569     fts3GetDeltaPosition(&pList, &iPos);
1570     assert_fts3_nc( iPos>=0 );
1571   }
1572 
1573   for(iTerm=0; iTerm<nTerm; iTerm++){
1574     TermOffset *pT = &p->aTerm[p->iTerm++];
1575     pT->iOff = nTerm-iTerm-1;
1576     pT->pList = pList;
1577     pT->iPos = iPos;
1578   }
1579 
1580   return rc;
1581 }
1582 
1583 /*
1584 ** Implementation of offsets() function.
1585 */
sqlite3Fts3Offsets(sqlite3_context * pCtx,Fts3Cursor * pCsr)1586 void sqlite3Fts3Offsets(
1587   sqlite3_context *pCtx,          /* SQLite function call context */
1588   Fts3Cursor *pCsr                /* Cursor object */
1589 ){
1590   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1591   sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
1592   int rc;                         /* Return Code */
1593   int nToken;                     /* Number of tokens in query */
1594   int iCol;                       /* Column currently being processed */
1595   StrBuffer res = {0, 0, 0};      /* Result string */
1596   TermOffsetCtx sCtx;             /* Context for fts3ExprTermOffsetInit() */
1597 
1598   if( !pCsr->pExpr ){
1599     sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
1600     return;
1601   }
1602 
1603   memset(&sCtx, 0, sizeof(sCtx));
1604   assert( pCsr->isRequireSeek==0 );
1605 
1606   /* Count the number of terms in the query */
1607   rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
1608   if( rc!=SQLITE_OK ) goto offsets_out;
1609 
1610   /* Allocate the array of TermOffset iterators. */
1611   sCtx.aTerm = (TermOffset *)sqlite3_malloc64(sizeof(TermOffset)*nToken);
1612   if( 0==sCtx.aTerm ){
1613     rc = SQLITE_NOMEM;
1614     goto offsets_out;
1615   }
1616   sCtx.iDocid = pCsr->iPrevId;
1617   sCtx.pCsr = pCsr;
1618 
1619   /* Loop through the table columns, appending offset information to
1620   ** string-buffer res for each column.
1621   */
1622   for(iCol=0; iCol<pTab->nColumn; iCol++){
1623     sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
1624     const char *ZDUMMY;           /* Dummy argument used with xNext() */
1625     int NDUMMY = 0;               /* Dummy argument used with xNext() */
1626     int iStart = 0;
1627     int iEnd = 0;
1628     int iCurrent = 0;
1629     const char *zDoc;
1630     int nDoc;
1631 
1632     /* Initialize the contents of sCtx.aTerm[] for column iCol. There is
1633     ** no way that this operation can fail, so the return code from
1634     ** fts3ExprIterate() can be discarded.
1635     */
1636     sCtx.iCol = iCol;
1637     sCtx.iTerm = 0;
1638     (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx);
1639 
1640     /* Retrieve the text stored in column iCol. If an SQL NULL is stored
1641     ** in column iCol, jump immediately to the next iteration of the loop.
1642     ** If an OOM occurs while retrieving the data (this can happen if SQLite
1643     ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
1644     ** to the caller.
1645     */
1646     zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
1647     nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
1648     if( zDoc==0 ){
1649       if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
1650         continue;
1651       }
1652       rc = SQLITE_NOMEM;
1653       goto offsets_out;
1654     }
1655 
1656     /* Initialize a tokenizer iterator to iterate through column iCol. */
1657     rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
1658         zDoc, nDoc, &pC
1659     );
1660     if( rc!=SQLITE_OK ) goto offsets_out;
1661 
1662     rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1663     while( rc==SQLITE_OK ){
1664       int i;                      /* Used to loop through terms */
1665       int iMinPos = 0x7FFFFFFF;   /* Position of next token */
1666       TermOffset *pTerm = 0;      /* TermOffset associated with next token */
1667 
1668       for(i=0; i<nToken; i++){
1669         TermOffset *pT = &sCtx.aTerm[i];
1670         if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
1671           iMinPos = pT->iPos-pT->iOff;
1672           pTerm = pT;
1673         }
1674       }
1675 
1676       if( !pTerm ){
1677         /* All offsets for this column have been gathered. */
1678         rc = SQLITE_DONE;
1679       }else{
1680         assert_fts3_nc( iCurrent<=iMinPos );
1681         if( 0==(0xFE&*pTerm->pList) ){
1682           pTerm->pList = 0;
1683         }else{
1684           fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
1685         }
1686         while( rc==SQLITE_OK && iCurrent<iMinPos ){
1687           rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1688         }
1689         if( rc==SQLITE_OK ){
1690           char aBuffer[64];
1691           sqlite3_snprintf(sizeof(aBuffer), aBuffer,
1692               "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
1693           );
1694           rc = fts3StringAppend(&res, aBuffer, -1);
1695         }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
1696           rc = FTS_CORRUPT_VTAB;
1697         }
1698       }
1699     }
1700     if( rc==SQLITE_DONE ){
1701       rc = SQLITE_OK;
1702     }
1703 
1704     pMod->xClose(pC);
1705     if( rc!=SQLITE_OK ) goto offsets_out;
1706   }
1707 
1708  offsets_out:
1709   sqlite3_free(sCtx.aTerm);
1710   assert( rc!=SQLITE_DONE );
1711   sqlite3Fts3SegmentsClose(pTab);
1712   if( rc!=SQLITE_OK ){
1713     sqlite3_result_error_code(pCtx,  rc);
1714     sqlite3_free(res.z);
1715   }else{
1716     sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
1717   }
1718   return;
1719 }
1720 
1721 /*
1722 ** Implementation of matchinfo() function.
1723 */
sqlite3Fts3Matchinfo(sqlite3_context * pContext,Fts3Cursor * pCsr,const char * zArg)1724 void sqlite3Fts3Matchinfo(
1725   sqlite3_context *pContext,      /* Function call context */
1726   Fts3Cursor *pCsr,               /* FTS3 table cursor */
1727   const char *zArg                /* Second arg to matchinfo() function */
1728 ){
1729   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1730   const char *zFormat;
1731 
1732   if( zArg ){
1733     zFormat = zArg;
1734   }else{
1735     zFormat = FTS3_MATCHINFO_DEFAULT;
1736   }
1737 
1738   if( !pCsr->pExpr ){
1739     sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
1740     return;
1741   }else{
1742     /* Retrieve matchinfo() data. */
1743     fts3GetMatchinfo(pContext, pCsr, zFormat);
1744     sqlite3Fts3SegmentsClose(pTab);
1745   }
1746 }
1747 
1748 #endif
1749