1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** Low level access to the FTS index stored in the database file. The
** routines in this file implement all read and write access to the
15 ** %_data table. Other parts of the system access this functionality via
16 ** the interface defined in fts5Int.h.
17 */
18 
19 
20 #include "fts5Int.h"
21 
22 /*
23 ** Overview:
24 **
25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
26 ** As well as the main term index, there may be up to 31 prefix indexes.
27 ** The format is similar to FTS3/4, except that:
28 **
29 **   * all segment b-tree leaf data is stored in fixed size page records
30 **     (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
31 **     taken to ensure it is possible to iterate in either direction through
32 **     the entries in a doclist, or to seek to a specific entry within a
33 **     doclist, without loading it into memory.
34 **
35 **   * large doclists that span many pages have associated "doclist index"
36 **     records that contain a copy of the first rowid on each page spanned by
37 **     the doclist. This is used to speed up seek operations, and merges of
38 **     large doclists with very small doclists.
39 **
40 **   * extra fields in the "structure record" record the state of ongoing
41 **     incremental merge operations.
42 **
43 */
44 
45 
/*
** Tuning constants and fixed values used by this file.
*/

/* Number of leaf pages handled by one step of an optimize operation */
#define FTS5_OPT_WORK_UNIT  1000

/* Number of leaf pages that make up one unit of work */
#define FTS5_WORK_UNIT      64

/* A doclist index is added once this many empty pages have been seen */
#define FTS5_MIN_DLIDX_SIZE 4

/* NOTE(review): presumably the byte that tags terms belonging to the main
** (non-prefix) index - confirm against the code that uses it. */
#define FTS5_MAIN_PREFIX '0'

/* Refuse to compile if more prefix indexes are configured than the 31
** that the overview comment above allows for. */
#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif
56 
57 /*
58 ** Details:
59 **
60 ** The %_data table managed by this module,
61 **
62 **     CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
63 **
64 ** , contains the following 5 types of records. See the comments surrounding
65 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
** assigned to each of them.
67 **
68 ** 1. Structure Records:
69 **
70 **   The set of segments that make up an index - the index structure - are
71 **   recorded in a single record within the %_data table. The record consists
72 **   of a single 32-bit configuration cookie value followed by a list of
73 **   SQLite varints. If the FTS table features more than one index (because
74 **   there are one or more prefix indexes), it is guaranteed that all share
75 **   the same cookie value.
76 **
77 **   Immediately following the configuration cookie, the record begins with
78 **   three varints:
79 **
80 **     + number of levels,
81 **     + total number of segments on all levels,
82 **     + value of write counter.
83 **
84 **   Then, for each level from 0 to nMax:
85 **
86 **     + number of input segments in ongoing merge.
87 **     + total number of segments in level.
88 **     + for each segment from oldest to newest:
89 **         + segment id (always > 0)
90 **         + first leaf page number (often 1, always greater than 0)
91 **         + final leaf page number
92 **
93 ** 2. The Averages Record:
94 **
95 **   A single record within the %_data table. The data is a list of varints.
96 **   The first value is the number of rows in the index. Then, for each column
97 **   from left to right, the total number of tokens in the column for all
98 **   rows of the table.
99 **
100 ** 3. Segment leaves:
101 **
102 **   TERM/DOCLIST FORMAT:
103 **
104 **     Most of each segment leaf is taken up by term/doclist data. The
105 **     general format of term/doclist, starting with the first term
106 **     on the leaf page, is:
107 **
108 **         varint : size of first term
109 **         blob:    first term data
110 **         doclist: first doclist
111 **         zero-or-more {
112 **           varint:  number of bytes in common with previous term
113 **           varint:  number of bytes of new term data (nNew)
114 **           blob:    nNew bytes of new term data
115 **           doclist: next doclist
116 **         }
117 **
118 **     doclist format:
119 **
120 **         varint:  first rowid
121 **         poslist: first poslist
122 **         zero-or-more {
123 **           varint:  rowid delta (always > 0)
124 **           poslist: next poslist
125 **         }
126 **
127 **     poslist format:
128 **
129 **         varint: size of poslist in bytes multiplied by 2, not including
130 **                 this field. Plus 1 if this entry carries the "delete" flag.
131 **         collist: collist for column 0
132 **         zero-or-more {
133 **           0x01 byte
134 **           varint: column number (I)
135 **           collist: collist for column I
136 **         }
137 **
138 **     collist format:
139 **
140 **         varint: first offset + 2
141 **         zero-or-more {
142 **           varint: offset delta + 2
143 **         }
144 **
145 **   PAGE FORMAT
146 **
147 **     Each leaf page begins with a 4-byte header containing 2 16-bit
148 **     unsigned integer fields in big-endian format. They are:
149 **
150 **       * The byte offset of the first rowid on the page, if it exists
151 **         and occurs before the first term (otherwise 0).
152 **
153 **       * The byte offset of the start of the page footer. If the page
154 **         footer is 0 bytes in size, then this field is the same as the
155 **         size of the leaf page in bytes.
156 **
157 **     The page footer consists of a single varint for each term located
158 **     on the page. Each varint is the byte offset of the current term
159 **     within the page, delta-compressed against the previous value. In
160 **     other words, the first varint in the footer is the byte offset of
161 **     the first term, the second is the byte offset of the second less that
162 **     of the first, and so on.
163 **
164 **     The term/doclist format described above is accurate if the entire
165 **     term/doclist data fits on a single leaf page. If this is not the case,
166 **     the format is changed in two ways:
167 **
168 **       + if the first rowid on a page occurs before the first term, it
169 **         is stored as a literal value:
170 **
171 **             varint:  first rowid
172 **
173 **       + the first term on each page is stored in the same way as the
174 **         very first term of the segment:
175 **
176 **             varint : size of first term
177 **             blob:    first term data
178 **
179 ** 5. Segment doclist indexes:
180 **
181 **   Doclist indexes are themselves b-trees, however they usually consist of
182 **   a single leaf record only. The format of each doclist index leaf page
183 **   is:
184 **
185 **     * Flags byte. Bits are:
186 **         0x01: Clear if leaf is also the root page, otherwise set.
187 **
188 **     * Page number of fts index leaf page. As a varint.
189 **
190 **     * First rowid on page indicated by previous field. As a varint.
191 **
192 **     * A list of varints, one for each subsequent termless page. A
193 **       positive delta if the termless page contains at least one rowid,
194 **       or an 0x00 byte otherwise.
195 **
196 **   Internal doclist index nodes are:
197 **
198 **     * Flags byte. Bits are:
199 **         0x01: Clear for root page, otherwise set.
200 **
201 **     * Page number of first child page. As a varint.
202 **
203 **     * Copy of first rowid on page indicated by previous field. As a varint.
204 **
205 **     * A list of delta-encoded varints - the first rowid on each subsequent
206 **       child page.
207 **
208 */
209 
/*
** Fixed %_data rowids. One is reserved for the averages record and one
** for the structure record.
*/
#define FTS5_AVERAGES_ROWID     1    /* The averages record lives here */
#define FTS5_STRUCTURE_ROWID   10    /* The structure record lives here */
215 
216 /*
217 ** Macros determining the rowids used by segment leaves and dlidx leaves
218 ** and nodes. All nodes and leaves are stored in the %_data table with large
219 ** positive rowids.
220 **
221 ** Each segment has a unique non-zero 16-bit id.
222 **
223 ** The rowid for each segment leaf is found by passing the segment id and
224 ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
225 ** sequentially starting from 1.
226 */
/* Width, in bits, of each field packed into a segment/dlidx rowid */
#define FTS5_DATA_ID_B     16     /* Segment id: 16 bits (ids up to 65535) */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag: a single bit */
#define FTS5_DATA_HEIGHT_B  5     /* Dlidx tree height: 5 bits */
#define FTS5_DATA_PAGE_B   31     /* Page number: 31 bits */

/*
** Build a %_data rowid. From most to least significant the fields are
** the segment id, the doclist-index flag, the dlidx height and the page
** number. Each argument is cast to i64 before being shifted into place,
** and the shifted fields are then summed.
*/
#define fts5_dri(segid, dlidx, height, pgno) (                               \
    ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) \
  + ((i64)(dlidx)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B))                 \
  + ((i64)(height) << (FTS5_DATA_PAGE_B))                                    \
  + ((i64)(pgno))                                                            \
)

/* Rowid of leaf page pgno of segment segid (dlidx flag and height zero) */
#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)

/* Rowid of doclist-index page pgno at the given height (dlidx flag set) */
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
241 
/*
** Upper bound on the number of segments that a single index may hold.
*/
#define FTS5_MAX_SEGMENT 2000
246 
#ifdef SQLITE_DEBUG
/*
** Return SQLITE_CORRUPT_VTAB. This function takes no arguments and is
** only compiled when SQLITE_DEBUG is defined.
*/
int sqlite3Fts5Corrupt(void){ return SQLITE_CORRUPT_VTAB; }
#endif
250 
251 
/*
** Blobs read from the %_data table are padded with zero bytes so that
** decoding a corrupt record is less likely to read past the end of the
** buffer. fts5DataRead() allocates FTS5_DATA_PADDING bytes beyond the
** size of the blob itself.
*/
#define FTS5_DATA_ZERO_PADDING 8
#define FTS5_DATA_PADDING 20
259 
/* Forward declarations of the structure types used in this file. */
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
272 
273 struct Fts5Data {
274   u8 *p;                          /* Pointer to buffer containing record */
275   int nn;                         /* Size of record in bytes */
276   int szLeaf;                     /* Size of leaf without page-index */
277 };
278 
279 /*
280 ** One object per %_data table.
281 */
282 struct Fts5Index {
283   Fts5Config *pConfig;            /* Virtual table configuration */
284   char *zDataTbl;                 /* Name of %_data table */
285   int nWorkUnit;                  /* Leaf pages in a "unit" of work */
286 
287   /*
288   ** Variables related to the accumulation of tokens and doclists within the
289   ** in-memory hash tables before they are flushed to disk.
290   */
291   Fts5Hash *pHash;                /* Hash table for in-memory data */
292   int nPendingData;               /* Current bytes of pending data */
293   i64 iWriteRowid;                /* Rowid for current doc being written */
294   int bDelete;                    /* Current write is a delete */
295 
296   /* Error state. */
297   int rc;                         /* Current error code */
298 
299   /* State used by the fts5DataXXX() functions. */
300   sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
301   sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
302   sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
303   sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
304   sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=? */
305   sqlite3_stmt *pIdxSelect;
306   int nRead;                      /* Total number of blocks read */
307 
308   sqlite3_stmt *pDataVersion;
309   i64 iStructVersion;             /* data_version when pStruct read */
310   Fts5Structure *pStruct;         /* Current db structure (or NULL) */
311 };
312 
313 struct Fts5DoclistIter {
314   u8 *aEof;                       /* Pointer to 1 byte past end of doclist */
315 
316   /* Output variables. aPoslist==0 at EOF */
317   i64 iRowid;
318   u8 *aPoslist;
319   int nPoslist;
320   int nSize;
321 };
322 
323 /*
324 ** The contents of the "structure" record for each index are represented
325 ** using an Fts5Structure record in memory. Which uses instances of the
326 ** other Fts5StructureXXX types as components.
327 */
328 struct Fts5StructureSegment {
329   int iSegid;                     /* Segment id */
330   int pgnoFirst;                  /* First leaf page number in segment */
331   int pgnoLast;                   /* Last leaf page number in segment */
332 };
333 struct Fts5StructureLevel {
334   int nMerge;                     /* Number of segments in incr-merge */
335   int nSeg;                       /* Total number of segments on level */
336   Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
337 };
338 struct Fts5Structure {
339   int nRef;                       /* Object reference count */
340   u64 nWriteCounter;              /* Total leaves written to level 0 */
341   int nSegment;                   /* Total segments in this structure */
342   int nLevel;                     /* Number of levels in this index */
343   Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
344 };
345 
346 /*
347 ** An object of type Fts5SegWriter is used to write to segments.
348 */
349 struct Fts5PageWriter {
350   int pgno;                       /* Page number for this page */
351   int iPrevPgidx;                 /* Previous value written into pgidx */
352   Fts5Buffer buf;                 /* Buffer containing leaf data */
353   Fts5Buffer pgidx;               /* Buffer containing page-index */
354   Fts5Buffer term;                /* Buffer containing previous term on page */
355 };
356 struct Fts5DlidxWriter {
357   int pgno;                       /* Page number for this page */
358   int bPrevValid;                 /* True if iPrev is valid */
359   i64 iPrev;                      /* Previous rowid value written to page */
360   Fts5Buffer buf;                 /* Buffer containing page data */
361 };
362 struct Fts5SegWriter {
363   int iSegid;                     /* Segid to write to */
364   Fts5PageWriter writer;          /* PageWriter object */
365   i64 iPrevRowid;                 /* Previous rowid written to current leaf */
366   u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
367   u8 bFirstRowidInPage;           /* True if next rowid is first in page */
368   /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
369   u8 bFirstTermInPage;            /* True if next term will be first in leaf */
370   int nLeafWritten;               /* Number of leaf pages written */
371   int nEmpty;                     /* Number of contiguous term-less nodes */
372 
373   int nDlidx;                     /* Allocated size of aDlidx[] array */
374   Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */
375 
376   /* Values to insert into the %_idx table */
377   Fts5Buffer btterm;              /* Next term to insert into %_idx table */
378   int iBtPage;                    /* Page number corresponding to btterm */
379 };
380 
381 typedef struct Fts5CResult Fts5CResult;
382 struct Fts5CResult {
383   u16 iFirst;                     /* aSeg[] index of firstest iterator */
384   u8 bTermEq;                     /* True if the terms are equal */
385 };
386 
387 /*
388 ** Object for iterating through a single segment, visiting each term/rowid
389 ** pair in the segment.
390 **
391 ** pSeg:
392 **   The segment to iterate through.
393 **
394 ** iLeafPgno:
395 **   Current leaf page number within segment.
396 **
397 ** iLeafOffset:
398 **   Byte offset within the current leaf that is the first byte of the
**   position list data (one byte past the position-list size field).
400 **   rowid field of the current entry. Usually this is the size field of the
401 **   position list data. The exception is if the rowid for the current entry
402 **   is the last thing on the leaf page.
403 **
404 ** pLeaf:
405 **   Buffer containing current leaf page data. Set to NULL at EOF.
406 **
407 ** iTermLeafPgno, iTermLeafOffset:
408 **   Leaf page number containing the last term read from the segment. And
409 **   the offset immediately following the term data.
410 **
411 ** flags:
412 **   Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
413 **
414 **   FTS5_SEGITER_ONETERM:
415 **     If set, set the iterator to point to EOF after the current doclist
416 **     has been exhausted. Do not proceed to the next term in the segment.
417 **
418 **   FTS5_SEGITER_REVERSE:
419 **     This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
420 **     it is set, iterate through rowid in descending order instead of the
421 **     default ascending order.
422 **
423 ** iRowidOffset/nRowidOffset/aRowidOffset:
424 **     These are used if the FTS5_SEGITER_REVERSE flag is set.
425 **
426 **     For each rowid on the page corresponding to the current term, the
427 **     corresponding aRowidOffset[] entry is set to the byte offset of the
428 **     start of the "position-list-size" field within the page.
429 **
430 ** iTermIdx:
431 **     Index of current term on iTermLeafPgno.
432 */
433 struct Fts5SegIter {
434   Fts5StructureSegment *pSeg;     /* Segment to iterate through */
435   int flags;                      /* Mask of configuration flags */
436   int iLeafPgno;                  /* Current leaf page number */
437   Fts5Data *pLeaf;                /* Current leaf data */
438   Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
439   int iLeafOffset;                /* Byte offset within current leaf */
440 
441   /* Next method */
442   void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
443 
444   /* The page and offset from which the current term was read. The offset
445   ** is the offset of the first rowid in the current doclist.  */
446   int iTermLeafPgno;
447   int iTermLeafOffset;
448 
449   int iPgidxOff;                  /* Next offset in pgidx */
450   int iEndofDoclist;
451 
452   /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
453   int iRowidOffset;               /* Current entry in aRowidOffset[] */
454   int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
455   int *aRowidOffset;              /* Array of offset to rowid fields */
456 
457   Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */
458 
459   /* Variables populated based on current entry. */
460   Fts5Buffer term;                /* Current term */
461   i64 iRowid;                     /* Current rowid */
462   int nPos;                       /* Number of bytes in current position list */
463   u8 bDel;                        /* True if the delete flag is set */
464 };
465 
/*
** Values that may be combined in Fts5SegIter.flags.
*/
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02

/*
** The argument must point to an Fts5Data object holding a leaf page.
** Assert that its szLeaf field either equals the record size (nn) or
** equals the 16-bit value stored at byte offset 2 of the page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

/*
** The argument must point to an Fts5Data object holding a leaf page.
** Evaluates to true if the leaf holds no terms at all, or to false if
** it holds one or more.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/* The 16-bit value stored (i*2) bytes beyond offset szLeaf of leaf x */
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/* The 16-bit value stored in the first two bytes of leaf x */
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
487 
488 /*
489 ** Object for iterating through the merged results of one or more segments,
490 ** visiting each term/rowid pair in the merged data.
491 **
492 ** nSeg is always a power of two greater than or equal to the number of
493 ** segments that this object is merging data from. Both the aSeg[] and
494 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
495 ** with zeroed objects - these are handled as if they were iterators opened
496 ** on empty segments.
497 **
498 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
499 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
500 ** comparison in this context is the index of the iterator that currently
501 ** points to the smaller term/rowid combination. Iterators at EOF are
502 ** considered to be greater than all other iterators.
503 **
504 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
505 ** the smallest key overall. aFirst[0] is unused.
506 **
507 ** poslist:
508 **   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
509 **   There is no way to tell if this is populated or not.
510 */
511 struct Fts5Iter {
512   Fts5IndexIter base;             /* Base class containing output vars */
513 
514   Fts5Index *pIndex;              /* Index that owns this iterator */
515   Fts5Structure *pStruct;         /* Database structure for this iterator */
516   Fts5Buffer poslist;             /* Buffer containing current poslist */
517   Fts5Colset *pColset;            /* Restrict matches to these columns */
518 
519   /* Invoked to set output variables. */
520   void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
521 
522   int nSeg;                       /* Size of aSeg[] array */
523   int bRev;                       /* True to iterate in reverse order */
524   u8 bSkipEmpty;                  /* True to skip deleted entries */
525 
526   i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
527   Fts5CResult *aFirst;            /* Current merge state (see above) */
528   Fts5SegIter aSeg[1];            /* Array of segment iterators */
529 };
530 
531 
532 /*
533 ** An instance of the following type is used to iterate through the contents
534 ** of a doclist-index record.
535 **
536 ** pData:
537 **   Record containing the doclist-index data.
538 **
539 ** bEof:
540 **   Set to true once iterator has reached EOF.
541 **
542 ** iOff:
543 **   Set to the current offset within record pData.
544 */
545 struct Fts5DlidxLvl {
546   Fts5Data *pData;              /* Data for current page of this level */
547   int iOff;                     /* Current offset into pData */
548   int bEof;                     /* At EOF already */
549   int iFirstOff;                /* Used by reverse iterators */
550 
551   /* Output variables */
552   int iLeafPgno;                /* Page number of current leaf page */
553   i64 iRowid;                   /* First rowid on leaf iLeafPgno */
554 };
555 struct Fts5DlidxIter {
556   int nLvl;
557   int iSegid;
558   Fts5DlidxLvl aLvl[1];
559 };
560 
/*
** Write the 16-bit value iVal to the two bytes at aOut, most significant
** byte first (big-endian).
*/
static void fts5PutU16(u8 *aOut, u16 iVal){
  u8 *pOut = aOut;
  *pOut++ = (iVal>>8);            /* High-order byte */
  *pOut = (iVal&0xFF);            /* Low-order byte */
}
565 
fts5GetU16(const u8 * aIn)566 static u16 fts5GetU16(const u8 *aIn){
567   return ((u16)aIn[0] << 8) + aIn[1];
568 }
569 
570 /*
571 ** Allocate and return a buffer at least nByte bytes in size.
572 **
573 ** If an OOM error is encountered, return NULL and set the error code in
574 ** the Fts5Index handle passed as the first argument.
575 */
fts5IdxMalloc(Fts5Index * p,int nByte)576 static void *fts5IdxMalloc(Fts5Index *p, int nByte){
577   return sqlite3Fts5MallocZero(&p->rc, nByte);
578 }
579 
/*
** Compare the contents of buffer pLeft with the blob pRight/nRight. The
** bytes they have in common are compared using memcmp(). If these are
** all equal, the shorter of the two is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
**
** This function is only compiled when SQLITE_DEBUG is defined.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = 0;

  /* memcmp() requires valid pointers even when asked to compare zero
  ** bytes, and the data pointer of an empty buffer may be NULL. So only
  ** call it if there is at least one byte to compare. */
  if( nCmp>0 ){
    res = memcmp(pLeft->p, pRight, nCmp);
  }
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
598 
599 /*
600 ** Compare the contents of the two buffers using memcmp(). If one buffer
601 ** is a prefix of the other, it is considered the lesser.
602 **
603 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
604 ** +ve if pRight is smaller than pLeft. In other words:
605 **
606 **     res = *pLeft - *pRight
607 */
fts5BufferCompare(Fts5Buffer * pLeft,Fts5Buffer * pRight)608 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
609   int nCmp = MIN(pLeft->n, pRight->n);
610   int res = memcmp(pLeft->p, pRight->p, nCmp);
611   return (res==0 ? (pLeft->n - pRight->n) : res);
612 }
613 
/*
** Decode the varint stored at byte offset pLeaf->szLeaf of the leaf page
** and return its value. According to the page format described above,
** the first varint of the page footer is the offset of the first term.
*/
static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
  u8 *aFooter = &pLeaf->p[pLeaf->szLeaf];
  int iTermOff;
  fts5GetVarint32(aFooter, iTermOff);
  return iTermOff;
}
619 
620 /*
621 ** Close the read-only blob handle, if it is open.
622 */
fts5CloseReader(Fts5Index * p)623 static void fts5CloseReader(Fts5Index *p){
624   if( p->pReader ){
625     sqlite3_blob *pReader = p->pReader;
626     p->pReader = 0;
627     sqlite3_blob_close(pReader);
628   }
629 }
630 
631 /*
632 ** Retrieve a record from the %_data table.
633 **
634 ** If an error occurs, NULL is returned and an error left in the
635 ** Fts5Index object.
636 */
fts5DataRead(Fts5Index * p,i64 iRowid)637 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
638   Fts5Data *pRet = 0;
639   if( p->rc==SQLITE_OK ){
640     int rc = SQLITE_OK;
641 
642     if( p->pReader ){
643       /* This call may return SQLITE_ABORT if there has been a savepoint
644       ** rollback since it was last used. In this case a new blob handle
645       ** is required.  */
646       sqlite3_blob *pBlob = p->pReader;
647       p->pReader = 0;
648       rc = sqlite3_blob_reopen(pBlob, iRowid);
649       assert( p->pReader==0 );
650       p->pReader = pBlob;
651       if( rc!=SQLITE_OK ){
652         fts5CloseReader(p);
653       }
654       if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
655     }
656 
657     /* If the blob handle is not open at this point, open it and seek
658     ** to the requested entry.  */
659     if( p->pReader==0 && rc==SQLITE_OK ){
660       Fts5Config *pConfig = p->pConfig;
661       rc = sqlite3_blob_open(pConfig->db,
662           pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
663       );
664     }
665 
666     /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
667     ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
668     ** All the reasons those functions might return SQLITE_ERROR - missing
669     ** table, missing row, non-blob/text in block column - indicate
670     ** backing store corruption.  */
671     if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
672 
673     if( rc==SQLITE_OK ){
674       u8 *aOut = 0;               /* Read blob data into this buffer */
675       int nByte = sqlite3_blob_bytes(p->pReader);
676       int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
677       pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
678       if( pRet ){
679         pRet->nn = nByte;
680         aOut = pRet->p = (u8*)&pRet[1];
681       }else{
682         rc = SQLITE_NOMEM;
683       }
684 
685       if( rc==SQLITE_OK ){
686         rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
687       }
688       if( rc!=SQLITE_OK ){
689         sqlite3_free(pRet);
690         pRet = 0;
691       }else{
692         /* TODO1: Fix this */
693         pRet->szLeaf = fts5GetU16(&pRet->p[2]);
694       }
695     }
696     p->rc = rc;
697     p->nRead++;
698   }
699 
700   assert( (pRet==0)==(p->rc!=SQLITE_OK) );
701   return pRet;
702 }
703 
704 /*
705 ** Release a reference to data record returned by an earlier call to
706 ** fts5DataRead().
707 */
fts5DataRelease(Fts5Data * pData)708 static void fts5DataRelease(Fts5Data *pData){
709   sqlite3_free(pData);
710 }
711 
fts5LeafRead(Fts5Index * p,i64 iRowid)712 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
713   Fts5Data *pRet = fts5DataRead(p, iRowid);
714   if( pRet ){
715     if( pRet->szLeaf>pRet->nn ){
716       p->rc = FTS5_CORRUPT;
717       fts5DataRelease(pRet);
718       pRet = 0;
719     }
720   }
721   return pRet;
722 }
723 
/*
** Compile the SQL statement zSql, storing the new statement handle in
** *ppStmt. The statement is prepared with SQLITE_PREPARE_PERSISTENT.
**
** If p->rc is already set when this function is called, no statement
** is compiled. Otherwise, a NULL zSql is taken to mean that an OOM
** occurred while building the SQL, and p->rc is set to SQLITE_NOMEM.
**
** In all cases zSql is freed using sqlite3_free() before returning. The
** return value is the final value of p->rc.
*/
static int fts5IndexPrepareStmt(
  Fts5Index *p,
  sqlite3_stmt **ppStmt,
  char *zSql
){
  if( p->rc==SQLITE_OK ){
    p->rc = (zSql==0) ? SQLITE_NOMEM : sqlite3_prepare_v3(
        p->pConfig->db, zSql, -1, SQLITE_PREPARE_PERSISTENT, ppStmt, 0
    );
  }
  sqlite3_free(zSql);
  return p->rc;
}
740 
741 
742 /*
743 ** INSERT OR REPLACE a record into the %_data table.
744 */
fts5DataWrite(Fts5Index * p,i64 iRowid,const u8 * pData,int nData)745 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
746   if( p->rc!=SQLITE_OK ) return;
747 
748   if( p->pWriter==0 ){
749     Fts5Config *pConfig = p->pConfig;
750     fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
751           "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
752           pConfig->zDb, pConfig->zName
753     ));
754     if( p->rc ) return;
755   }
756 
757   sqlite3_bind_int64(p->pWriter, 1, iRowid);
758   sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
759   sqlite3_step(p->pWriter);
760   p->rc = sqlite3_reset(p->pWriter);
761 }
762 
763 /*
764 ** Execute the following SQL:
765 **
766 **     DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
767 */
fts5DataDelete(Fts5Index * p,i64 iFirst,i64 iLast)768 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
769   if( p->rc!=SQLITE_OK ) return;
770 
771   if( p->pDeleter==0 ){
772     int rc;
773     Fts5Config *pConfig = p->pConfig;
774     char *zSql = sqlite3_mprintf(
775         "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
776           pConfig->zDb, pConfig->zName
777     );
778     if( zSql==0 ){
779       rc = SQLITE_NOMEM;
780     }else{
781       rc = sqlite3_prepare_v3(pConfig->db, zSql, -1,
782                               SQLITE_PREPARE_PERSISTENT, &p->pDeleter, 0);
783       sqlite3_free(zSql);
784     }
785     if( rc!=SQLITE_OK ){
786       p->rc = rc;
787       return;
788     }
789   }
790 
791   sqlite3_bind_int64(p->pDeleter, 1, iFirst);
792   sqlite3_bind_int64(p->pDeleter, 2, iLast);
793   sqlite3_step(p->pDeleter);
794   p->rc = sqlite3_reset(p->pDeleter);
795 }
796 
797 /*
798 ** Remove all records associated with segment iSegid.
799 */
static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
  /* Delete every %_data row with an id between the first rowid of segment
  ** iSegid and the rowid immediately before the first rowid of segment
  ** iSegid+1, inclusive. */
  fts5DataDelete(p,
      FTS5_SEGMENT_ROWID(iSegid, 0),
      FTS5_SEGMENT_ROWID(iSegid+1, 0)-1
  );

  /* Prepare the %_idx delete statement on first use. */
  if( p->pIdxDeleter==0 ){
    Fts5Config *pCfg = p->pConfig;
    char *zSql = sqlite3_mprintf(
          "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
          pCfg->zDb, pCfg->zName
    );
    fts5IndexPrepareStmt(p, &p->pIdxDeleter, zSql);
  }
  if( p->rc!=SQLITE_OK ) return;

  sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
  sqlite3_step(p->pIdxDeleter);
  p->rc = sqlite3_reset(p->pIdxDeleter);
}
817 
818 /*
819 ** Release a reference to an Fts5Structure object returned by an earlier
820 ** call to fts5StructureRead() or fts5StructureDecode().
821 */
static void fts5StructureRelease(Fts5Structure *pStruct){
  int iLvl;

  /* Passing NULL is harmless. Otherwise drop one reference; the object is
  ** only destroyed once the count reaches zero. */
  if( pStruct==0 ) return;
  pStruct->nRef--;
  if( pStruct->nRef>0 ) return;

  assert( pStruct->nRef==0 );
  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
    sqlite3_free(pStruct->aLevel[iLvl].aSeg);
  }
  sqlite3_free(pStruct);
}
832 
/*
** Take one additional reference to structure object pStruct by
** incrementing its nRef counter. pStruct must not be NULL.
*/
static void fts5StructureRef(Fts5Structure *pStruct){
  pStruct->nRef += 1;
}
836 
837 /*
838 ** Deserialize and return the structure record currently stored in serialized
839 ** form within buffer pData/nData.
840 **
841 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
842 ** are over-allocated by one slot. This allows the structure contents
843 ** to be more easily edited.
844 **
845 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
846 ** returned. Otherwise, *ppOut is set to point to the new object and
847 ** SQLITE_OK returned.
848 */
static int fts5StructureDecode(
  const u8 *pData,                /* Buffer containing serialized structure */
  int nData,                      /* Size of buffer pData in bytes */
  int *piCookie,                  /* Configuration cookie value */
  Fts5Structure **ppOut           /* OUT: Deserialized object */
){
  int rc = SQLITE_OK;
  int i = 0;
  int iLvl;
  int nLevel = 0;
  int nSegment = 0;
  int nByte;                      /* Bytes of space to allocate at pRet */
  Fts5Structure *pRet = 0;        /* Structure object to return */

  /* Grab the cookie value */
  if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
  i = 4;

  /* Read the total number of levels and segments from the start of the
  ** structure record.  */
  i += fts5GetVarint32(&pData[i], nLevel);
  i += fts5GetVarint32(&pData[i], nSegment);

  /* The counts come straight from the record and are used to size memory
  ** allocations, so validate them first. Every level occupies at least two
  ** bytes of the record and every segment at least three, so a count that
  ** is negative or larger than nData can only come from a corrupt record. */
  if( nLevel<0 || nLevel>nData || nSegment<0 || nSegment>nData ){
    *ppOut = 0;
    return FTS5_CORRUPT;
  }

  nByte = (
      sizeof(Fts5Structure) +                    /* Main structure */
      sizeof(Fts5StructureLevel) * (nLevel-1)    /* aLevel[] array */
  );
  pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);

  if( pRet ){
    pRet->nRef = 1;
    pRet->nLevel = nLevel;
    pRet->nSegment = nSegment;
    i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);

    for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
      int nTotal = 0;
      int iSeg;

      if( i>=nData ){
        rc = FTS5_CORRUPT;
      }else{
        i += fts5GetVarint32(&pData[i], pLvl->nMerge);
        i += fts5GetVarint32(&pData[i], nTotal);
        if( pLvl->nMerge<0 || nTotal<pLvl->nMerge || nTotal>nData ){
          /* Per-level counts that are negative, inconsistent with each
          ** other, or too large to fit in the record. */
          rc = FTS5_CORRUPT;
        }else{
          pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
              nTotal * sizeof(Fts5StructureSegment)
          );
        }
      }

      if( rc==SQLITE_OK ){
        pLvl->nSeg = nTotal;
        for(iSeg=0; iSeg<nTotal; iSeg++){
          if( i>=nData ){
            rc = FTS5_CORRUPT;
            break;
          }
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
        }
      }
    }

    /* On any error discard the partially built object. The aSeg[] arrays
    ** allocated so far are released along with it. */
    if( rc!=SQLITE_OK ){
      fts5StructureRelease(pRet);
      pRet = 0;
    }
  }

  *ppOut = pRet;
  return rc;
}
921 
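/*
** Append one new, zeroed level to the end of the aLevel[] array of the
** structure object *ppStruct. The object is reallocated, so *ppStruct may
** be updated to point to a new address. If *pRc is not SQLITE_OK when this
** function is called, it is a no-op. If an OOM error occurs, *pRc is set
** to SQLITE_NOMEM and *ppStruct is left unchanged.
*/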
static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
  Fts5Structure *pNew;            /* Reallocated structure object */
  int nOld;                       /* Number of levels before this call */
  int nAlloc;                     /* Size of new allocation in bytes */

  if( *pRc!=SQLITE_OK ) return;

  nOld = (*ppStruct)->nLevel;
  nAlloc = (
      sizeof(Fts5Structure) +                  /* Main structure */
      sizeof(Fts5StructureLevel) * (nOld+1)    /* aLevel[] array */
  );

  pNew = sqlite3_realloc(*ppStruct, nAlloc);
  if( pNew==0 ){
    /* The original allocation is still valid and still owned by the caller */
    *pRc = SQLITE_NOMEM;
    return;
  }

  /* Zero the new slot at the end of the array and publish the new pointer */
  memset(&pNew->aLevel[nOld], 0, sizeof(Fts5StructureLevel));
  pNew->nLevel++;
  *ppStruct = pNew;
}
944 
945 /*
946 ** Extend level iLvl so that there is room for at least nExtra more
947 ** segments.
948 */
static void fts5StructureExtendLevel(
  int *pRc,
  Fts5Structure *pStruct,
  int iLvl,
  int nExtra,
  int bInsert
){
  Fts5StructureLevel *pLvl;       /* Level being extended */
  Fts5StructureSegment *aSeg;     /* Reallocated segment array */
  int nAlloc;                     /* New size of aSeg[] in bytes */

  if( *pRc!=SQLITE_OK ) return;

  pLvl = &pStruct->aLevel[iLvl];
  nAlloc = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
  aSeg = sqlite3_realloc(pLvl->aSeg, nAlloc);
  if( aSeg==0 ){
    *pRc = SQLITE_NOMEM;
    return;
  }

  if( bInsert ){
    /* Open the gap at the start of the array: shift the existing entries
    ** up by nExtra slots, then zero the vacated slots. */
    int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
    memmove(&aSeg[nExtra], aSeg, nMove);
    memset(aSeg, 0, sizeof(Fts5StructureSegment) * nExtra);
  }else{
    /* The new, zeroed slots follow the existing entries. */
    memset(&aSeg[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
  }

  /* Note that pLvl->nSeg is not modified here. */
  pLvl->aSeg = aSeg;
}
977 
fts5StructureReadUncached(Fts5Index * p)978 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
979   Fts5Structure *pRet = 0;
980   Fts5Config *pConfig = p->pConfig;
981   int iCookie;                    /* Configuration cookie */
982   Fts5Data *pData;
983 
984   pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
985   if( p->rc==SQLITE_OK ){
986     /* TODO: Do we need this if the leaf-index is appended? Probably... */
987     memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
988     p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
989     if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
990       p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
991     }
992     fts5DataRelease(pData);
993     if( p->rc!=SQLITE_OK ){
994       fts5StructureRelease(pRet);
995       pRet = 0;
996     }
997   }
998 
999   return pRet;
1000 }
1001 
fts5IndexDataVersion(Fts5Index * p)1002 static i64 fts5IndexDataVersion(Fts5Index *p){
1003   i64 iVersion = 0;
1004 
1005   if( p->rc==SQLITE_OK ){
1006     if( p->pDataVersion==0 ){
1007       p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
1008           sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
1009           );
1010       if( p->rc ) return 0;
1011     }
1012 
1013     if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
1014       iVersion = sqlite3_column_int64(p->pDataVersion, 0);
1015     }
1016     p->rc = sqlite3_reset(p->pDataVersion);
1017   }
1018 
1019   return iVersion;
1020 }
1021 
1022 /*
1023 ** Read, deserialize and return the structure record.
1024 **
1025 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1026 ** are over-allocated as described for function fts5StructureDecode()
1027 ** above.
1028 **
1029 ** If an error occurs, NULL is returned and an error code left in the
1030 ** Fts5Index handle. If an error has already occurred when this function
1031 ** is called, it is a no-op.
1032 */
fts5StructureRead(Fts5Index * p)1033 static Fts5Structure *fts5StructureRead(Fts5Index *p){
1034 
1035   if( p->pStruct==0 ){
1036     p->iStructVersion = fts5IndexDataVersion(p);
1037     if( p->rc==SQLITE_OK ){
1038       p->pStruct = fts5StructureReadUncached(p);
1039     }
1040   }
1041 
1042 #if 0
1043   else{
1044     Fts5Structure *pTest = fts5StructureReadUncached(p);
1045     if( pTest ){
1046       int i, j;
1047       assert_nc( p->pStruct->nSegment==pTest->nSegment );
1048       assert_nc( p->pStruct->nLevel==pTest->nLevel );
1049       for(i=0; i<pTest->nLevel; i++){
1050         assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
1051         assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
1052         for(j=0; j<pTest->aLevel[i].nSeg; j++){
1053           Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
1054           Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
1055           assert_nc( p1->iSegid==p2->iSegid );
1056           assert_nc( p1->pgnoFirst==p2->pgnoFirst );
1057           assert_nc( p1->pgnoLast==p2->pgnoLast );
1058         }
1059       }
1060       fts5StructureRelease(pTest);
1061     }
1062   }
1063 #endif
1064 
1065   if( p->rc!=SQLITE_OK ) return 0;
1066   assert( p->iStructVersion!=0 );
1067   assert( p->pStruct!=0 );
1068   fts5StructureRef(p->pStruct);
1069   return p->pStruct;
1070 }
1071 
/*
** Discard the structure object cached in p->pStruct, if any, by releasing
** the reference held on it and clearing the pointer.
*/
static void fts5StructureInvalidate(Fts5Index *p){
  /* fts5StructureRelease() is a no-op when passed NULL */
  fts5StructureRelease(p->pStruct);
  p->pStruct = 0;
}
1078 
1079 /*
1080 ** Return the total number of segments in index structure pStruct. This
1081 ** function is only ever used as part of assert() conditions.
1082 */
1083 #ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Running sum of per-level nSeg values */
  int iLvl = 0;                   /* Current level */

  /* A NULL structure contains no segments */
  if( pStruct==0 ) return 0;

  while( iLvl<pStruct->nLevel ){
    nTotal += pStruct->aLevel[iLvl].nSeg;
    iLvl++;
  }
  return nTotal;
}
1095 #endif
1096 
/*
** Append a blob or a varint to buffer pBuf without growing it. The caller
** must already have ensured that the buffer has enough space; this is
** only checked by assert(). Every macro argument is parenthesized in the
** expansion so that an expression argument (e.g. "c ? a : b") is evaluated
** as a unit.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {     \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );           \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));         \
  (pBuf)->n += (nBlob);                                    \
}

#define fts5BufferSafeAppendVarint(pBuf, iVal) {                \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));  \
  assert( (pBuf)->nSpace>=(pBuf)->n );                          \
}
1107 
1108 
1109 /*
1110 ** Serialize and store the "structure" record.
1111 **
1112 ** If an error occurs, leave an error code in the Fts5Index object. If an
1113 ** error has already occurred, this function is a no-op.
1114 */
static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
  Fts5Buffer out;                 /* Buffer to serialize record into */
  int iLvl;                       /* Used to iterate through levels */
  int iCookie;                    /* Cookie value to store */

  if( p->rc!=SQLITE_OK ) return;

  assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
  memset(&out, 0, sizeof(Fts5Buffer));

  /* A negative configuration cookie is stored as zero */
  iCookie = (p->pConfig->iCookie<0) ? 0 : p->pConfig->iCookie;

  /* Fixed header: a 4 byte cookie followed by three varints. Space for the
  ** largest possible header is reserved in one go, which is what allows the
  ** unchecked "safe" append macros to be used. */
  if( 0==sqlite3Fts5BufferSize(&p->rc, &out, 4+9+9+9) ){
    sqlite3Fts5Put32(out.p, iCookie);
    out.n = 4;
    fts5BufferSafeAppendVarint(&out, pStruct->nLevel);
    fts5BufferSafeAppendVarint(&out, pStruct->nSegment);
    fts5BufferSafeAppendVarint(&out, (i64)pStruct->nWriteCounter);
  }

  /* For each level: nMerge, nSeg, then three varints per segment */
  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
    Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
    int iSeg;                     /* Used to iterate through segments */

    assert( pLvl->nMerge<=pLvl->nSeg );
    fts5BufferAppendVarint(&p->rc, &out, pLvl->nMerge);
    fts5BufferAppendVarint(&p->rc, &out, pLvl->nSeg);

    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
      Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
      fts5BufferAppendVarint(&p->rc, &out, pSeg->iSegid);
      fts5BufferAppendVarint(&p->rc, &out, pSeg->pgnoFirst);
      fts5BufferAppendVarint(&p->rc, &out, pSeg->pgnoLast);
    }
  }

  fts5DataWrite(p, FTS5_STRUCTURE_ROWID, out.p, out.n);
  fts5BufferFree(&out);
}
1154 
/*
** Debugging aid. In the build as written (the "#if 0" branch is disabled)
** fts5PrintStructure() is a macro that expands to nothing. If the "#if 0"
** is changed to "#if 1", it becomes a function that renders pStruct as
** text using fts5DebugStructure() and writes "<zCaption>: <text>" to
** stdout.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(buf));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif
1169 
/*
** Return the size of segment pSeg, measured as the number of page numbers
** in the inclusive range pgnoFirst..pgnoLast.
*/
static int fts5SegmentSize(Fts5StructureSegment *pSeg){
  int nSpan = pSeg->pgnoLast - pSeg->pgnoFirst;
  return nSpan + 1;
}
1173 
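/*
** Promote as many segments as possible to level iPromote of index
** structure pStruct. The structure is modified in place. Segments are
** taken, newest first, from the levels following iPromote, stopping at the
** first level that has nMerge set or the first segment that is larger than
** szPromote pages. If an OOM occurs, the error code is left in p->rc.
*/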
static void fts5StructurePromoteTo(
  Fts5Index *p,
  int iPromote,
  int szPromote,
  Fts5Structure *pStruct
){
  Fts5StructureLevel *pDest = &pStruct->aLevel[iPromote];
  int iLvl;

  /* Nothing is moved into a level that has nMerge set */
  if( pDest->nMerge!=0 ) return;

  for(iLvl=iPromote+1; iLvl<pStruct->nLevel; iLvl++){
    Fts5StructureLevel *pSrc = &pStruct->aLevel[iLvl];
    if( pSrc->nMerge ) return;

    /* Take segments from the end of the source level, one at a time, until
    ** the level is empty or a segment that is too large is found. */
    while( pSrc->nSeg>0 ){
      Fts5StructureSegment *pSeg = &pSrc->aSeg[pSrc->nSeg-1];
      if( fts5SegmentSize(pSeg)>szPromote ) return;

      /* Open one zeroed slot at the start of the destination array and
      ** copy the segment into it. */
      fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
      if( p->rc ) return;
      memcpy(pDest->aSeg, pSeg, sizeof(Fts5StructureSegment));
      pDest->nSeg++;
      pSrc->nSeg--;
    }
  }
}
1204 
1205 /*
1206 ** A new segment has just been written to level iLvl of index structure
1207 ** pStruct. This function determines if any segments should be promoted
1208 ** as a result. Segments are promoted in two scenarios:
1209 **
1210 **   a) If the segment just written is smaller than one or more segments
1211 **      within the previous populated level, it is promoted to the previous
1212 **      populated level.
1213 **
1214 **   b) If the segment just written is larger than the newest segment on
1215 **      the next populated level, then that segment, and any other adjacent
1216 **      segments that are also smaller than the one just written, are
1217 **      promoted.
1218 **
1219 ** If one or more segments are promoted, the structure object is updated
1220 ** to reflect this.
1221 */
static void fts5StructurePromote(
  Fts5Index *p,                   /* FTS5 backend object */
  int iLvl,                       /* Index level just updated */
  Fts5Structure *pStruct          /* Index structure */
){
  Fts5StructureLevel *pNewLvl;    /* Level iLvl */
  int szNew;                      /* Size of segment just written */
  int iPrev;                      /* Previous populated level, or -1 */
  int iTarget;                    /* Level to promote segments to */
  int szLimit;                    /* Promote anything this size or smaller */

  if( p->rc!=SQLITE_OK ) return;

  pNewLvl = &pStruct->aLevel[iLvl];
  if( pNewLvl->nSeg==0 ) return;
  szNew = fts5SegmentSize(&pNewLvl->aSeg[pNewLvl->nSeg-1]);

  /* Unless condition (a) turns out to be true below, assume (b) is true.
  ** StructurePromoteTo() is a no-op if it is not.  */
  iTarget = iLvl;
  szLimit = szNew;

  /* Check for condition (a): find the closest populated level before iLvl
  ** and the size of the largest segment on it. */
  iPrev = iLvl-1;
  while( iPrev>=0 && pStruct->aLevel[iPrev].nSeg==0 ) iPrev--;
  if( iPrev>=0 ){
    Fts5StructureLevel *pPrev = &pStruct->aLevel[iPrev];
    int szMax = 0;
    int iSeg;
    assert( pPrev->nMerge==0 );
    for(iSeg=0; iSeg<pPrev->nSeg; iSeg++){
      int sz = fts5SegmentSize(&pPrev->aSeg[iSeg]);
      if( sz>szMax ) szMax = sz;
    }
    if( szMax>=szNew ){
      /* Condition (a) is true. Promote the newest segment on level
      ** iLvl to level iPrev.  */
      iTarget = iPrev;
      szLimit = szMax;
    }
  }

  fts5StructurePromoteTo(p, iTarget, szLimit, pStruct);
}
1267 
1268 
1269 /*
1270 ** Advance the iterator passed as the only argument. If the end of the
1271 ** doclist-index page is reached, return non-zero.
1272 */
static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
  Fts5Data *pData = pLvl->pData;
  u8 *a = pData->p;               /* Page content */

  if( pLvl->iOff==0 ){
    /* First call for this page. Skip the single byte at offset 0, then read
    ** the initial leaf page number and rowid. */
    int iOff = 1;
    assert( pLvl->bEof==0 );
    iOff += fts5GetVarint32(&a[1], pLvl->iLeafPgno);
    iOff += fts5GetVarint(&a[iOff], (u64*)&pLvl->iRowid);
    pLvl->iOff = iOff;
    pLvl->iFirstOff = iOff;
  }else{
    /* Skip over any 0x00 bytes. Each byte skipped accounts for one leaf
    ** page number. */
    int iNext = pLvl->iOff;
    while( iNext<pData->nn && a[iNext]==0 ) iNext++;

    if( iNext>=pData->nn ){
      pLvl->bEof = 1;
    }else{
      /* The next varint is added to the current rowid */
      i64 iDelta;
      pLvl->iLeafPgno += (iNext - pLvl->iOff) + 1;
      iNext += fts5GetVarint(&a[iNext], (u64*)&iDelta);
      pLvl->iRowid += iDelta;
      pLvl->iOff = iNext;
    }
  }

  return pLvl->bEof;
}
1301 
1302 /*
1303 ** Advance the iterator passed as the only argument.
1304 */
static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );

  /* Done if this level advanced without hitting the end of its page, or if
  ** there is no level iLvl+1 to fetch a new page number from. */
  if( fts5DlidxLvlNext(pLvl)==0 || (iLvl+1)>=pIter->nLvl ){
    return pIter->aLvl[0].bEof;
  }

  /* Advance level iLvl+1. If it is not at EOF, replace the page held by
  ** this level with the one identified by its new position and move to
  ** the first entry on that page. */
  fts5DlidxIterNextR(p, pIter, iLvl+1);
  if( pIter->aLvl[iLvl+1].bEof==0 ){
    i64 iRowid = FTS5_DLIDX_ROWID(
        pIter->iSegid, iLvl, pIter->aLvl[iLvl+1].iLeafPgno
    );
    fts5DataRelease(pLvl->pData);
    memset(pLvl, 0, sizeof(Fts5DlidxLvl));
    pLvl->pData = fts5DataRead(p, iRowid);
    if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
  }

  return pIter->aLvl[0].bEof;
}
/*
** Advance doclist-index iterator pIter, starting at level 0. Return the
** value returned by fts5DlidxIterNextR().
*/
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
  int bEof = fts5DlidxIterNextR(p, pIter, 0);
  return bEof;
}
1328 
1329 /*
1330 ** The iterator passed as the first argument has the following fields set
1331 ** as follows. This function sets up the rest of the iterator so that it
1332 ** points to the first rowid in the doclist-index.
1333 **
1334 **   pData:
1335 **     pointer to doclist-index record,
1336 **
1337 ** When this function is called pIter->iLeafPgno is the page number the
1338 ** doclist is associated with (the one featuring the term).
1339 */
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  int iLvl = 0;

  /* Step every level once, lowest level first */
  while( iLvl<pIter->nLvl ){
    fts5DlidxLvlNext(&pIter->aLvl[iLvl]);
    iLvl++;
  }
  return pIter->aLvl[0].bEof;
}
1347 
1348 
/*
** Return true if an error has occurred (p->rc is not SQLITE_OK) or if
** level 0 of iterator pIter is at EOF. Return false otherwise.
*/
static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  if( p->rc!=SQLITE_OK ) return 1;
  return pIter->aLvl[0].bEof!=0;
}
1352 
/*
** Move iterator pIter to its last entry. Levels are processed from the
** highest down to level 0. Each level is advanced to the final entry on
** the page it currently holds, then the page for the level below is
** replaced by the one that entry identifies. If an error occurs it is
** left in p->rc and processing stops.
*/
static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
  int iLvl;

  for(iLvl=pIter->nLvl-1; iLvl>=0; iLvl--){
    Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
    int bEof;

    if( p->rc!=SQLITE_OK ) break;

    /* Step forward until EOF is reported, then clear the EOF flag so that
    ** the level is left on its final entry. */
    do{
      bEof = fts5DlidxLvlNext(pLvl);
    }while( bEof==0 );
    pLvl->bEof = 0;

    if( iLvl>0 ){
      Fts5DlidxLvl *pChild = &pIter->aLvl[iLvl-1];
      fts5DataRelease(pChild->pData);
      memset(pChild, 0, sizeof(Fts5DlidxLvl));
      pChild->pData = fts5DataRead(p,
          FTS5_DLIDX_ROWID(pIter->iSegid, iLvl-1, pLvl->iLeafPgno)
      );
    }
  }
}
1372 
1373 /*
1374 ** Move the iterator passed as the only argument to the previous entry.
1375 */
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
  int iOff = pLvl->iOff;

  assert( pLvl->bEof==0 );
  if( iOff<=pLvl->iFirstOff ){
    /* Already positioned on the first entry of the page (iFirstOff is the
    ** offset recorded by fts5DlidxLvlNext() after reading the initial page
    ** number and rowid). There is no previous entry, so flag EOF. */
    pLvl->bEof = 1;
  }else{
    u8 *a = pLvl->pData->p;
    i64 iVal;
    int iLimit;
    int ii;
    int nZero = 0;

    /* Currently iOff points to the first byte of a varint. This block
    ** decrements iOff until it points to the first byte of the previous
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);   /* Look back no more than 9 bytes */
    for(iOff--; iOff>iLimit; iOff--){
      /* A byte with the 0x80 bit clear ends the preceding varint, so the
      ** byte that follows it starts the varint being searched for. */
      if( (a[iOff-1] & 0x80)==0 ) break;
    }

    /* Undo the step that fts5DlidxLvlNext() applied for this entry:
    ** subtract the rowid delta and step back one leaf page number. */
    fts5GetVarint(&a[iOff], (u64*)&iVal);
    pLvl->iRowid -= iVal;
    pLvl->iLeafPgno--;

    /* Skip backwards past any 0x00 varints. */
    /* Each one counted in nZero is one more leaf page number to step
    ** back over, mirroring the 0x00 bytes skipped by fts5DlidxLvlNext(). */
    for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
      nZero++;
    }
    if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
      /* The byte immediately before the last 0x00 byte has the 0x80 bit
      ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
      ** bytes before a[ii]. */
      int bZero = 0;              /* True if last 0x00 counts */
      if( (ii-8)>=pLvl->iFirstOff ){
        int j;
        for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
        bZero = (j>8);
      }
      /* Otherwise that 0x00 byte is not a standalone varint - do not
      ** count it. */
      if( bZero==0 ) nZero--;
    }
    pLvl->iLeafPgno -= nZero;
    /* Leave iOff at the start of the run of counted 0x00 bytes */
    pLvl->iOff = iOff - nZero;
  }

  return pLvl->bEof;
}
1424 
/*
** Move level iLvl of iterator pIter to its previous entry. If that level
** is already on the first entry of its page and there is a level iLvl+1,
** move level iLvl+1 back as well, load the page it then identifies and
** position level iLvl on the last entry of that page. Return the EOF flag
** of level 0.
*/
static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlPrev(pLvl)!=0 && (iLvl+1)<pIter->nLvl ){
    Fts5DlidxLvl *pParent = &pIter->aLvl[iLvl+1];
    fts5DlidxIterPrevR(p, pIter, iLvl+1);
    if( pParent->bEof==0 ){
      fts5DataRelease(pLvl->pData);
      memset(pLvl, 0, sizeof(Fts5DlidxLvl));
      pLvl->pData = fts5DataRead(p,
          FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pParent->iLeafPgno)
      );
      if( pLvl->pData ){
        /* Run forward to EOF, then clear the flag so that the level is
        ** left on the final entry of the new page. */
        while( fts5DlidxLvlNext(pLvl)==0 );
        pLvl->bEof = 0;
      }
    }
  }

  return pIter->aLvl[0].bEof;
}
/*
** Move doclist-index iterator pIter to the previous entry, starting at
** level 0. Return the value returned by fts5DlidxIterPrevR().
*/
static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
  int bEof = fts5DlidxIterPrevR(p, pIter, 0);
  return bEof;
}
1451 
1452 /*
1453 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1454 */
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  int iLvl;

  /* Freeing a NULL iterator is a no-op */
  if( pIter==0 ) return;

  /* Release the page held by each level, then the iterator itself */
  for(iLvl=0; iLvl<pIter->nLvl; iLvl++){
    fts5DataRelease(pIter->aLvl[iLvl].pData);
  }
  sqlite3_free(pIter);
}
1464 
fts5DlidxIterInit(Fts5Index * p,int bRev,int iSegid,int iLeafPg)1465 static Fts5DlidxIter *fts5DlidxIterInit(
1466   Fts5Index *p,                   /* Fts5 Backend to iterate within */
1467   int bRev,                       /* True for ORDER BY ASC */
1468   int iSegid,                     /* Segment id */
1469   int iLeafPg                     /* Leaf page number to load dlidx for */
1470 ){
1471   Fts5DlidxIter *pIter = 0;
1472   int i;
1473   int bDone = 0;
1474 
1475   for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
1476     int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
1477     Fts5DlidxIter *pNew;
1478 
1479     pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte);
1480     if( pNew==0 ){
1481       p->rc = SQLITE_NOMEM;
1482     }else{
1483       i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
1484       Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
1485       pIter = pNew;
1486       memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1487       pLvl->pData = fts5DataRead(p, iRowid);
1488       if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
1489         bDone = 1;
1490       }
1491       pIter->nLvl = i+1;
1492     }
1493   }
1494 
1495   if( p->rc==SQLITE_OK ){
1496     pIter->iSegid = iSegid;
1497     if( bRev==0 ){
1498       fts5DlidxIterFirst(pIter);
1499     }else{
1500       fts5DlidxIterLast(p, pIter);
1501     }
1502   }
1503 
1504   if( p->rc!=SQLITE_OK ){
1505     fts5DlidxIterFree(pIter);
1506     pIter = 0;
1507   }
1508 
1509   return pIter;
1510 }
1511 
fts5DlidxIterRowid(Fts5DlidxIter * pIter)1512 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
1513   return pIter->aLvl[0].iRowid;
1514 }
/*
** Return the leaf page number that level 0 of iterator pIter currently
** points to.
*/
static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
  Fts5DlidxLvl *pLvl0 = &pIter->aLvl[0];
  return pLvl0->iLeafPgno;
}
1518 
1519 /*
1520 ** Load the next leaf page into the segment iterator.
1521 */
static void fts5SegIterNextPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance to next page */
){
  Fts5StructureSegment *pSeg = pIter->pSeg;
  Fts5Data *pNext = 0;            /* New leaf page, or NULL */

  fts5DataRelease(pIter->pLeaf);
  pIter->iLeafPgno++;

  /* Use the page already held in pNextLeaf if there is one. Otherwise read
  ** the page from the database, unless the iterator has moved past the
  ** last page of the segment. */
  if( pIter->pNextLeaf ){
    pNext = pIter->pNextLeaf;
    pIter->pNextLeaf = 0;
  }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
    pNext = fts5LeafRead(p,
        FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
    );
  }
  pIter->pLeaf = pNext;
  if( pNext==0 ) return;

  if( fts5LeafIsTermless(pNext) ){
    /* No terms on this page: set iEndofDoclist beyond the page content */
    pIter->iPgidxOff = pNext->szLeaf;
    pIter->iEndofDoclist = pNext->nn+1;
  }else{
    /* Read the varint stored at offset szLeaf into iEndofDoclist and leave
    ** iPgidxOff pointing just past it. */
    int iOff = pNext->szLeaf;
    iOff += fts5GetVarint32(&pNext->p[iOff], pIter->iEndofDoclist);
    pIter->iPgidxOff = iOff;
  }
}
1553 
1554 /*
1555 ** Argument p points to a buffer containing a varint to be interpreted as a
1556 ** position list size field. Read the varint and return the number of bytes
1557 ** read. Before returning, set *pnSz to the number of bytes in the position
1558 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
1559 */
static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
  int iOff = 0;                   /* Bytes consumed from p */
  int nField;                     /* Raw value of the size field */

  fts5FastGetVarint32(p, iOff, nField);
  assert_nc( nField>=0 );

  /* The low bit is the delete flag, the remaining bits are the size */
  *pnSz = nField/2;
  *pbDel = nField & 0x0001;
  return iOff;
}
1569 
1570 /*
1571 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
1572 ** position-list size field. Read the value of the field and store it
1573 ** in the following variables:
1574 **
1575 **   Fts5SegIter.nPos
1576 **   Fts5SegIter.bDel
1577 **
1578 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1579 ** position list content (if any).
1580 */
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  int iOff;                       /* Offset to read at */

  if( p->rc!=SQLITE_OK ) return;

  iOff = pIter->iLeafOffset;
  ASSERT_SZLEAF_OK(pIter->pLeaf);

  if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
    /* A varint: the low bit is the delete flag, the rest is the size */
    int nSz;
    fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
    pIter->bDel = (nSz & 0x0001);
    pIter->nPos = nSz>>1;
    assert_nc( pIter->nPos>=0 );
  }else{
    /* detail=none. Count the 0x00 bytes (at most two) found at iOff before
    ** the end of the doclist or page content:
    **
    **     0 bytes:  bDel=0, nPos=1
    **     1 byte:   bDel=1, nPos=0
    **     2 bytes:  bDel=1, nPos=1
    **
    ** The counted bytes are consumed.  */
    const u8 *a = pIter->pLeaf->p;
    int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
    int nZero = 0;
    while( nZero<2 && (iOff+nZero)<iEod && a[iOff+nZero]==0 ){
      nZero++;
    }
    pIter->bDel = (nZero>0);
    pIter->nPos = (nZero!=1);
    iOff += nZero;
  }

  pIter->iLeafOffset = iOff;
}
1609 
/*
** Read a varint from the current leaf at offset Fts5SegIter.iLeafOffset
** into Fts5SegIter.iRowid, leaving iLeafOffset pointing to the byte that
** follows it. If iLeafOffset is at or past the end of the page content
** (szLeaf), the next page is loaded first and the varint is read from
** offset 4 of that page. If there is no next page, FTS5_CORRUPT is stored
** in p->rc (unless an error code is already set).
*/
static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  int iOff = pIter->iLeafOffset;

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  if( iOff>=pIter->pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);
    if( pIter->pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    iOff = 4;
  }

  iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iOff;
}
1627 
1628 /*
1629 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
1630 ** "nSuffix" field of a term. Function parameter nKeep contains the value
1631 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
1632 ** the first term in the segment).
1633 **
1634 ** This function populates:
1635 **
1636 **   Fts5SegIter.term
1637 **   Fts5SegIter.rowid
1638 **
1639 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1640 ** the first position list. The position list belonging to document
1641 ** (Fts5SegIter.iRowid).
1642 */
static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;  /* Offset to read at */
  int nNew;                       /* Bytes of new data */

  iOff += fts5GetVarint32(&a[iOff], nNew);

  /* Both nNew and nKeep originate from page content, so neither can be
  ** trusted. The new suffix must lie entirely within the page, and the
  ** retained prefix cannot be longer than the term currently held in
  ** pIter->term. Otherwise setting term.n below would claim more bytes
  ** than the buffer contains. The suffix check is written as a subtraction
  ** so that a huge nNew cannot overflow the comparison. */
  if( nNew<0 || nNew>(pIter->pLeaf->nn - iOff)
   || nKeep<0 || nKeep>pIter->term.n
  ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  /* Truncate the current term to the shared prefix, then append the suffix */
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  pIter->iLeafOffset = iOff;

  if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
    /* No further entries at iPgidxOff on this page */
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    /* The next varint at iPgidxOff is added to iEndofDoclist */
    int nExtra;
    pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
    pIter->iEndofDoclist += nExtra;
  }

  fts5SegIterLoadRowid(p, pIter);
}
1670 
/*
** Forward declarations of the three functions that fts5SegIterSetNext()
** may install as the Fts5SegIter.xNext method.
*/
static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
1674 
/*
** Set the Fts5SegIter.xNext method of iterator pIter:
**
**   * fts5SegIterNext_Reverse() if the FTS5_SEGITER_REVERSE flag is set,
**   * otherwise fts5SegIterNext_None() if the table uses detail=none,
**   * otherwise fts5SegIterNext().
*/
static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*) = fts5SegIterNext;

  if( pIter->flags & FTS5_SEGITER_REVERSE ){
    xNext = fts5SegIterNext_Reverse;
  }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
    xNext = fts5SegIterNext_None;
  }
  pIter->xNext = xNext;
}
1684 
1685 /*
1686 ** Initialize the iterator object pIter to iterate through the entries in
1687 ** segment pSeg. The iterator is left pointing to the first entry when
1688 ** this function returns.
1689 **
1690 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1691 ** an error has already occurred when this function is called, it is a no-op.
1692 */
static void fts5SegIterInit(
  Fts5Index *p,                   /* FTS index object */
  Fts5StructureSegment *pSeg,     /* Description of segment */
  Fts5SegIter *pIter              /* Object to populate */
){
  if( pSeg->pgnoFirst==0 ){
    /* This happens if the segment is being used as an input to an incremental
    ** merge and all data has already been "trimmed". See function
    ** fts5TrimSegments() for details. In this case leave the iterator empty.
    ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
    ** at EOF already. */
    assert( pIter->pLeaf==0 );
    return;
  }

  if( p->rc==SQLITE_OK ){
    memset(pIter, 0, sizeof(*pIter));
    fts5SegIterSetNext(p, pIter);
    pIter->pSeg = pSeg;
    /* fts5SegIterNextPage() increments iLeafPgno before loading a page */
    pIter->iLeafPgno = pSeg->pgnoFirst-1;
    fts5SegIterNextPage(p, pIter);

    /* fts5SegIterNextPage() leaves pLeaf set to NULL without reporting an
    ** error if the requested page number is larger than pSeg->pgnoLast.
    ** For the first page of a segment that can only mean the structure
    ** record is corrupt (pgnoFirst>pgnoLast). Report it rather than
    ** dereferencing the NULL pointer below. */
    if( p->rc==SQLITE_OK && pIter->pLeaf==0 ){
      p->rc = FTS5_CORRUPT;
    }
  }

  if( p->rc==SQLITE_OK ){
    /* The first term of the segment begins at byte offset 4 of the page */
    pIter->iLeafOffset = 4;
    assert_nc( pIter->pLeaf->nn>4 );
    assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
    pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
    fts5SegIterLoadTerm(p, pIter, 0);
    fts5SegIterLoadNPos(p, pIter);
  }
}
1725 
1726 /*
1727 ** This function is only ever called on iterators created by calls to
1728 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
1729 **
1730 ** The iterator is in an unusual state when this function is called: the
1731 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
1732 ** the position-list size field for the first relevant rowid on the page.
1733 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
1734 **
1735 ** This function advances the iterator so that it points to the last
1736 ** relevant rowid on the page and, if necessary, initializes the
1737 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
1738 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
1739 ** byte of the position list content associated with said rowid.
1740 */
fts5SegIterReverseInitPage(Fts5Index * p,Fts5SegIter * pIter)1741 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
1742   int eDetail = p->pConfig->eDetail;
1743   int n = pIter->pLeaf->szLeaf;
1744   int i = pIter->iLeafOffset;
1745   u8 *a = pIter->pLeaf->p;
1746   int iRowidOffset = 0;
1747 
1748   if( n>pIter->iEndofDoclist ){
1749     n = pIter->iEndofDoclist;
1750   }
1751 
1752   ASSERT_SZLEAF_OK(pIter->pLeaf);
1753   while( 1 ){
1754     i64 iDelta = 0;
1755 
1756     if( eDetail==FTS5_DETAIL_NONE ){
1757       /* todo */
1758       if( i<n && a[i]==0 ){
1759         i++;
1760         if( i<n && a[i]==0 ) i++;
1761       }
1762     }else{
1763       int nPos;
1764       int bDummy;
1765       i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
1766       i += nPos;
1767     }
1768     if( i>=n ) break;
1769     i += fts5GetVarint(&a[i], (u64*)&iDelta);
1770     pIter->iRowid += iDelta;
1771 
1772     /* If necessary, grow the pIter->aRowidOffset[] array. */
1773     if( iRowidOffset>=pIter->nRowidOffset ){
1774       int nNew = pIter->nRowidOffset + 8;
1775       int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
1776       if( aNew==0 ){
1777         p->rc = SQLITE_NOMEM;
1778         break;
1779       }
1780       pIter->aRowidOffset = aNew;
1781       pIter->nRowidOffset = nNew;
1782     }
1783 
1784     pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
1785     pIter->iLeafOffset = i;
1786   }
1787   pIter->iRowidOffset = iRowidOffset;
1788   fts5SegIterLoadNPos(p, pIter);
1789 }
1790 
1791 /*
1792 **
1793 */
fts5SegIterReverseNewPage(Fts5Index * p,Fts5SegIter * pIter)1794 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
1795   assert( pIter->flags & FTS5_SEGITER_REVERSE );
1796   assert( pIter->flags & FTS5_SEGITER_ONETERM );
1797 
1798   fts5DataRelease(pIter->pLeaf);
1799   pIter->pLeaf = 0;
1800   while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
1801     Fts5Data *pNew;
1802     pIter->iLeafPgno--;
1803     pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
1804           pIter->pSeg->iSegid, pIter->iLeafPgno
1805     ));
1806     if( pNew ){
1807       /* iTermLeafOffset may be equal to szLeaf if the term is the last
1808       ** thing on the page - i.e. the first rowid is on the following page.
1809       ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
1810       if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
1811         assert( pIter->pLeaf==0 );
1812         if( pIter->iTermLeafOffset<pNew->szLeaf ){
1813           pIter->pLeaf = pNew;
1814           pIter->iLeafOffset = pIter->iTermLeafOffset;
1815         }
1816       }else{
1817         int iRowidOff;
1818         iRowidOff = fts5LeafFirstRowidOff(pNew);
1819         if( iRowidOff ){
1820           pIter->pLeaf = pNew;
1821           pIter->iLeafOffset = iRowidOff;
1822         }
1823       }
1824 
1825       if( pIter->pLeaf ){
1826         u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
1827         pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
1828         break;
1829       }else{
1830         fts5DataRelease(pNew);
1831       }
1832     }
1833   }
1834 
1835   if( pIter->pLeaf ){
1836     pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1837     fts5SegIterReverseInitPage(p, pIter);
1838   }
1839 }
1840 
1841 /*
1842 ** Return true if the iterator passed as the second argument currently
1843 ** points to a delete marker. A delete marker is an entry with a 0 byte
1844 ** position-list.
1845 */
fts5MultiIterIsEmpty(Fts5Index * p,Fts5Iter * pIter)1846 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
1847   Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
1848   return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
1849 }
1850 
1851 /*
1852 ** Advance iterator pIter to the next entry.
1853 **
1854 ** This version of fts5SegIterNext() is only used by reverse iterators.
1855 */
fts5SegIterNext_Reverse(Fts5Index * p,Fts5SegIter * pIter,int * pbUnused)1856 static void fts5SegIterNext_Reverse(
1857   Fts5Index *p,                   /* FTS5 backend object */
1858   Fts5SegIter *pIter,             /* Iterator to advance */
1859   int *pbUnused                   /* Unused */
1860 ){
1861   assert( pIter->flags & FTS5_SEGITER_REVERSE );
1862   assert( pIter->pNextLeaf==0 );
1863   UNUSED_PARAM(pbUnused);
1864 
1865   if( pIter->iRowidOffset>0 ){
1866     u8 *a = pIter->pLeaf->p;
1867     int iOff;
1868     i64 iDelta;
1869 
1870     pIter->iRowidOffset--;
1871     pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
1872     fts5SegIterLoadNPos(p, pIter);
1873     iOff = pIter->iLeafOffset;
1874     if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
1875       iOff += pIter->nPos;
1876     }
1877     fts5GetVarint(&a[iOff], (u64*)&iDelta);
1878     pIter->iRowid -= iDelta;
1879   }else{
1880     fts5SegIterReverseNewPage(p, pIter);
1881   }
1882 }
1883 
1884 /*
1885 ** Advance iterator pIter to the next entry.
1886 **
1887 ** This version of fts5SegIterNext() is only used if detail=none and the
1888 ** iterator is not a reverse direction iterator.
1889 */
fts5SegIterNext_None(Fts5Index * p,Fts5SegIter * pIter,int * pbNewTerm)1890 static void fts5SegIterNext_None(
1891   Fts5Index *p,                   /* FTS5 backend object */
1892   Fts5SegIter *pIter,             /* Iterator to advance */
1893   int *pbNewTerm                  /* OUT: Set for new term */
1894 ){
1895   int iOff;
1896 
1897   assert( p->rc==SQLITE_OK );
1898   assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
1899   assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
1900 
1901   ASSERT_SZLEAF_OK(pIter->pLeaf);
1902   iOff = pIter->iLeafOffset;
1903 
1904   /* Next entry is on the next page */
1905   if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
1906     fts5SegIterNextPage(p, pIter);
1907     if( p->rc || pIter->pLeaf==0 ) return;
1908     pIter->iRowid = 0;
1909     iOff = 4;
1910   }
1911 
1912   if( iOff<pIter->iEndofDoclist ){
1913     /* Next entry is on the current page */
1914     i64 iDelta;
1915     iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
1916     pIter->iLeafOffset = iOff;
1917     pIter->iRowid += iDelta;
1918   }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
1919     if( pIter->pSeg ){
1920       int nKeep = 0;
1921       if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
1922         iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
1923       }
1924       pIter->iLeafOffset = iOff;
1925       fts5SegIterLoadTerm(p, pIter, nKeep);
1926     }else{
1927       const u8 *pList = 0;
1928       const char *zTerm = 0;
1929       int nList;
1930       sqlite3Fts5HashScanNext(p->pHash);
1931       sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
1932       if( pList==0 ) goto next_none_eof;
1933       pIter->pLeaf->p = (u8*)pList;
1934       pIter->pLeaf->nn = nList;
1935       pIter->pLeaf->szLeaf = nList;
1936       pIter->iEndofDoclist = nList;
1937       sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
1938       pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
1939     }
1940 
1941     if( pbNewTerm ) *pbNewTerm = 1;
1942   }else{
1943     goto next_none_eof;
1944   }
1945 
1946   fts5SegIterLoadNPos(p, pIter);
1947 
1948   return;
1949  next_none_eof:
1950   fts5DataRelease(pIter->pLeaf);
1951   pIter->pLeaf = 0;
1952 }
1953 
1954 
1955 /*
1956 ** Advance iterator pIter to the next entry.
1957 **
1958 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
1959 ** is not considered an error if the iterator reaches EOF. If an error has
1960 ** already occurred when this function is called, it is a no-op.
1961 */
fts5SegIterNext(Fts5Index * p,Fts5SegIter * pIter,int * pbNewTerm)1962 static void fts5SegIterNext(
1963   Fts5Index *p,                   /* FTS5 backend object */
1964   Fts5SegIter *pIter,             /* Iterator to advance */
1965   int *pbNewTerm                  /* OUT: Set for new term */
1966 ){
1967   Fts5Data *pLeaf = pIter->pLeaf;
1968   int iOff;
1969   int bNewTerm = 0;
1970   int nKeep = 0;
1971   u8 *a;
1972   int n;
1973 
1974   assert( pbNewTerm==0 || *pbNewTerm==0 );
1975   assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
1976 
1977   /* Search for the end of the position list within the current page. */
1978   a = pLeaf->p;
1979   n = pLeaf->szLeaf;
1980 
1981   ASSERT_SZLEAF_OK(pLeaf);
1982   iOff = pIter->iLeafOffset + pIter->nPos;
1983 
1984   if( iOff<n ){
1985     /* The next entry is on the current page. */
1986     assert_nc( iOff<=pIter->iEndofDoclist );
1987     if( iOff>=pIter->iEndofDoclist ){
1988       bNewTerm = 1;
1989       if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
1990         iOff += fts5GetVarint32(&a[iOff], nKeep);
1991       }
1992     }else{
1993       u64 iDelta;
1994       iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
1995       pIter->iRowid += iDelta;
1996       assert_nc( iDelta>0 );
1997     }
1998     pIter->iLeafOffset = iOff;
1999 
2000   }else if( pIter->pSeg==0 ){
2001     const u8 *pList = 0;
2002     const char *zTerm = 0;
2003     int nList = 0;
2004     assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
2005     if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
2006       sqlite3Fts5HashScanNext(p->pHash);
2007       sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
2008     }
2009     if( pList==0 ){
2010       fts5DataRelease(pIter->pLeaf);
2011       pIter->pLeaf = 0;
2012     }else{
2013       pIter->pLeaf->p = (u8*)pList;
2014       pIter->pLeaf->nn = nList;
2015       pIter->pLeaf->szLeaf = nList;
2016       pIter->iEndofDoclist = nList+1;
2017       sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
2018           (u8*)zTerm);
2019       pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
2020       *pbNewTerm = 1;
2021     }
2022   }else{
2023     iOff = 0;
2024     /* Next entry is not on the current page */
2025     while( iOff==0 ){
2026       fts5SegIterNextPage(p, pIter);
2027       pLeaf = pIter->pLeaf;
2028       if( pLeaf==0 ) break;
2029       ASSERT_SZLEAF_OK(pLeaf);
2030       if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
2031         iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
2032         pIter->iLeafOffset = iOff;
2033 
2034         if( pLeaf->nn>pLeaf->szLeaf ){
2035           pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2036               &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
2037           );
2038         }
2039       }
2040       else if( pLeaf->nn>pLeaf->szLeaf ){
2041         pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2042             &pLeaf->p[pLeaf->szLeaf], iOff
2043         );
2044         pIter->iLeafOffset = iOff;
2045         pIter->iEndofDoclist = iOff;
2046         bNewTerm = 1;
2047       }
2048       assert_nc( iOff<pLeaf->szLeaf );
2049       if( iOff>pLeaf->szLeaf ){
2050         p->rc = FTS5_CORRUPT;
2051         return;
2052       }
2053     }
2054   }
2055 
2056   /* Check if the iterator is now at EOF. If so, return early. */
2057   if( pIter->pLeaf ){
2058     if( bNewTerm ){
2059       if( pIter->flags & FTS5_SEGITER_ONETERM ){
2060         fts5DataRelease(pIter->pLeaf);
2061         pIter->pLeaf = 0;
2062       }else{
2063         fts5SegIterLoadTerm(p, pIter, nKeep);
2064         fts5SegIterLoadNPos(p, pIter);
2065         if( pbNewTerm ) *pbNewTerm = 1;
2066       }
2067     }else{
2068       /* The following could be done by calling fts5SegIterLoadNPos(). But
2069       ** this block is particularly performance critical, so equivalent
2070       ** code is inlined.
2071       **
2072       ** Later: Switched back to fts5SegIterLoadNPos() because it supports
2073       ** detail=none mode. Not ideal.
2074       */
2075       int nSz;
2076       assert( p->rc==SQLITE_OK );
2077       assert( pIter->iLeafOffset<=pIter->pLeaf->nn );
2078       fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
2079       pIter->bDel = (nSz & 0x0001);
2080       pIter->nPos = nSz>>1;
2081       assert_nc( pIter->nPos>=0 );
2082     }
2083   }
2084 }
2085 
/*
** Exchange the values of lvalues a and b, both of which must have type T.
** The expansion is a brace-enclosed block that declares a temporary named
** "tmp".
*/
#define SWAPVAL(T, a, b) { T tmp; tmp=(a); (a)=(b); (b)=tmp; }

/*
** Advance integer lvalue iOff past the varint stored at a[iOff]. Bytes are
** consumed up to and including the first one that does not have the 0x80
** bit set, or until 9 bytes have been consumed, whichever comes first.
**
** Argument a may be any expression that can be indexed. The macro
** arguments are parenthesized so that expressions such as (p + n) expand
** correctly. Argument iOff is evaluated more than once and is modified.
*/
#define fts5IndexSkipVarint(a, iOff) {              \
  int iEnd = (iOff)+9;                              \
  while( ((a)[(iOff)++] & 0x80) && (iOff)<iEnd );   \
}
2092 
2093 /*
2094 ** Iterator pIter currently points to the first rowid in a doclist. This
2095 ** function sets the iterator up so that iterates in reverse order through
2096 ** the doclist.
2097 */
fts5SegIterReverse(Fts5Index * p,Fts5SegIter * pIter)2098 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
2099   Fts5DlidxIter *pDlidx = pIter->pDlidx;
2100   Fts5Data *pLast = 0;
2101   int pgnoLast = 0;
2102 
2103   if( pDlidx ){
2104     int iSegid = pIter->pSeg->iSegid;
2105     pgnoLast = fts5DlidxIterPgno(pDlidx);
2106     pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
2107   }else{
2108     Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */
2109 
2110     /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
2111     ** position-list content for the current rowid. Back it up so that it
2112     ** points to the start of the position-list size field. */
2113     int iPoslist;
2114     if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
2115       iPoslist = pIter->iTermLeafOffset;
2116     }else{
2117       iPoslist = 4;
2118     }
2119     fts5IndexSkipVarint(pLeaf->p, iPoslist);
2120     pIter->iLeafOffset = iPoslist;
2121 
2122     /* If this condition is true then the largest rowid for the current
2123     ** term may not be stored on the current page. So search forward to
2124     ** see where said rowid really is.  */
2125     if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
2126       int pgno;
2127       Fts5StructureSegment *pSeg = pIter->pSeg;
2128 
2129       /* The last rowid in the doclist may not be on the current page. Search
2130       ** forward to find the page containing the last rowid.  */
2131       for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
2132         i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
2133         Fts5Data *pNew = fts5DataRead(p, iAbs);
2134         if( pNew ){
2135           int iRowid, bTermless;
2136           iRowid = fts5LeafFirstRowidOff(pNew);
2137           bTermless = fts5LeafIsTermless(pNew);
2138           if( iRowid ){
2139             SWAPVAL(Fts5Data*, pNew, pLast);
2140             pgnoLast = pgno;
2141           }
2142           fts5DataRelease(pNew);
2143           if( bTermless==0 ) break;
2144         }
2145       }
2146     }
2147   }
2148 
2149   /* If pLast is NULL at this point, then the last rowid for this doclist
2150   ** lies on the page currently indicated by the iterator. In this case
2151   ** pIter->iLeafOffset is already set to point to the position-list size
2152   ** field associated with the first relevant rowid on the page.
2153   **
2154   ** Or, if pLast is non-NULL, then it is the page that contains the last
2155   ** rowid. In this case configure the iterator so that it points to the
2156   ** first rowid on this page.
2157   */
2158   if( pLast ){
2159     int iOff;
2160     fts5DataRelease(pIter->pLeaf);
2161     pIter->pLeaf = pLast;
2162     pIter->iLeafPgno = pgnoLast;
2163     iOff = fts5LeafFirstRowidOff(pLast);
2164     iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
2165     pIter->iLeafOffset = iOff;
2166 
2167     if( fts5LeafIsTermless(pLast) ){
2168       pIter->iEndofDoclist = pLast->nn+1;
2169     }else{
2170       pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
2171     }
2172 
2173   }
2174 
2175   fts5SegIterReverseInitPage(p, pIter);
2176 }
2177 
2178 /*
2179 ** Iterator pIter currently points to the first rowid of a doclist.
2180 ** There is a doclist-index associated with the final term on the current
2181 ** page. If the current term is the last term on the page, load the
2182 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
2183 */
fts5SegIterLoadDlidx(Fts5Index * p,Fts5SegIter * pIter)2184 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
2185   int iSeg = pIter->pSeg->iSegid;
2186   int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2187   Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2188 
2189   assert( pIter->flags & FTS5_SEGITER_ONETERM );
2190   assert( pIter->pDlidx==0 );
2191 
2192   /* Check if the current doclist ends on this page. If it does, return
2193   ** early without loading the doclist-index (as it belongs to a different
2194   ** term. */
2195   if( pIter->iTermLeafPgno==pIter->iLeafPgno
2196    && pIter->iEndofDoclist<pLeaf->szLeaf
2197   ){
2198     return;
2199   }
2200 
2201   pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
2202 }
2203 
2204 /*
2205 ** The iterator object passed as the second argument currently contains
2206 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
2207 ** function searches the leaf page for a term matching (pTerm/nTerm).
2208 **
2209 ** If the specified term is found on the page, then the iterator is left
2210 ** pointing to it. If argument bGe is zero and the term is not found,
2211 ** the iterator is left pointing at EOF.
2212 **
2213 ** If bGe is non-zero and the specified term is not found, then the
2214 ** iterator is left pointing to the smallest term in the segment that
2215 ** is larger than the specified term, even if this term is not on the
2216 ** current page.
2217 */
fts5LeafSeek(Fts5Index * p,int bGe,Fts5SegIter * pIter,const u8 * pTerm,int nTerm)2218 static void fts5LeafSeek(
2219   Fts5Index *p,                   /* Leave any error code here */
2220   int bGe,                        /* True for a >= search */
2221   Fts5SegIter *pIter,             /* Iterator to seek */
2222   const u8 *pTerm, int nTerm      /* Term to search for */
2223 ){
2224   int iOff;
2225   const u8 *a = pIter->pLeaf->p;
2226   int szLeaf = pIter->pLeaf->szLeaf;
2227   int n = pIter->pLeaf->nn;
2228 
2229   int nMatch = 0;
2230   int nKeep = 0;
2231   int nNew = 0;
2232   int iTermOff;
2233   int iPgidx;                     /* Current offset in pgidx */
2234   int bEndOfPage = 0;
2235 
2236   assert( p->rc==SQLITE_OK );
2237 
2238   iPgidx = szLeaf;
2239   iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
2240   iOff = iTermOff;
2241   if( iOff>n ){
2242     p->rc = FTS5_CORRUPT;
2243     return;
2244   }
2245 
2246   while( 1 ){
2247 
2248     /* Figure out how many new bytes are in this term */
2249     fts5FastGetVarint32(a, iOff, nNew);
2250     if( nKeep<nMatch ){
2251       goto search_failed;
2252     }
2253 
2254     assert( nKeep>=nMatch );
2255     if( nKeep==nMatch ){
2256       int nCmp;
2257       int i;
2258       nCmp = MIN(nNew, nTerm-nMatch);
2259       for(i=0; i<nCmp; i++){
2260         if( a[iOff+i]!=pTerm[nMatch+i] ) break;
2261       }
2262       nMatch += i;
2263 
2264       if( nTerm==nMatch ){
2265         if( i==nNew ){
2266           goto search_success;
2267         }else{
2268           goto search_failed;
2269         }
2270       }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
2271         goto search_failed;
2272       }
2273     }
2274 
2275     if( iPgidx>=n ){
2276       bEndOfPage = 1;
2277       break;
2278     }
2279 
2280     iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
2281     iTermOff += nKeep;
2282     iOff = iTermOff;
2283 
2284     if( iOff>=n ){
2285       p->rc = FTS5_CORRUPT;
2286       return;
2287     }
2288 
2289     /* Read the nKeep field of the next term. */
2290     fts5FastGetVarint32(a, iOff, nKeep);
2291   }
2292 
2293  search_failed:
2294   if( bGe==0 ){
2295     fts5DataRelease(pIter->pLeaf);
2296     pIter->pLeaf = 0;
2297     return;
2298   }else if( bEndOfPage ){
2299     do {
2300       fts5SegIterNextPage(p, pIter);
2301       if( pIter->pLeaf==0 ) return;
2302       a = pIter->pLeaf->p;
2303       if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
2304         iPgidx = pIter->pLeaf->szLeaf;
2305         iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
2306         if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
2307           p->rc = FTS5_CORRUPT;
2308         }else{
2309           nKeep = 0;
2310           iTermOff = iOff;
2311           n = pIter->pLeaf->nn;
2312           iOff += fts5GetVarint32(&a[iOff], nNew);
2313           break;
2314         }
2315       }
2316     }while( 1 );
2317   }
2318 
2319  search_success:
2320 
2321   pIter->iLeafOffset = iOff + nNew;
2322   pIter->iTermLeafOffset = pIter->iLeafOffset;
2323   pIter->iTermLeafPgno = pIter->iLeafPgno;
2324 
2325   fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
2326   fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
2327 
2328   if( iPgidx>=n ){
2329     pIter->iEndofDoclist = pIter->pLeaf->nn+1;
2330   }else{
2331     int nExtra;
2332     iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
2333     pIter->iEndofDoclist = iTermOff + nExtra;
2334   }
2335   pIter->iPgidxOff = iPgidx;
2336 
2337   fts5SegIterLoadRowid(p, pIter);
2338   fts5SegIterLoadNPos(p, pIter);
2339 }
2340 
fts5IdxSelectStmt(Fts5Index * p)2341 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
2342   if( p->pIdxSelect==0 ){
2343     Fts5Config *pConfig = p->pConfig;
2344     fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
2345           "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2346           "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2347           pConfig->zDb, pConfig->zName
2348     ));
2349   }
2350   return p->pIdxSelect;
2351 }
2352 
2353 /*
2354 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2355 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2356 **
2357 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2358 ** an error has already occurred when this function is called, it is a no-op.
2359 */
fts5SegIterSeekInit(Fts5Index * p,const u8 * pTerm,int nTerm,int flags,Fts5StructureSegment * pSeg,Fts5SegIter * pIter)2360 static void fts5SegIterSeekInit(
2361   Fts5Index *p,                   /* FTS5 backend */
2362   const u8 *pTerm, int nTerm,     /* Term to seek to */
2363   int flags,                      /* Mask of FTS5INDEX_XXX flags */
2364   Fts5StructureSegment *pSeg,     /* Description of segment */
2365   Fts5SegIter *pIter              /* Object to populate */
2366 ){
2367   int iPg = 1;
2368   int bGe = (flags & FTS5INDEX_QUERY_SCAN);
2369   int bDlidx = 0;                 /* True if there is a doclist-index */
2370   sqlite3_stmt *pIdxSelect = 0;
2371 
2372   assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
2373   assert( pTerm && nTerm );
2374   memset(pIter, 0, sizeof(*pIter));
2375   pIter->pSeg = pSeg;
2376 
2377   /* This block sets stack variable iPg to the leaf page number that may
2378   ** contain term (pTerm/nTerm), if it is present in the segment. */
2379   pIdxSelect = fts5IdxSelectStmt(p);
2380   if( p->rc ) return;
2381   sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
2382   sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
2383   if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
2384     i64 val = sqlite3_column_int(pIdxSelect, 0);
2385     iPg = (int)(val>>1);
2386     bDlidx = (val & 0x0001);
2387   }
2388   p->rc = sqlite3_reset(pIdxSelect);
2389 
2390   if( iPg<pSeg->pgnoFirst ){
2391     iPg = pSeg->pgnoFirst;
2392     bDlidx = 0;
2393   }
2394 
2395   pIter->iLeafPgno = iPg - 1;
2396   fts5SegIterNextPage(p, pIter);
2397 
2398   if( pIter->pLeaf ){
2399     fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
2400   }
2401 
2402   if( p->rc==SQLITE_OK && bGe==0 ){
2403     pIter->flags |= FTS5_SEGITER_ONETERM;
2404     if( pIter->pLeaf ){
2405       if( flags & FTS5INDEX_QUERY_DESC ){
2406         pIter->flags |= FTS5_SEGITER_REVERSE;
2407       }
2408       if( bDlidx ){
2409         fts5SegIterLoadDlidx(p, pIter);
2410       }
2411       if( flags & FTS5INDEX_QUERY_DESC ){
2412         fts5SegIterReverse(p, pIter);
2413       }
2414     }
2415   }
2416 
2417   fts5SegIterSetNext(p, pIter);
2418 
2419   /* Either:
2420   **
2421   **   1) an error has occurred, or
2422   **   2) the iterator points to EOF, or
2423   **   3) the iterator points to an entry with term (pTerm/nTerm), or
2424   **   4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2425   **      to an entry with a term greater than or equal to (pTerm/nTerm).
2426   */
2427   assert( p->rc!=SQLITE_OK                                          /* 1 */
2428    || pIter->pLeaf==0                                               /* 2 */
2429    || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0          /* 3 */
2430    || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0)  /* 4 */
2431   );
2432 }
2433 
2434 /*
2435 ** Initialize the object pIter to point to term pTerm/nTerm within the
2436 ** in-memory hash table. If there is no such term in the hash-table, the
2437 ** iterator is set to EOF.
2438 **
2439 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2440 ** an error has already occurred when this function is called, it is a no-op.
2441 */
fts5SegIterHashInit(Fts5Index * p,const u8 * pTerm,int nTerm,int flags,Fts5SegIter * pIter)2442 static void fts5SegIterHashInit(
2443   Fts5Index *p,                   /* FTS5 backend */
2444   const u8 *pTerm, int nTerm,     /* Term to seek to */
2445   int flags,                      /* Mask of FTS5INDEX_XXX flags */
2446   Fts5SegIter *pIter              /* Object to populate */
2447 ){
2448   const u8 *pList = 0;
2449   int nList = 0;
2450   const u8 *z = 0;
2451   int n = 0;
2452 
2453   assert( p->pHash );
2454   assert( p->rc==SQLITE_OK );
2455 
2456   if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
2457     p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
2458     sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
2459     n = (z ? (int)strlen((const char*)z) : 0);
2460   }else{
2461     pIter->flags |= FTS5_SEGITER_ONETERM;
2462     sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList);
2463     z = pTerm;
2464     n = nTerm;
2465   }
2466 
2467   if( pList ){
2468     Fts5Data *pLeaf;
2469     sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
2470     pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
2471     if( pLeaf==0 ) return;
2472     pLeaf->p = (u8*)pList;
2473     pLeaf->nn = pLeaf->szLeaf = nList;
2474     pIter->pLeaf = pLeaf;
2475     pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
2476     pIter->iEndofDoclist = pLeaf->nn;
2477 
2478     if( flags & FTS5INDEX_QUERY_DESC ){
2479       pIter->flags |= FTS5_SEGITER_REVERSE;
2480       fts5SegIterReverseInitPage(p, pIter);
2481     }else{
2482       fts5SegIterLoadNPos(p, pIter);
2483     }
2484   }
2485 
2486   fts5SegIterSetNext(p, pIter);
2487 }
2488 
2489 /*
2490 ** Zero the iterator passed as the only argument.
2491 */
fts5SegIterClear(Fts5SegIter * pIter)2492 static void fts5SegIterClear(Fts5SegIter *pIter){
2493   fts5BufferFree(&pIter->term);
2494   fts5DataRelease(pIter->pLeaf);
2495   fts5DataRelease(pIter->pNextLeaf);
2496   fts5DlidxIterFree(pIter->pDlidx);
2497   sqlite3_free(pIter->aRowidOffset);
2498   memset(pIter, 0, sizeof(Fts5SegIter));
2499 }
2500 
2501 #ifdef SQLITE_DEBUG
2502 
2503 /*
2504 ** This function is used as part of the big assert() procedure implemented by
2505 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
2506 ** in *pRes is the correct result of comparing the current positions of the
2507 ** two iterators.
2508 */
fts5AssertComparisonResult(Fts5Iter * pIter,Fts5SegIter * p1,Fts5SegIter * p2,Fts5CResult * pRes)2509 static void fts5AssertComparisonResult(
2510   Fts5Iter *pIter,
2511   Fts5SegIter *p1,
2512   Fts5SegIter *p2,
2513   Fts5CResult *pRes
2514 ){
2515   int i1 = p1 - pIter->aSeg;
2516   int i2 = p2 - pIter->aSeg;
2517 
2518   if( p1->pLeaf || p2->pLeaf ){
2519     if( p1->pLeaf==0 ){
2520       assert( pRes->iFirst==i2 );
2521     }else if( p2->pLeaf==0 ){
2522       assert( pRes->iFirst==i1 );
2523     }else{
2524       int nMin = MIN(p1->term.n, p2->term.n);
2525       int res = memcmp(p1->term.p, p2->term.p, nMin);
2526       if( res==0 ) res = p1->term.n - p2->term.n;
2527 
2528       if( res==0 ){
2529         assert( pRes->bTermEq==1 );
2530         assert( p1->iRowid!=p2->iRowid );
2531         res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
2532       }else{
2533         assert( pRes->bTermEq==0 );
2534       }
2535 
2536       if( res<0 ){
2537         assert( pRes->iFirst==i1 );
2538       }else{
2539         assert( pRes->iFirst==i2 );
2540       }
2541     }
2542   }
2543 }
2544 
2545 /*
2546 ** This function is a no-op unless SQLITE_DEBUG is defined when this module
2547 ** is compiled. In that case, this function is essentially an assert()
2548 ** statement used to verify that the contents of the pIter->aFirst[] array
2549 ** are correct.
2550 */
fts5AssertMultiIterSetup(Fts5Index * p,Fts5Iter * pIter)2551 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
2552   if( p->rc==SQLITE_OK ){
2553     Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2554     int i;
2555 
2556     assert( (pFirst->pLeaf==0)==pIter->base.bEof );
2557 
2558     /* Check that pIter->iSwitchRowid is set correctly. */
2559     for(i=0; i<pIter->nSeg; i++){
2560       Fts5SegIter *p1 = &pIter->aSeg[i];
2561       assert( p1==pFirst
2562            || p1->pLeaf==0
2563            || fts5BufferCompare(&pFirst->term, &p1->term)
2564            || p1->iRowid==pIter->iSwitchRowid
2565            || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
2566       );
2567     }
2568 
2569     for(i=0; i<pIter->nSeg; i+=2){
2570       Fts5SegIter *p1 = &pIter->aSeg[i];
2571       Fts5SegIter *p2 = &pIter->aSeg[i+1];
2572       Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
2573       fts5AssertComparisonResult(pIter, p1, p2, pRes);
2574     }
2575 
2576     for(i=1; i<(pIter->nSeg / 2); i+=2){
2577       Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
2578       Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
2579       Fts5CResult *pRes = &pIter->aFirst[i];
2580       fts5AssertComparisonResult(pIter, p1, p2, pRes);
2581     }
2582   }
2583 }
2584 #else
2585 # define fts5AssertMultiIterSetup(x,y)
2586 #endif
2587 
2588 /*
2589 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2590 **
2591 ** If the returned value is non-zero, then it is the index of an entry
2592 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2593 ** to a key that is a duplicate of another, higher priority,
2594 ** segment-iterator in the pSeg->aSeg[] array.
2595 */
fts5MultiIterDoCompare(Fts5Iter * pIter,int iOut)2596 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
2597   int i1;                         /* Index of left-hand Fts5SegIter */
2598   int i2;                         /* Index of right-hand Fts5SegIter */
2599   int iRes;
2600   Fts5SegIter *p1;                /* Left-hand Fts5SegIter */
2601   Fts5SegIter *p2;                /* Right-hand Fts5SegIter */
2602   Fts5CResult *pRes = &pIter->aFirst[iOut];
2603 
2604   assert( iOut<pIter->nSeg && iOut>0 );
2605   assert( pIter->bRev==0 || pIter->bRev==1 );
2606 
2607   if( iOut>=(pIter->nSeg/2) ){
2608     i1 = (iOut - pIter->nSeg/2) * 2;
2609     i2 = i1 + 1;
2610   }else{
2611     i1 = pIter->aFirst[iOut*2].iFirst;
2612     i2 = pIter->aFirst[iOut*2+1].iFirst;
2613   }
2614   p1 = &pIter->aSeg[i1];
2615   p2 = &pIter->aSeg[i2];
2616 
2617   pRes->bTermEq = 0;
2618   if( p1->pLeaf==0 ){           /* If p1 is at EOF */
2619     iRes = i2;
2620   }else if( p2->pLeaf==0 ){     /* If p2 is at EOF */
2621     iRes = i1;
2622   }else{
2623     int res = fts5BufferCompare(&p1->term, &p2->term);
2624     if( res==0 ){
2625       assert( i2>i1 );
2626       assert( i2!=0 );
2627       pRes->bTermEq = 1;
2628       if( p1->iRowid==p2->iRowid ){
2629         p1->bDel = p2->bDel;
2630         return i2;
2631       }
2632       res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
2633     }
2634     assert( res!=0 );
2635     if( res<0 ){
2636       iRes = i1;
2637     }else{
2638       iRes = i2;
2639     }
2640   }
2641 
2642   pRes->iFirst = (u16)iRes;
2643   return 0;
2644 }
2645 
2646 /*
2647 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
2648 ** It is an error if leaf iLeafPgno does not exist or contains no rowids.
2649 */
fts5SegIterGotoPage(Fts5Index * p,Fts5SegIter * pIter,int iLeafPgno)2650 static void fts5SegIterGotoPage(
2651   Fts5Index *p,                   /* FTS5 backend object */
2652   Fts5SegIter *pIter,             /* Iterator to advance */
2653   int iLeafPgno
2654 ){
2655   assert( iLeafPgno>pIter->iLeafPgno );
2656 
2657   if( iLeafPgno>pIter->pSeg->pgnoLast ){
2658     p->rc = FTS5_CORRUPT;
2659   }else{
2660     fts5DataRelease(pIter->pNextLeaf);
2661     pIter->pNextLeaf = 0;
2662     pIter->iLeafPgno = iLeafPgno-1;
2663     fts5SegIterNextPage(p, pIter);
2664     assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
2665 
2666     if( p->rc==SQLITE_OK ){
2667       int iOff;
2668       u8 *a = pIter->pLeaf->p;
2669       int n = pIter->pLeaf->szLeaf;
2670 
2671       iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
2672       if( iOff<4 || iOff>=n ){
2673         p->rc = FTS5_CORRUPT;
2674       }else{
2675         iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
2676         pIter->iLeafOffset = iOff;
2677         fts5SegIterLoadNPos(p, pIter);
2678       }
2679     }
2680   }
2681 }
2682 
2683 /*
2684 ** Advance the iterator passed as the second argument until it is at or
2685 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
2686 ** always advanced at least once.
2687 */
fts5SegIterNextFrom(Fts5Index * p,Fts5SegIter * pIter,i64 iMatch)2688 static void fts5SegIterNextFrom(
2689   Fts5Index *p,                   /* FTS5 backend object */
2690   Fts5SegIter *pIter,             /* Iterator to advance */
2691   i64 iMatch                      /* Advance iterator at least this far */
2692 ){
2693   int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2694   Fts5DlidxIter *pDlidx = pIter->pDlidx;
2695   int iLeafPgno = pIter->iLeafPgno;
2696   int bMove = 1;
2697 
2698   assert( pIter->flags & FTS5_SEGITER_ONETERM );
2699   assert( pIter->pDlidx );
2700   assert( pIter->pLeaf );
2701 
2702   if( bRev==0 ){
2703     while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
2704       iLeafPgno = fts5DlidxIterPgno(pDlidx);
2705       fts5DlidxIterNext(p, pDlidx);
2706     }
2707     assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
2708     if( iLeafPgno>pIter->iLeafPgno ){
2709       fts5SegIterGotoPage(p, pIter, iLeafPgno);
2710       bMove = 0;
2711     }
2712   }else{
2713     assert( pIter->pNextLeaf==0 );
2714     assert( iMatch<pIter->iRowid );
2715     while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
2716       fts5DlidxIterPrev(p, pDlidx);
2717     }
2718     iLeafPgno = fts5DlidxIterPgno(pDlidx);
2719 
2720     assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
2721 
2722     if( iLeafPgno<pIter->iLeafPgno ){
2723       pIter->iLeafPgno = iLeafPgno+1;
2724       fts5SegIterReverseNewPage(p, pIter);
2725       bMove = 0;
2726     }
2727   }
2728 
2729   do{
2730     if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
2731     if( pIter->pLeaf==0 ) break;
2732     if( bRev==0 && pIter->iRowid>=iMatch ) break;
2733     if( bRev!=0 && pIter->iRowid<=iMatch ) break;
2734     bMove = 1;
2735   }while( p->rc==SQLITE_OK );
2736 }
2737 
2738 
2739 /*
2740 ** Free the iterator object passed as the second argument.
2741 */
fts5MultiIterFree(Fts5Iter * pIter)2742 static void fts5MultiIterFree(Fts5Iter *pIter){
2743   if( pIter ){
2744     int i;
2745     for(i=0; i<pIter->nSeg; i++){
2746       fts5SegIterClear(&pIter->aSeg[i]);
2747     }
2748     fts5StructureRelease(pIter->pStruct);
2749     fts5BufferFree(&pIter->poslist);
2750     sqlite3_free(pIter);
2751   }
2752 }
2753 
/*
** Sub-iterator iChanged of iterator pIter has just been advanced. Update
** the aFirst[] array accordingly, starting with the entry that compares
** iChanged against its neighbour and moving towards aFirst[1] by halving
** the index. Entries with an index smaller than iMinset are not touched.
**
** If fts5MultiIterDoCompare() reports a duplicate entry, that
** segment-iterator is advanced and processing restarts from the aFirst[]
** entry that covers it. The loop also stops if an error is left in p->rc.
*/
static void fts5MultiIterAdvanced(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  int iMinset                     /* Minimum entry in aFirst[] to set */
){
  int iNode = (pIter->nSeg + iChanged) / 2;
  while( iNode>=iMinset && p->rc==SQLITE_OK ){
    int iDup = fts5MultiIterDoCompare(pIter, iNode);
    if( iDup!=0 ){
      Fts5SegIter *pDup = &pIter->aSeg[iDup];
      assert( p->rc==SQLITE_OK );
      pDup->xNext(p, pDup, 0);
      /* Halved below, giving the entry that compares aSeg[iDup]. */
      iNode = pIter->nSeg + iDup;
    }
    iNode = iNode / 2;
  }
}
2771 
2772 /*
2773 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
2774 ** points to the same term though - just a different rowid. This function
2775 ** attempts to update the contents of the pIter->aFirst[] accordingly.
2776 ** If it does so successfully, 0 is returned. Otherwise 1.
2777 **
2778 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
2779 ** on the iterator instead. That function does the same as this one, except
2780 ** that it deals with more complicated cases as well.
2781 */
fts5MultiIterAdvanceRowid(Fts5Iter * pIter,int iChanged,Fts5SegIter ** ppFirst)2782 static int fts5MultiIterAdvanceRowid(
2783   Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
2784   int iChanged,                   /* Index of sub-iterator just advanced */
2785   Fts5SegIter **ppFirst
2786 ){
2787   Fts5SegIter *pNew = &pIter->aSeg[iChanged];
2788 
2789   if( pNew->iRowid==pIter->iSwitchRowid
2790    || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
2791   ){
2792     int i;
2793     Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
2794     pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
2795     for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
2796       Fts5CResult *pRes = &pIter->aFirst[i];
2797 
2798       assert( pNew->pLeaf );
2799       assert( pRes->bTermEq==0 || pOther->pLeaf );
2800 
2801       if( pRes->bTermEq ){
2802         if( pNew->iRowid==pOther->iRowid ){
2803           return 1;
2804         }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
2805           pIter->iSwitchRowid = pOther->iRowid;
2806           pNew = pOther;
2807         }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
2808           pIter->iSwitchRowid = pOther->iRowid;
2809         }
2810       }
2811       pRes->iFirst = (u16)(pNew - pIter->aSeg);
2812       if( i==1 ) break;
2813 
2814       pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
2815     }
2816   }
2817 
2818   *ppFirst = pNew;
2819   return 0;
2820 }
2821 
2822 /*
2823 ** Set the pIter->bEof variable based on the state of the sub-iterators.
2824 */
fts5MultiIterSetEof(Fts5Iter * pIter)2825 static void fts5MultiIterSetEof(Fts5Iter *pIter){
2826   Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2827   pIter->base.bEof = pSeg->pLeaf==0;
2828   pIter->iSwitchRowid = pSeg->iRowid;
2829 }
2830 
2831 /*
2832 ** Move the iterator to the next entry.
2833 **
2834 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
2835 ** considered an error if the iterator reaches EOF, or if it is already at
2836 ** EOF when this function is called.
2837 */
fts5MultiIterNext(Fts5Index * p,Fts5Iter * pIter,int bFrom,i64 iFrom)2838 static void fts5MultiIterNext(
2839   Fts5Index *p,
2840   Fts5Iter *pIter,
2841   int bFrom,                      /* True if argument iFrom is valid */
2842   i64 iFrom                       /* Advance at least as far as this */
2843 ){
2844   int bUseFrom = bFrom;
2845   assert( pIter->base.bEof==0 );
2846   while( p->rc==SQLITE_OK ){
2847     int iFirst = pIter->aFirst[1].iFirst;
2848     int bNewTerm = 0;
2849     Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2850     assert( p->rc==SQLITE_OK );
2851     if( bUseFrom && pSeg->pDlidx ){
2852       fts5SegIterNextFrom(p, pSeg, iFrom);
2853     }else{
2854       pSeg->xNext(p, pSeg, &bNewTerm);
2855     }
2856 
2857     if( pSeg->pLeaf==0 || bNewTerm
2858      || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
2859     ){
2860       fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2861       fts5MultiIterSetEof(pIter);
2862       pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
2863       if( pSeg->pLeaf==0 ) return;
2864     }
2865 
2866     fts5AssertMultiIterSetup(p, pIter);
2867     assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
2868     if( pIter->bSkipEmpty==0 || pSeg->nPos ){
2869       pIter->xSetOutputs(pIter, pSeg);
2870       return;
2871     }
2872     bUseFrom = 0;
2873   }
2874 }
2875 
/*
** Variant of fts5MultiIterNext() for iterators that have the bSkipEmpty
** flag set. It steps the iterator with xNext() until
** fts5MultiIterIsEmpty() returns false, and never invokes xSetOutputs().
**
** This is a no-op if p->rc is already set on entry. Otherwise *pbNewTerm
** is first cleared, then set to 1 if any step required the aFirst[] array
** to be recomputed by fts5MultiIterAdvanced() - in which case the
** iterator may now point to a different term.
*/
static void fts5MultiIterNext2(
  Fts5Index *p,
  Fts5Iter *pIter,
  int *pbNewTerm                  /* OUT: True if *might* be new term */
){
  assert( pIter->bSkipEmpty );
  if( p->rc!=SQLITE_OK ) return;

  *pbNewTerm = 0;
  do{
    int iWinner = pIter->aFirst[1].iFirst;
    Fts5SegIter *pSeg = &pIter->aSeg[iWinner];
    int bNewTerm = 0;
    int bRebuild;                 /* True if aFirst[] must be recomputed */

    assert( p->rc==SQLITE_OK );
    pSeg->xNext(p, pSeg, &bNewTerm);

    bRebuild = (pSeg->pLeaf==0 || bNewTerm);
    if( bRebuild==0 ){
      bRebuild = fts5MultiIterAdvanceRowid(pIter, iWinner, &pSeg);
    }
    if( bRebuild ){
      fts5MultiIterAdvanced(p, pIter, iWinner, 1);
      fts5MultiIterSetEof(pIter);
      *pbNewTerm = 1;
    }
    fts5AssertMultiIterSetup(p, pIter);

  }while( fts5MultiIterIsEmpty(p, pIter) );
}
2903 
/*
** xSetOutputs callback that does nothing. This is the callback installed
** by fts5MultiIterAlloc() when an iterator is first allocated.
*/
static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
  UNUSED_PARAM(pUnused1);
  UNUSED_PARAM(pUnused2);
}
2907 
fts5MultiIterAlloc(Fts5Index * p,int nSeg)2908 static Fts5Iter *fts5MultiIterAlloc(
2909   Fts5Index *p,                   /* FTS5 backend to iterate within */
2910   int nSeg
2911 ){
2912   Fts5Iter *pNew;
2913   int nSlot;                      /* Power of two >= nSeg */
2914 
2915   for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
2916   pNew = fts5IdxMalloc(p,
2917       sizeof(Fts5Iter) +                  /* pNew */
2918       sizeof(Fts5SegIter) * (nSlot-1) +   /* pNew->aSeg[] */
2919       sizeof(Fts5CResult) * nSlot         /* pNew->aFirst[] */
2920   );
2921   if( pNew ){
2922     pNew->nSeg = nSlot;
2923     pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
2924     pNew->pIndex = p;
2925     pNew->xSetOutputs = fts5IterSetOutputs_Noop;
2926   }
2927   return pNew;
2928 }
2929 
/*
** xChunk callback for fts5ChunkIterate(). The context pointer is an
** Fts5Buffer. The nChunk bytes at pChunk are appended to it verbatim
** using the "safe" append macro, which relies on the caller having
** already made the buffer large enough. Calls with nChunk<=0 append
** nothing.
*/
static void fts5PoslistCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  Fts5Buffer *pOut = (Fts5Buffer*)pContext;
  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk<=0 ) return;
  fts5BufferSafeAppendBlob(pOut, pChunk, nChunk);
}
2941 
2942 typedef struct PoslistCallbackCtx PoslistCallbackCtx;
2943 struct PoslistCallbackCtx {
2944   Fts5Buffer *pBuf;               /* Append to this buffer */
2945   Fts5Colset *pColset;            /* Restrict matches to this column */
2946   int eState;                     /* See above */
2947 };
2948 
2949 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
2950 struct PoslistOffsetsCtx {
2951   Fts5Buffer *pBuf;               /* Append to this buffer */
2952   Fts5Colset *pColset;            /* Restrict matches to this column */
2953   int iRead;
2954   int iWrite;
2955 };
2956 
2957 /*
2958 ** TODO: Make this more efficient!
2959 */
fts5IndexColsetTest(Fts5Colset * pColset,int iCol)2960 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
2961   int i;
2962   for(i=0; i<pColset->nCol; i++){
2963     if( pColset->aiCol[i]==iCol ) return 1;
2964   }
2965   return 0;
2966 }
2967 
/*
** xChunk callback for fts5ChunkIterate(). The context pointer is a
** PoslistOffsetsCtx object.
**
** Each varint in the chunk is decoded as (delta + 2), where the delta is
** relative to the previously decoded value (pCtx->iRead). Values that
** pass fts5IndexColsetTest() are re-encoded relative to the previous
** value written (pCtx->iWrite) and appended to pCtx->pBuf. All other
** values are dropped.
*/
static void fts5PoslistOffsetsCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
  int iOff;                       /* Current offset within pChunk[] */

  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );

  for(iOff=0; iOff<nChunk; ){
    int iCol;
    iOff += fts5GetVarint32(&pChunk[iOff], iCol);
    iCol += pCtx->iRead - 2;
    pCtx->iRead = iCol;
    if( fts5IndexColsetTest(pCtx->pColset, iCol)==0 ) continue;
    fts5BufferSafeAppendVarint(pCtx->pBuf, iCol + 2 - pCtx->iWrite);
    pCtx->iWrite = iCol;
  }
}
2990 
/*
** xChunk callback for fts5ChunkIterate(). The context pointer is a
** PoslistCallbackCtx object.
**
** The chunk is part of a position list in which each 0x01 byte that is
** not part of another varint introduces a column number. The data for a
** column - including the 0x01 byte and the column number - is appended to
** pCtx->pBuf only if the column passes fts5IndexColsetTest().
**
** pCtx->eState carries the copy/skip state between chunks. The value 2
** means that the previous chunk ended directly after an 0x01 byte, so the
** column number is the first varint of this chunk.
*/
static void fts5PoslistFilterCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
  int iOff = 0;                   /* Current offset within pChunk[] */
  int iCopy = 0;                  /* Offset of first byte not yet copied */

  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk<=0 ) return;

  if( pCtx->eState==2 ){
    /* Finish the column header that was split over two chunks. If the
    ** column is wanted, emit the 0x01 byte now. The column number itself
    ** is copied below, as iCopy is still 0. */
    int iCol;
    fts5FastGetVarint32(pChunk, iOff, iCol);
    if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
      pCtx->eState = 1;
      fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
    }else{
      pCtx->eState = 0;
    }
  }

  for(;;){
    int iCol;

    /* Skip whole varints until the next 0x01 byte, or the end of the
    ** chunk. This is the start of the next column's hits. */
    while( iOff<nChunk && pChunk[iOff]!=0x01 ){
      while( pChunk[iOff] & 0x80 ) iOff++;
      iOff++;
    }
    if( pCtx->eState ){
      fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iCopy], iOff-iCopy);
    }
    if( iOff>=nChunk ) break;

    /* pChunk[iOff] is an 0x01 byte. Step over it. */
    iCopy = iOff;
    iOff++;
    if( iOff>=nChunk ){
      /* The column number is in the next chunk. */
      pCtx->eState = 2;
      break;
    }

    fts5FastGetVarint32(pChunk, iOff, iCol);
    pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
    if( pCtx->eState ){
      fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iCopy], iOff-iCopy);
      iCopy = iOff;
    }
    if( iOff>=nChunk ) break;
  }
}
3042 
/*
** Invoke the xChunk callback once for each contiguous chunk of the
** position list that segment-iterator pSeg currently points to.
**
** The first chunk is the part of the list stored on the current leaf
** page. If the list is larger than that (pSeg->nPos bytes in total),
** subsequent leaf pages are loaded and the callback is invoked with the
** data that follows the first 4 bytes of each. Iteration stops early if
** a page cannot be loaded.
**
** For iterators without the FTS5_SEGITER_REVERSE flag, the page
** immediately following the current one is not released if it is loaded
** here. Instead it is stored in pSeg->pNextLeaf.
*/
static void fts5ChunkIterate(
  Fts5Index *p,                   /* Index object */
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;            /* Extra page currently held, if any */
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = (pSeg->flags & FTS5_SEGITER_REVERSE) ? 0 : pgno+1;

  /* This function does not work with detail=none databases. */
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  for(;;){
    xChunk(p, pCtx, pChunk, nChunk);
    nRem -= nChunk;
    fts5DataRelease(pData);
    if( nRem<=0 ) break;

    pgno++;
    pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
    if( pData==0 ) break;
    pChunk = &pData->p[4];
    nChunk = MIN(nRem, pData->szLeaf - 4);
    if( pgno==pgnoSave ){
      /* Hand the page over to the iterator instead of releasing it. */
      assert( pSeg->pNextLeaf==0 );
      pSeg->pNextLeaf = pData;
      pData = 0;
    }
  }
}
3083 
3084 /*
3085 ** Iterator pIter currently points to a valid entry (not EOF). This
3086 ** function appends the position list data for the current entry to
3087 ** buffer pBuf. It does not make a copy of the position-list size
3088 ** field.
3089 */
fts5SegiterPoslist(Fts5Index * p,Fts5SegIter * pSeg,Fts5Colset * pColset,Fts5Buffer * pBuf)3090 static void fts5SegiterPoslist(
3091   Fts5Index *p,
3092   Fts5SegIter *pSeg,
3093   Fts5Colset *pColset,
3094   Fts5Buffer *pBuf
3095 ){
3096   if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){
3097     if( pColset==0 ){
3098       fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
3099     }else{
3100       if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
3101         PoslistCallbackCtx sCtx;
3102         sCtx.pBuf = pBuf;
3103         sCtx.pColset = pColset;
3104         sCtx.eState = fts5IndexColsetTest(pColset, 0);
3105         assert( sCtx.eState==0 || sCtx.eState==1 );
3106         fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
3107       }else{
3108         PoslistOffsetsCtx sCtx;
3109         memset(&sCtx, 0, sizeof(sCtx));
3110         sCtx.pBuf = pBuf;
3111         sCtx.pColset = pColset;
3112         fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
3113       }
3114     }
3115   }
3116 }
3117 
3118 /*
3119 ** IN/OUT parameter (*pa) points to a position list n bytes in size. If
3120 ** the position list contains entries for column iCol, then (*pa) is set
3121 ** to point to the sub-position-list for that column and the number of
3122 ** bytes in it returned. Or, if the argument position list does not
3123 ** contain any entries for column iCol, return 0.
3124 */
fts5IndexExtractCol(const u8 ** pa,int n,int iCol)3125 static int fts5IndexExtractCol(
3126   const u8 **pa,                  /* IN/OUT: Pointer to poslist */
3127   int n,                          /* IN: Size of poslist in bytes */
3128   int iCol                        /* Column to extract from poslist */
3129 ){
3130   int iCurrent = 0;               /* Anything before the first 0x01 is col 0 */
3131   const u8 *p = *pa;
3132   const u8 *pEnd = &p[n];         /* One byte past end of position list */
3133 
3134   while( iCol>iCurrent ){
3135     /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3136     ** not part of a varint. Note that it is not possible for a negative
3137     ** or extremely large varint to occur within an uncorrupted position
3138     ** list. So the last byte of each varint may be assumed to have a clear
3139     ** 0x80 bit.  */
3140     while( *p!=0x01 ){
3141       while( *p++ & 0x80 );
3142       if( p>=pEnd ) return 0;
3143     }
3144     *pa = p++;
3145     iCurrent = *p++;
3146     if( iCurrent & 0x80 ){
3147       p--;
3148       p += fts5GetVarint32(p, iCurrent);
3149     }
3150   }
3151   if( iCol!=iCurrent ) return 0;
3152 
3153   /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3154   ** not part of a varint */
3155   while( p<pEnd && *p!=0x01 ){
3156     while( *p++ & 0x80 );
3157   }
3158 
3159   return p - (*pa);
3160 }
3161 
/*
** Filter position list pPos/nPos so that it contains only entries for the
** columns in pColset, writing the result to buffer pBuf.
**
** This is a no-op if *pRc is not SQLITE_OK when it is called. Otherwise
** pBuf is zeroed and then, for each entry of pColset->aiCol[] in order,
** the sub-position-list located by fts5IndexExtractCol() (if any) is
** appended to it. The append routine is passed pRc.
*/
static void fts5IndexExtractColset(
  int *pRc,
  Fts5Colset *pColset,            /* Colset to filter on */
  const u8 *pPos, int nPos,       /* Position list */
  Fts5Buffer *pBuf                /* Output buffer */
){
  int iEntry;                     /* Index into pColset->aiCol[] */

  if( *pRc!=SQLITE_OK ) return;

  fts5BufferZero(pBuf);
  for(iEntry=0; iEntry<pColset->nCol; iEntry++){
    const u8 *pList = pPos;
    int nList = fts5IndexExtractCol(&pList, nPos, pColset->aiCol[iEntry]);
    if( nList==0 ) continue;
    fts5BufferAppendBlob(pRc, pBuf, nList, pList);
  }
}
3180 
3181 /*
3182 ** xSetOutputs callback used by detail=none tables.
3183 */
fts5IterSetOutputs_None(Fts5Iter * pIter,Fts5SegIter * pSeg)3184 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
3185   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
3186   pIter->base.iRowid = pSeg->iRowid;
3187   pIter->base.nData = pSeg->nPos;
3188 }
3189 
3190 /*
3191 ** xSetOutputs callback used by detail=full and detail=col tables when no
3192 ** column filters are specified.
3193 */
fts5IterSetOutputs_Nocolset(Fts5Iter * pIter,Fts5SegIter * pSeg)3194 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3195   pIter->base.iRowid = pSeg->iRowid;
3196   pIter->base.nData = pSeg->nPos;
3197 
3198   assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
3199   assert( pIter->pColset==0 );
3200 
3201   if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3202     /* All data is stored on the current page. Populate the output
3203     ** variables to point into the body of the page object. */
3204     pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3205   }else{
3206     /* The data is distributed over two or more pages. Copy it into the
3207     ** Fts5Iter.poslist buffer and then set the output pointer to point
3208     ** to this buffer.  */
3209     fts5BufferZero(&pIter->poslist);
3210     fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
3211     pIter->base.pData = pIter->poslist.p;
3212   }
3213 }
3214 
3215 /*
3216 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
3217 ** against no columns at all).
3218 */
fts5IterSetOutputs_ZeroColset(Fts5Iter * pIter,Fts5SegIter * pSeg)3219 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3220   UNUSED_PARAM(pSeg);
3221   pIter->base.nData = 0;
3222 }
3223 
3224 /*
3225 ** xSetOutputs callback used by detail=col when there is a column filter
3226 ** and there are 100 or more columns. Also called as a fallback from
3227 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
3228 */
fts5IterSetOutputs_Col(Fts5Iter * pIter,Fts5SegIter * pSeg)3229 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
3230   fts5BufferZero(&pIter->poslist);
3231   fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
3232   pIter->base.iRowid = pSeg->iRowid;
3233   pIter->base.pData = pIter->poslist.p;
3234   pIter->base.nData = pIter->poslist.n;
3235 }
3236 
3237 /*
3238 ** xSetOutputs callback used when:
3239 **
3240 **   * detail=col,
3241 **   * there is a column filter, and
3242 **   * the table contains 100 or fewer columns.
3243 **
3244 ** The last point is to ensure all column numbers are stored as
3245 ** single-byte varints.
3246 */
fts5IterSetOutputs_Col100(Fts5Iter * pIter,Fts5SegIter * pSeg)3247 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
3248 
3249   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3250   assert( pIter->pColset );
3251 
3252   if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
3253     fts5IterSetOutputs_Col(pIter, pSeg);
3254   }else{
3255     u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
3256     u8 *pEnd = (u8*)&a[pSeg->nPos];
3257     int iPrev = 0;
3258     int *aiCol = pIter->pColset->aiCol;
3259     int *aiColEnd = &aiCol[pIter->pColset->nCol];
3260 
3261     u8 *aOut = pIter->poslist.p;
3262     int iPrevOut = 0;
3263 
3264     pIter->base.iRowid = pSeg->iRowid;
3265 
3266     while( a<pEnd ){
3267       iPrev += (int)a++[0] - 2;
3268       while( *aiCol<iPrev ){
3269         aiCol++;
3270         if( aiCol==aiColEnd ) goto setoutputs_col_out;
3271       }
3272       if( *aiCol==iPrev ){
3273         *aOut++ = (u8)((iPrev - iPrevOut) + 2);
3274         iPrevOut = iPrev;
3275       }
3276     }
3277 
3278 setoutputs_col_out:
3279     pIter->base.pData = pIter->poslist.p;
3280     pIter->base.nData = aOut - pIter->poslist.p;
3281   }
3282 }
3283 
3284 /*
3285 ** xSetOutputs callback used by detail=full when there is a column filter.
3286 */
fts5IterSetOutputs_Full(Fts5Iter * pIter,Fts5SegIter * pSeg)3287 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
3288   Fts5Colset *pColset = pIter->pColset;
3289   pIter->base.iRowid = pSeg->iRowid;
3290 
3291   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
3292   assert( pColset );
3293 
3294   if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3295     /* All data is stored on the current page. Populate the output
3296     ** variables to point into the body of the page object. */
3297     const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3298     if( pColset->nCol==1 ){
3299       pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]);
3300       pIter->base.pData = a;
3301     }else{
3302       int *pRc = &pIter->pIndex->rc;
3303       fts5BufferZero(&pIter->poslist);
3304       fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, &pIter->poslist);
3305       pIter->base.pData = pIter->poslist.p;
3306       pIter->base.nData = pIter->poslist.n;
3307     }
3308   }else{
3309     /* The data is distributed over two or more pages. Copy it into the
3310     ** Fts5Iter.poslist buffer and then set the output pointer to point
3311     ** to this buffer.  */
3312     fts5BufferZero(&pIter->poslist);
3313     fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
3314     pIter->base.pData = pIter->poslist.p;
3315     pIter->base.nData = pIter->poslist.n;
3316   }
3317 }
3318 
/*
** Select the xSetOutputs callback for iterator pIter, based on the detail
** mode of the table and on pIter->pColset. This is a no-op if *pRc is not
** SQLITE_OK when it is called.
**
**   detail=none                       ->  fts5IterSetOutputs_None
**   no colset                         ->  fts5IterSetOutputs_Nocolset
**   colset with zero columns          ->  fts5IterSetOutputs_ZeroColset
**   detail=full                       ->  fts5IterSetOutputs_Full
**   detail=col, 100 columns or fewer  ->  fts5IterSetOutputs_Col100
**   detail=col, more than 100 columns ->  fts5IterSetOutputs_Col
**
** In the Col100 case pIter->poslist is also sized to hold pConfig->nCol
** bytes, as fts5IterSetOutputs_Col100() writes into that buffer without
** growing it. That call is passed pRc.
*/
static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
  Fts5Config *pConfig;

  if( *pRc!=SQLITE_OK ) return;
  pConfig = pIter->pIndex->pConfig;

  if( pConfig->eDetail==FTS5_DETAIL_NONE ){
    pIter->xSetOutputs = fts5IterSetOutputs_None;
    return;
  }
  if( pIter->pColset==0 ){
    pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
    return;
  }
  if( pIter->pColset->nCol==0 ){
    pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
    return;
  }
  if( pConfig->eDetail==FTS5_DETAIL_FULL ){
    pIter->xSetOutputs = fts5IterSetOutputs_Full;
    return;
  }

  assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
  if( pConfig->nCol>100 ){
    pIter->xSetOutputs = fts5IterSetOutputs_Col;
  }else{
    pIter->xSetOutputs = fts5IterSetOutputs_Col100;
    sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
  }
}
3349 
3350 
3351 /*
3352 ** Allocate a new Fts5Iter object.
3353 **
3354 ** The new object will be used to iterate through data in structure pStruct.
3355 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
3356 ** is zero or greater, data from the first nSegment segments on level iLevel
3357 ** is merged.
3358 **
3359 ** The iterator initially points to the first term/rowid entry in the
3360 ** iterated data.
3361 */
fts5MultiIterNew(Fts5Index * p,Fts5Structure * pStruct,int flags,Fts5Colset * pColset,const u8 * pTerm,int nTerm,int iLevel,int nSegment,Fts5Iter ** ppOut)3362 static void fts5MultiIterNew(
3363   Fts5Index *p,                   /* FTS5 backend to iterate within */
3364   Fts5Structure *pStruct,         /* Structure of specific index */
3365   int flags,                      /* FTS5INDEX_QUERY_XXX flags */
3366   Fts5Colset *pColset,            /* Colset to filter on (or NULL) */
3367   const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
3368   int iLevel,                     /* Level to iterate (-1 for all) */
3369   int nSegment,                   /* Number of segments to merge (iLevel>=0) */
3370   Fts5Iter **ppOut                /* New object */
3371 ){
3372   int nSeg = 0;                   /* Number of segment-iters in use */
3373   int iIter = 0;                  /* */
3374   int iSeg;                       /* Used to iterate through segments */
3375   Fts5StructureLevel *pLvl;
3376   Fts5Iter *pNew;
3377 
3378   assert( (pTerm==0 && nTerm==0) || iLevel<0 );
3379 
3380   /* Allocate space for the new multi-seg-iterator. */
3381   if( p->rc==SQLITE_OK ){
3382     if( iLevel<0 ){
3383       assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
3384       nSeg = pStruct->nSegment;
3385       nSeg += (p->pHash ? 1 : 0);
3386     }else{
3387       nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
3388     }
3389   }
3390   *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
3391   if( pNew==0 ) return;
3392   pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
3393   pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
3394   pNew->pStruct = pStruct;
3395   pNew->pColset = pColset;
3396   fts5StructureRef(pStruct);
3397   if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
3398     fts5IterSetOutputCb(&p->rc, pNew);
3399   }
3400 
3401   /* Initialize each of the component segment iterators. */
3402   if( p->rc==SQLITE_OK ){
3403     if( iLevel<0 ){
3404       Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
3405       if( p->pHash ){
3406         /* Add a segment iterator for the current contents of the hash table. */
3407         Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3408         fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
3409       }
3410       for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
3411         for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
3412           Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
3413           Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3414           if( pTerm==0 ){
3415             fts5SegIterInit(p, pSeg, pIter);
3416           }else{
3417             fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
3418           }
3419         }
3420       }
3421     }else{
3422       pLvl = &pStruct->aLevel[iLevel];
3423       for(iSeg=nSeg-1; iSeg>=0; iSeg--){
3424         fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
3425       }
3426     }
3427     assert( iIter==nSeg );
3428   }
3429 
3430   /* If the above was successful, each component iterators now points
3431   ** to the first entry in its segment. In this case initialize the
3432   ** aFirst[] array. Or, if an error has occurred, free the iterator
3433   ** object and set the output variable to NULL.  */
3434   if( p->rc==SQLITE_OK ){
3435     for(iIter=pNew->nSeg-1; iIter>0; iIter--){
3436       int iEq;
3437       if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
3438         Fts5SegIter *pSeg = &pNew->aSeg[iEq];
3439         if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
3440         fts5MultiIterAdvanced(p, pNew, iEq, iIter);
3441       }
3442     }
3443     fts5MultiIterSetEof(pNew);
3444     fts5AssertMultiIterSetup(p, pNew);
3445 
3446     if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
3447       fts5MultiIterNext(p, pNew, 0, 0);
3448     }else if( pNew->base.bEof==0 ){
3449       Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
3450       pNew->xSetOutputs(pNew, pSeg);
3451     }
3452 
3453   }else{
3454     fts5MultiIterFree(pNew);
3455     *ppOut = 0;
3456   }
3457 }
3458 
3459 /*
3460 ** Create an Fts5Iter that iterates through the doclist provided
3461 ** as the second argument.
3462 */
fts5MultiIterNew2(Fts5Index * p,Fts5Data * pData,int bDesc,Fts5Iter ** ppOut)3463 static void fts5MultiIterNew2(
3464   Fts5Index *p,                   /* FTS5 backend to iterate within */
3465   Fts5Data *pData,                /* Doclist to iterate through */
3466   int bDesc,                      /* True for descending rowid order */
3467   Fts5Iter **ppOut                /* New object */
3468 ){
3469   Fts5Iter *pNew;
3470   pNew = fts5MultiIterAlloc(p, 2);
3471   if( pNew ){
3472     Fts5SegIter *pIter = &pNew->aSeg[1];
3473 
3474     pIter->flags = FTS5_SEGITER_ONETERM;
3475     if( pData->szLeaf>0 ){
3476       pIter->pLeaf = pData;
3477       pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
3478       pIter->iEndofDoclist = pData->nn;
3479       pNew->aFirst[1].iFirst = 1;
3480       if( bDesc ){
3481         pNew->bRev = 1;
3482         pIter->flags |= FTS5_SEGITER_REVERSE;
3483         fts5SegIterReverseInitPage(p, pIter);
3484       }else{
3485         fts5SegIterLoadNPos(p, pIter);
3486       }
3487       pData = 0;
3488     }else{
3489       pNew->base.bEof = 1;
3490     }
3491     fts5SegIterSetNext(p, pIter);
3492 
3493     *ppOut = pNew;
3494   }
3495 
3496   fts5DataRelease(pData);
3497 }
3498 
3499 /*
3500 ** Return true if the iterator is at EOF or if an error has occurred.
3501 ** False otherwise.
3502 */
fts5MultiIterEof(Fts5Index * p,Fts5Iter * pIter)3503 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
3504   assert( p->rc
3505       || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
3506   );
3507   return (p->rc || pIter->base.bEof);
3508 }
3509 
3510 /*
3511 ** Return the rowid of the entry that the iterator currently points
3512 ** to. If the iterator points to EOF when this function is called the
3513 ** results are undefined.
3514 */
fts5MultiIterRowid(Fts5Iter * pIter)3515 static i64 fts5MultiIterRowid(Fts5Iter *pIter){
3516   assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
3517   return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
3518 }
3519 
3520 /*
3521 ** Move the iterator to the next entry at or following iMatch.
3522 */
fts5MultiIterNextFrom(Fts5Index * p,Fts5Iter * pIter,i64 iMatch)3523 static void fts5MultiIterNextFrom(
3524   Fts5Index *p,
3525   Fts5Iter *pIter,
3526   i64 iMatch
3527 ){
3528   while( 1 ){
3529     i64 iRowid;
3530     fts5MultiIterNext(p, pIter, 1, iMatch);
3531     if( fts5MultiIterEof(p, pIter) ) break;
3532     iRowid = fts5MultiIterRowid(pIter);
3533     if( pIter->bRev==0 && iRowid>=iMatch ) break;
3534     if( pIter->bRev!=0 && iRowid<=iMatch ) break;
3535   }
3536 }
3537 
3538 /*
3539 ** Return a pointer to a buffer containing the term associated with the
3540 ** entry that the iterator currently points to.
3541 */
fts5MultiIterTerm(Fts5Iter * pIter,int * pn)3542 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
3543   Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
3544   *pn = p->term.n;
3545   return p->term.p;
3546 }
3547 
3548 /*
3549 ** Allocate a new segment-id for the structure pStruct. The new segment
3550 ** id must be between 1 and 65335 inclusive, and must not be used by
3551 ** any currently existing segment. If a free segment id cannot be found,
3552 ** SQLITE_FULL is returned.
3553 **
3554 ** If an error has already occurred, this function is a no-op. 0 is
3555 ** returned in this case.
3556 */
fts5AllocateSegid(Fts5Index * p,Fts5Structure * pStruct)3557 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
3558   int iSegid = 0;
3559 
3560   if( p->rc==SQLITE_OK ){
3561     if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
3562       p->rc = SQLITE_FULL;
3563     }else{
3564       /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
3565       ** array is 63 elements, or 252 bytes, in size.  */
3566       u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
3567       int iLvl, iSeg;
3568       int i;
3569       u32 mask;
3570       memset(aUsed, 0, sizeof(aUsed));
3571       for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3572         for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3573           int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
3574           if( iId<=FTS5_MAX_SEGMENT ){
3575             aUsed[(iId-1) / 32] |= 1 << ((iId-1) % 32);
3576           }
3577         }
3578       }
3579 
3580       for(i=0; aUsed[i]==0xFFFFFFFF; i++);
3581       mask = aUsed[i];
3582       for(iSegid=0; mask & (1 << iSegid); iSegid++);
3583       iSegid += 1 + i*32;
3584 
3585 #ifdef SQLITE_DEBUG
3586       for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3587         for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3588           assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
3589         }
3590       }
3591       assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
3592 
3593       {
3594         sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
3595         if( p->rc==SQLITE_OK ){
3596           u8 aBlob[2] = {0xff, 0xff};
3597           sqlite3_bind_int(pIdxSelect, 1, iSegid);
3598           sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
3599           assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
3600           p->rc = sqlite3_reset(pIdxSelect);
3601         }
3602       }
3603 #endif
3604     }
3605   }
3606 
3607   return iSegid;
3608 }
3609 
3610 /*
3611 ** Discard all data currently cached in the hash-tables.
3612 */
fts5IndexDiscardData(Fts5Index * p)3613 static void fts5IndexDiscardData(Fts5Index *p){
3614   assert( p->pHash || p->nPendingData==0 );
3615   if( p->pHash ){
3616     sqlite3Fts5HashClear(p->pHash);
3617     p->nPendingData = 0;
3618   }
3619 }
3620 
3621 /*
3622 ** Return the size of the prefix, in bytes, that buffer
3623 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
3624 **
3625 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
3626 ** than buffer (pOld/nOld).
3627 */
fts5PrefixCompress(int nOld,const u8 * pOld,const u8 * pNew)3628 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
3629   int i;
3630   for(i=0; i<nOld; i++){
3631     if( pOld[i]!=pNew[i] ) break;
3632   }
3633   return i;
3634 }
3635 
/*
** Reset the in-memory doclist-index being accumulated in
** pWriter->aDlidx[]. Levels are visited starting from level 0, stopping
** at the first level with an empty buffer. Each level visited has its
** buffer zeroed and its bPrevValid flag cleared.
**
** If bFlush is true, the buffer for each level visited is written to
** the database, using a rowid built from the segment id, the level and
** the level's current page number, before it is zeroed.
*/
static void fts5WriteDlidxClear(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int bFlush                      /* If true, write dlidx to disk */
){
  int iLvl;
  assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
  for(iLvl=0;
      iLvl<pWriter->nDlidx && pWriter->aDlidx[iLvl].buf.n!=0;
      iLvl++
  ){
    Fts5DlidxWriter *pLvl = &pWriter->aDlidx[iLvl];
    if( bFlush ){
      i64 iRowid;
      assert( pLvl->pgno!=0 );
      iRowid = FTS5_DLIDX_ROWID(pWriter->iSegid, iLvl, pLvl->pgno);
      fts5DataWrite(p, iRowid, pLvl->buf.p, pLvl->buf.n);
    }
    sqlite3Fts5BufferZero(&pLvl->buf);
    pLvl->bPrevValid = 0;
  }
}
3657 
3658 /*
3659 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
3660 ** Any new array elements are zeroed before returning.
3661 */
fts5WriteDlidxGrow(Fts5Index * p,Fts5SegWriter * pWriter,int nLvl)3662 static int fts5WriteDlidxGrow(
3663   Fts5Index *p,
3664   Fts5SegWriter *pWriter,
3665   int nLvl
3666 ){
3667   if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
3668     Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc(
3669         pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
3670     );
3671     if( aDlidx==0 ){
3672       p->rc = SQLITE_NOMEM;
3673     }else{
3674       int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
3675       memset(&aDlidx[pWriter->nDlidx], 0, nByte);
3676       pWriter->aDlidx = aDlidx;
3677       pWriter->nDlidx = nLvl;
3678     }
3679   }
3680   return p->rc;
3681 }
3682 
3683 /*
3684 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
3685 ** enough, flush it to disk and return 1. Otherwise discard it and return
3686 ** zero.
3687 */
fts5WriteFlushDlidx(Fts5Index * p,Fts5SegWriter * pWriter)3688 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
3689   int bFlag = 0;
3690 
3691   /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
3692   ** to the database, also write the doclist-index to disk.  */
3693   if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
3694     bFlag = 1;
3695   }
3696   fts5WriteDlidxClear(p, pWriter, bFlag);
3697   pWriter->nEmpty = 0;
3698   return bFlag;
3699 }
3700 
3701 /*
3702 ** This function is called whenever processing of the doclist for the
3703 ** last term on leaf page (pWriter->iBtPage) is completed.
3704 **
3705 ** The doclist-index for that term is currently stored in-memory within the
3706 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
3707 ** writes it out to disk. Or, if it is too small to bother with, discards
3708 ** it.
3709 **
3710 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
3711 */
fts5WriteFlushBtree(Fts5Index * p,Fts5SegWriter * pWriter)3712 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
3713   int bFlag;
3714 
3715   assert( pWriter->iBtPage || pWriter->nEmpty==0 );
3716   if( pWriter->iBtPage==0 ) return;
3717   bFlag = fts5WriteFlushDlidx(p, pWriter);
3718 
3719   if( p->rc==SQLITE_OK ){
3720     const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
3721     /* The following was already done in fts5WriteInit(): */
3722     /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
3723     sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
3724     sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
3725     sqlite3_step(p->pIdxWriter);
3726     p->rc = sqlite3_reset(p->pIdxWriter);
3727   }
3728   pWriter->iBtPage = 0;
3729 }
3730 
/*
** This is called once for each leaf page except the first that contains
** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
** is larger than all terms written to earlier leaves, and equal to or
** smaller than the first term on the new leaf.
**
** The entry for the previous b-tree page, if any, is flushed first by
** fts5WriteFlushBtree(), which reads the old values of
** Fts5SegWriter.btterm and Fts5SegWriter.iBtPage. Only then are those
** two fields overwritten with the new split-key and the current leaf
** page number. The order of the three statements therefore matters.
**
** If an error occurs, an error code is left in Fts5Index.rc. If an error
** has already occurred when this function is called, nothing is written
** by fts5WriteFlushBtree(), but Fts5SegWriter.iBtPage is still updated.
*/
static void fts5WriteBtreeTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer object */
  int nTerm, const u8 *pTerm      /* First term on new page */
){
  /* Flush the entry for the previous page before its term is replaced. */
  fts5WriteFlushBtree(p, pWriter);
  fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
  pWriter->iBtPage = pWriter->writer.pgno;
}
3749 
3750 /*
3751 ** This function is called when flushing a leaf page that contains no
3752 ** terms at all to disk.
3753 */
fts5WriteBtreeNoTerm(Fts5Index * p,Fts5SegWriter * pWriter)3754 static void fts5WriteBtreeNoTerm(
3755   Fts5Index *p,                   /* FTS5 backend object */
3756   Fts5SegWriter *pWriter          /* Writer object */
3757 ){
3758   /* If there were no rowids on the leaf page either and the doclist-index
3759   ** has already been started, append an 0x00 byte to it.  */
3760   if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
3761     Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
3762     assert( pDlidx->bPrevValid );
3763     sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
3764   }
3765 
3766   /* Increment the "number of sequential leaves without a term" counter. */
3767   pWriter->nEmpty++;
3768 }
3769 
fts5DlidxExtractFirstRowid(Fts5Buffer * pBuf)3770 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
3771   i64 iRowid;
3772   int iOff;
3773 
3774   iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
3775   fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
3776   return iRowid;
3777 }
3778 
3779 /*
3780 ** Rowid iRowid has just been appended to the current leaf page. It is the
3781 ** first on the page. This function appends an appropriate entry to the current
3782 ** doclist-index.
3783 */
fts5WriteDlidxAppend(Fts5Index * p,Fts5SegWriter * pWriter,i64 iRowid)3784 static void fts5WriteDlidxAppend(
3785   Fts5Index *p,
3786   Fts5SegWriter *pWriter,
3787   i64 iRowid
3788 ){
3789   int i;
3790   int bDone = 0;
3791 
3792   for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
3793     i64 iVal;
3794     Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
3795 
3796     if( pDlidx->buf.n>=p->pConfig->pgsz ){
3797       /* The current doclist-index page is full. Write it to disk and push
3798       ** a copy of iRowid (which will become the first rowid on the next
3799       ** doclist-index leaf page) up into the next level of the b-tree
3800       ** hierarchy. If the node being flushed is currently the root node,
3801       ** also push its first rowid upwards. */
3802       pDlidx->buf.p[0] = 0x01;    /* Not the root node */
3803       fts5DataWrite(p,
3804           FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
3805           pDlidx->buf.p, pDlidx->buf.n
3806       );
3807       fts5WriteDlidxGrow(p, pWriter, i+2);
3808       pDlidx = &pWriter->aDlidx[i];
3809       if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
3810         i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
3811 
3812         /* This was the root node. Push its first rowid up to the new root. */
3813         pDlidx[1].pgno = pDlidx->pgno;
3814         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
3815         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
3816         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
3817         pDlidx[1].bPrevValid = 1;
3818         pDlidx[1].iPrev = iFirst;
3819       }
3820 
3821       sqlite3Fts5BufferZero(&pDlidx->buf);
3822       pDlidx->bPrevValid = 0;
3823       pDlidx->pgno++;
3824     }else{
3825       bDone = 1;
3826     }
3827 
3828     if( pDlidx->bPrevValid ){
3829       iVal = iRowid - pDlidx->iPrev;
3830     }else{
3831       i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
3832       assert( pDlidx->buf.n==0 );
3833       sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
3834       sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
3835       iVal = iRowid;
3836     }
3837 
3838     sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
3839     pDlidx->bPrevValid = 1;
3840     pDlidx->iPrev = iRowid;
3841   }
3842 }
3843 
/*
** Write the leaf page currently accumulated in pWriter->writer to the
** database, then prepare the writer to begin accumulating the next
** leaf page.
**
** The page written consists of the contents of Fts5PageWriter.buf, with
** its size stored in the 2-byte szLeaf field at offset 2 of the header,
** followed by the contents of Fts5PageWriter.pgidx if at least one term
** was written to the page. If no term was written to the page,
** fts5WriteBtreeNoTerm() is invoked instead.
**
** Afterwards the page number and the Fts5SegWriter.nLeafWritten counter
** are incremented, and the new page consists of a zeroed 4-byte header
** only.
*/
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->writer;
  i64 iRowid;

  /* The page-index is empty if and only if no term has been written. */
  assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );

  /* Set the szLeaf header field. This must be done before the pgidx is
  ** appended below, as szLeaf does not include the pgidx. */
  assert( 0==fts5GetU16(&pPage->buf.p[2]) );
  fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);

  if( pWriter->bFirstTermInPage ){
    /* No term was written to this page. */
    assert( pPage->pgidx.n==0 );
    fts5WriteBtreeNoTerm(p, pWriter);
  }else{
    /* Append the pgidx to the page buffer. */
    fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
  }

  /* Write the page out to disk */
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. It begins with a zeroed 4-byte header. */
  fts5BufferZero(&pPage->buf);
  fts5BufferZero(&pPage->pgidx);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->iPrevPgidx = 0;
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;
  pWriter->bFirstRowidInPage = 1;
}
3882 
/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** If the current leaf page does not have room for the term, it is flushed
** to disk first. An entry for the term is added to the page-index, and
** the term is then appended to the page buffer. If it is not the first
** term on the page, only the suffix that differs from the previous term
** is stored, preceded by the number of bytes shared with that term.
**
** If an error occurs, set the Fts5Index.rc error code. This function must
** not be called if an error has already occurred (Fts5Index.rc must be
** SQLITE_OK on entry).
*/
static void fts5WriteAppendTerm(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->writer;
  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;

  assert( p->rc==SQLITE_OK );
  /* The page buffer always contains at least the 4-byte page header. If
  ** it contains nothing else, no term can have been written to the page. */
  assert( pPage->buf.n>=4 );
  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );

  /* If the current leaf page is full, flush it to disk. A page that
  ** contains only the header is not flushed. In either case ensure that
  ** the buffer has enough space for the new term. */
  if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
    if( pPage->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
    }
    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
  }

  /* Add an entry to the page-index for the new term. Each entry is a
  ** varint containing the offset of the term within the page, relative
  ** to the offset recorded by the previous entry (Fts5PageWriter.iPrevPgidx).
  ** The varint is written directly into the pgidx buffer, which is not
  ** grown here. */
  pPgidx->n += sqlite3Fts5PutVarint(
      &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
  );
  pPage->iPrevPgidx = pPage->buf.n;
#if 0
  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  pPgidx->n += 2;
#endif

  if( pWriter->bFirstTermInPage ){
    /* The first term on each page is stored in full. */
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one
      ** byte longer than the longest prefix (pTerm/nTerm) shares with the
      ** previous term.
      **
      ** Usually, the previous term is available in pPage->term. The exception
      ** is if this is the first term written in an incremental-merge step.
      ** In this case the previous term is not available, so just write a
      ** copy of (pTerm/nTerm) into the parent node. This is slightly
      ** inefficient, but still correct.  */
      int n = nTerm;
      if( pPage->term.n ){
        n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
      }
      fts5WriteBtreeTerm(p, pWriter, n, pTerm);
      pPage = &pWriter->writer;
    }
  }else{
    /* Not the first term on the page. Write the number of bytes that the
    ** new term shares with the previous term. */
    nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
  }

  /* Append the number of bytes of new data, then the term data itself
  ** to the page. */
  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  pWriter->bFirstTermInPage = 0;

  /* The next rowid written begins a new doclist. */
  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  /* The doclist-index for the previous term must already have been
  ** flushed or discarded. Record the page on which the new doclist
  ** begins. */
  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pPage->pgno;
}
3964 
3965 /*
3966 ** Append a rowid and position-list size field to the writers output.
3967 */
fts5WriteAppendRowid(Fts5Index * p,Fts5SegWriter * pWriter,i64 iRowid)3968 static void fts5WriteAppendRowid(
3969   Fts5Index *p,
3970   Fts5SegWriter *pWriter,
3971   i64 iRowid
3972 ){
3973   if( p->rc==SQLITE_OK ){
3974     Fts5PageWriter *pPage = &pWriter->writer;
3975 
3976     if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
3977       fts5WriteFlushLeaf(p, pWriter);
3978     }
3979 
3980     /* If this is to be the first rowid written to the page, set the
3981     ** rowid-pointer in the page-header. Also append a value to the dlidx
3982     ** buffer, in case a doclist-index is required.  */
3983     if( pWriter->bFirstRowidInPage ){
3984       fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
3985       fts5WriteDlidxAppend(p, pWriter, iRowid);
3986     }
3987 
3988     /* Write the rowid. */
3989     if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
3990       fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
3991     }else{
3992       assert( p->rc || iRowid>pWriter->iPrevRowid );
3993       fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
3994     }
3995     pWriter->iPrevRowid = iRowid;
3996     pWriter->bFirstRowidInDoclist = 0;
3997     pWriter->bFirstRowidInPage = 0;
3998   }
3999 }
4000 
/*
** Append the nData bytes of position-list data in buffer aData to the
** output of writer pWriter.
**
** While the data remaining would fill the current leaf page, a chunk of
** it is copied to the page and the page flushed to disk. The chunk is
** extended one varint at a time until it is at least as large as the
** space left on the page, so that no varint is split between two pages.
** Whatever is left over is appended to the (new) current page.
**
** The loop exits early if an error occurs.
*/
static void fts5WriteAppendPoslistData(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  const u8 *aData,
  int nData
){
  Fts5PageWriter *pLeaf = &pWriter->writer;
  const u8 *aRem = aData;         /* Data not yet written */
  int nRem = nData;               /* Bytes of data in aRem[] */

  assert( p->pConfig->pgsz>0 );
  while( p->rc==SQLITE_OK
      && (pLeaf->buf.n + pLeaf->pgidx.n + nRem)>=p->pConfig->pgsz
  ){
    int nSpace = p->pConfig->pgsz - pLeaf->buf.n - pLeaf->pgidx.n;
    int nChunk;

    /* Advance nChunk over whole varints until nSpace is reached. */
    for(nChunk=0; nChunk<nSpace; ){
      i64 iUnused;
      nChunk += fts5GetVarint(&aRem[nChunk], (u64*)&iUnused);
    }

    fts5BufferAppendBlob(&p->rc, &pLeaf->buf, nChunk, aRem);
    aRem += nChunk;
    nRem -= nChunk;
    fts5WriteFlushLeaf(p, pWriter);
  }

  if( nRem>0 ){
    fts5BufferAppendBlob(&p->rc, &pLeaf->buf, nRem, aRem);
  }
}
4030 
4031 /*
4032 ** Flush any data cached by the writer object to the database. Free any
4033 ** allocations associated with the writer.
4034 */
fts5WriteFinish(Fts5Index * p,Fts5SegWriter * pWriter,int * pnLeaf)4035 static void fts5WriteFinish(
4036   Fts5Index *p,
4037   Fts5SegWriter *pWriter,         /* Writer object */
4038   int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
4039 ){
4040   int i;
4041   Fts5PageWriter *pLeaf = &pWriter->writer;
4042   if( p->rc==SQLITE_OK ){
4043     assert( pLeaf->pgno>=1 );
4044     if( pLeaf->buf.n>4 ){
4045       fts5WriteFlushLeaf(p, pWriter);
4046     }
4047     *pnLeaf = pLeaf->pgno-1;
4048     if( pLeaf->pgno>1 ){
4049       fts5WriteFlushBtree(p, pWriter);
4050     }
4051   }
4052   fts5BufferFree(&pLeaf->term);
4053   fts5BufferFree(&pLeaf->buf);
4054   fts5BufferFree(&pLeaf->pgidx);
4055   fts5BufferFree(&pWriter->btterm);
4056 
4057   for(i=0; i<pWriter->nDlidx; i++){
4058     sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
4059   }
4060   sqlite3_free(pWriter->aDlidx);
4061 }
4062 
/*
** Initialize the writer object pWriter so that it may be used to write
** a new segment with segment-id iSegid, beginning at leaf page 1.
**
** This function zeroes *pWriter, allocates the first element of the
** doclist-index array and the page and page-index buffers, and prepares
** the Fts5Index.pIdxWriter statement if that has not already been done.
**
** If an error occurs, an error code is left in Fts5Index.rc. In this
** case the page header is not written and no value is bound to the
** statement.
*/
static void fts5WriteInit(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int iSegid
){
  const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;

  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = iSegid;

  /* Ensure that aDlidx[0] exists. Other functions access it directly. */
  fts5WriteDlidxGrow(p, pWriter, 1);
  pWriter->writer.pgno = 1;
  pWriter->bFirstTermInPage = 1;
  pWriter->iBtPage = 1;

  assert( pWriter->writer.buf.n==0 );
  assert( pWriter->writer.pgidx.n==0 );

  /* Grow the two buffers to pgsz + padding bytes in size. */
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);

  /* Prepare the statement used to insert rows into the %_idx table, if
  ** it has not already been prepared. */
  if( p->pIdxWriter==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
          "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
          pConfig->zDb, pConfig->zName
    ));
  }

  if( p->rc==SQLITE_OK ){
    /* Initialize the 4-byte leaf-page header to 0x00. */
    memset(pWriter->writer.buf.p, 0, 4);
    pWriter->writer.buf.n = 4;

    /* Bind the current output segment id to the index-writer. This is an
    ** optimization over binding the same value over and over as rows are
    ** inserted into %_idx by the current writer.  */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}
4104 
/*
** Iterator pIter was used to iterate through the input segments of an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
**
** Each segment-iterator in pIter that has an associated segment is
** handled in one of two ways:
**
**   * If the iterator has no current leaf page, the segment is marked
**     as empty by setting both its first and last page numbers to 0.
**
**   * Otherwise, the leaf page that contains the iterator's current term
**     is rewritten so that it begins with that term, the segment's first
**     page number is set to that page, and the leaves that precede it
**     are passed to fts5DataDelete().
**
** If an error occurs, an error code is left in Fts5Index.rc.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
  int i;
  Fts5Buffer buf;                 /* Used to assemble each rewritten page */
  memset(&buf, 0, sizeof(Fts5Buffer));
  for(i=0; i<pIter->nSeg; i++){
    Fts5SegIter *pSeg = &pIter->aSeg[i];
    if( pSeg->pSeg==0 ){
      /* no-op */
    }else if( pSeg->pLeaf==0 ){
      /* All keys from this input segment have been transferred to the output.
      ** Set both the first and last page-numbers to 0 to indicate that the
      ** segment is now empty. */
      pSeg->pSeg->pgnoLast = 0;
      pSeg->pSeg->pgnoFirst = 0;
    }else{
      int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
      i64 iLeafRowid;
      Fts5Data *pData;
      int iId = pSeg->pSeg->iSegid;
      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};

      iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
      pData = fts5DataRead(p, iLeafRowid);
      if( pData ){
        /* Assemble the new page: a zeroed 4-byte header, the current term
        ** in full, then all data from the original page between offset
        ** iOff and the end of the leaf data (szLeaf).
        **
        ** NOTE(review): iOff is not checked against pData->szLeaf here. If
        ** iOff could ever exceed szLeaf, the size passed to the final
        ** fts5BufferAppendBlob() call would be negative - confirm whether
        ** this is guaranteed not to happen. */
        fts5BufferZero(&buf);
        fts5BufferGrow(&p->rc, &buf, pData->nn);
        fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
        fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
        fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
        fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
        if( p->rc==SQLITE_OK ){
          /* Set the szLeaf field */
          fts5PutU16(&buf.p[2], (u16)buf.n);
        }

        /* Set up the new page-index array. The first entry is always 4,
        ** as the first term on the new page immediately follows the 4-byte
        ** header. If the doclist for the current term ends on this page,
        ** further terms follow it. In that case append an entry for the
        ** first of them, followed by a copy of the original page-index
        ** starting at offset pSeg->iPgidxOff. */
        fts5BufferAppendVarint(&p->rc, &buf, 4);
        if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
         && pSeg->iEndofDoclist<pData->szLeaf
        ){
          int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
          fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
          fts5BufferAppendBlob(&p->rc, &buf,
              pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
          );
        }

        /* Replace the original page with the new one. The range passed to
        ** fts5DataDelete() runs from page 1 of the segment to the page
        ** being rewritten, which is then written back. */
        fts5DataRelease(pData);
        pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
  }
  fts5BufferFree(&buf);
}
4166 
/*
** Callback passed to fts5ChunkIterate() by the merge code. The context
** pointer pCtx is the Fts5SegWriter that output is being written to.
** Append the chunk of position-list data (pChunk/nChunk) to it.
*/
static void fts5MergeChunkCallback(
  Fts5Index *p,
  void *pCtx,
  const u8 *pChunk, int nChunk
){
  fts5WriteAppendPoslistData(p, (Fts5SegWriter*)pCtx, pChunk, nChunk);
}
4175 
4176 /*
4177 **
4178 */
fts5IndexMergeLevel(Fts5Index * p,Fts5Structure ** ppStruct,int iLvl,int * pnRem)4179 static void fts5IndexMergeLevel(
4180   Fts5Index *p,                   /* FTS5 backend object */
4181   Fts5Structure **ppStruct,       /* IN/OUT: Stucture of index */
4182   int iLvl,                       /* Level to read input from */
4183   int *pnRem                      /* Write up to this many output leaves */
4184 ){
4185   Fts5Structure *pStruct = *ppStruct;
4186   Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4187   Fts5StructureLevel *pLvlOut;
4188   Fts5Iter *pIter = 0;       /* Iterator to read input data */
4189   int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
4190   int nInput;                     /* Number of input segments */
4191   Fts5SegWriter writer;           /* Writer object */
4192   Fts5StructureSegment *pSeg;     /* Output segment */
4193   Fts5Buffer term;
4194   int bOldest;                    /* True if the output segment is the oldest */
4195   int eDetail = p->pConfig->eDetail;
4196   const int flags = FTS5INDEX_QUERY_NOOUTPUT;
4197   int bTermWritten = 0;           /* True if current term already output */
4198 
4199   assert( iLvl<pStruct->nLevel );
4200   assert( pLvl->nMerge<=pLvl->nSeg );
4201 
4202   memset(&writer, 0, sizeof(Fts5SegWriter));
4203   memset(&term, 0, sizeof(Fts5Buffer));
4204   if( pLvl->nMerge ){
4205     pLvlOut = &pStruct->aLevel[iLvl+1];
4206     assert( pLvlOut->nSeg>0 );
4207     nInput = pLvl->nMerge;
4208     pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
4209 
4210     fts5WriteInit(p, &writer, pSeg->iSegid);
4211     writer.writer.pgno = pSeg->pgnoLast+1;
4212     writer.iBtPage = 0;
4213   }else{
4214     int iSegid = fts5AllocateSegid(p, pStruct);
4215 
4216     /* Extend the Fts5Structure object as required to ensure the output
4217     ** segment exists. */
4218     if( iLvl==pStruct->nLevel-1 ){
4219       fts5StructureAddLevel(&p->rc, ppStruct);
4220       pStruct = *ppStruct;
4221     }
4222     fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
4223     if( p->rc ) return;
4224     pLvl = &pStruct->aLevel[iLvl];
4225     pLvlOut = &pStruct->aLevel[iLvl+1];
4226 
4227     fts5WriteInit(p, &writer, iSegid);
4228 
4229     /* Add the new segment to the output level */
4230     pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
4231     pLvlOut->nSeg++;
4232     pSeg->pgnoFirst = 1;
4233     pSeg->iSegid = iSegid;
4234     pStruct->nSegment++;
4235 
4236     /* Read input from all segments in the input level */
4237     nInput = pLvl->nSeg;
4238   }
4239   bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
4240 
4241   assert( iLvl>=0 );
4242   for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
4243       fts5MultiIterEof(p, pIter)==0;
4244       fts5MultiIterNext(p, pIter, 0, 0)
4245   ){
4246     Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4247     int nPos;                     /* position-list size field value */
4248     int nTerm;
4249     const u8 *pTerm;
4250 
4251     pTerm = fts5MultiIterTerm(pIter, &nTerm);
4252     if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
4253       if( pnRem && writer.nLeafWritten>nRem ){
4254         break;
4255       }
4256       fts5BufferSet(&p->rc, &term, nTerm, pTerm);
4257       bTermWritten =0;
4258     }
4259 
4260     /* Check for key annihilation. */
4261     if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
4262 
4263     if( p->rc==SQLITE_OK && bTermWritten==0 ){
4264       /* This is a new term. Append a term to the output segment. */
4265       fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
4266       bTermWritten = 1;
4267     }
4268 
4269     /* Append the rowid to the output */
4270     /* WRITEPOSLISTSIZE */
4271     fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
4272 
4273     if( eDetail==FTS5_DETAIL_NONE ){
4274       if( pSegIter->bDel ){
4275         fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4276         if( pSegIter->nPos>0 ){
4277           fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4278         }
4279       }
4280     }else{
4281       /* Append the position-list data to the output */
4282       nPos = pSegIter->nPos*2 + pSegIter->bDel;
4283       fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
4284       fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
4285     }
4286   }
4287 
4288   /* Flush the last leaf page to disk. Set the output segment b-tree height
4289   ** and last leaf page number at the same time.  */
4290   fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
4291 
4292   if( fts5MultiIterEof(p, pIter) ){
4293     int i;
4294 
4295     /* Remove the redundant segments from the %_data table */
4296     for(i=0; i<nInput; i++){
4297       fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
4298     }
4299 
4300     /* Remove the redundant segments from the input level */
4301     if( pLvl->nSeg!=nInput ){
4302       int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
4303       memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
4304     }
4305     pStruct->nSegment -= nInput;
4306     pLvl->nSeg -= nInput;
4307     pLvl->nMerge = 0;
4308     if( pSeg->pgnoLast==0 ){
4309       pLvlOut->nSeg--;
4310       pStruct->nSegment--;
4311     }
4312   }else{
4313     assert( pSeg->pgnoLast>0 );
4314     fts5TrimSegments(p, pIter);
4315     pLvl->nMerge = nInput;
4316   }
4317 
4318   fts5MultiIterFree(pIter);
4319   fts5BufferFree(&term);
4320   if( pnRem ) *pnRem -= writer.nLeafWritten;
4321 }
4322 
4323 /*
4324 ** Do up to nPg pages of automerge work on the index.
4325 **
4326 ** Return true if any changes were actually made, or false otherwise.
4327 */
fts5IndexMerge(Fts5Index * p,Fts5Structure ** ppStruct,int nPg,int nMin)4328 static int fts5IndexMerge(
4329   Fts5Index *p,                   /* FTS5 backend object */
4330   Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
4331   int nPg,                        /* Pages of work to do */
4332   int nMin                        /* Minimum number of segments to merge */
4333 ){
4334   int nRem = nPg;
4335   int bRet = 0;
4336   Fts5Structure *pStruct = *ppStruct;
4337   while( nRem>0 && p->rc==SQLITE_OK ){
4338     int iLvl;                   /* To iterate through levels */
4339     int iBestLvl = 0;           /* Level offering the most input segments */
4340     int nBest = 0;              /* Number of input segments on best level */
4341 
4342     /* Set iBestLvl to the level to read input segments from. */
4343     assert( pStruct->nLevel>0 );
4344     for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
4345       Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4346       if( pLvl->nMerge ){
4347         if( pLvl->nMerge>nBest ){
4348           iBestLvl = iLvl;
4349           nBest = pLvl->nMerge;
4350         }
4351         break;
4352       }
4353       if( pLvl->nSeg>nBest ){
4354         nBest = pLvl->nSeg;
4355         iBestLvl = iLvl;
4356       }
4357     }
4358 
4359     /* If nBest is still 0, then the index must be empty. */
4360 #ifdef SQLITE_DEBUG
4361     for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
4362       assert( pStruct->aLevel[iLvl].nSeg==0 );
4363     }
4364 #endif
4365 
4366     if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
4367       break;
4368     }
4369     bRet = 1;
4370     fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
4371     if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
4372       fts5StructurePromote(p, iBestLvl+1, pStruct);
4373     }
4374   }
4375   *ppStruct = pStruct;
4376   return bRet;
4377 }
4378 
4379 /*
4380 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
4381 ** segment. This function updates the write-counter accordingly and, if
4382 ** necessary, performs incremental merge work.
4383 **
4384 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4385 ** already occurred, this function is a no-op.
4386 */
fts5IndexAutomerge(Fts5Index * p,Fts5Structure ** ppStruct,int nLeaf)4387 static void fts5IndexAutomerge(
4388   Fts5Index *p,                   /* FTS5 backend object */
4389   Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
4390   int nLeaf                       /* Number of output leaves just written */
4391 ){
4392   if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){
4393     Fts5Structure *pStruct = *ppStruct;
4394     u64 nWrite;                   /* Initial value of write-counter */
4395     int nWork;                    /* Number of work-quanta to perform */
4396     int nRem;                     /* Number of leaf pages left to write */
4397 
4398     /* Update the write-counter. While doing so, set nWork. */
4399     nWrite = pStruct->nWriteCounter;
4400     nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
4401     pStruct->nWriteCounter += nLeaf;
4402     nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
4403 
4404     fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
4405   }
4406 }
4407 
/*
** Starting at level 0 and moving upwards, fully merge each level that
** holds at least pConfig->nCrisisMerge segments (fts5IndexMergeLevel() is
** called with no page budget). The walk stops at the first level that is
** below the threshold, or as soon as an error is recorded in p->rc.
**
** *ppStruct is updated before returning, since fts5IndexMergeLevel() may
** replace the structure object.
*/
static void fts5IndexCrisismerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct        /* IN/OUT: Current structure of index */
){
  Fts5Structure *pIdx = *ppStruct;
  const int nLimit = p->pConfig->nCrisisMerge;
  int iLevel;

  assert( p->rc!=SQLITE_OK || pIdx->nLevel>0 );
  for(iLevel=0;
      p->rc==SQLITE_OK && pIdx->aLevel[iLevel].nSeg>=nLimit;
      iLevel++
  ){
    fts5IndexMergeLevel(p, &pIdx, iLevel, 0);
    /* A successful merge guarantees the output level (iLevel+1) exists. */
    assert( p->rc!=SQLITE_OK || pIdx->nLevel>(iLevel+1) );
    fts5StructurePromote(p, iLevel+1, pIdx);
  }
  *ppStruct = pIdx;
}
4425 
/*
** Return the error code currently stored in p->rc and reset p->rc to
** SQLITE_OK, so that the handle is ready for the next operation.
*/
static int fts5IndexReturn(Fts5Index *p){
  const int rcPending = p->rc;
  p->rc = SQLITE_OK;
  return rcPending;
}
4431 
4432 typedef struct Fts5FlushCtx Fts5FlushCtx;
4433 struct Fts5FlushCtx {
4434   Fts5Index *pIdx;
4435   Fts5SegWriter writer;
4436 };
4437 
4438 /*
4439 ** Buffer aBuf[] contains a list of varints, all small enough to fit
4440 ** in a 32-bit integer. Return the size of the largest prefix of this
4441 ** list nMax bytes or less in size.
4442 */
fts5PoslistPrefix(const u8 * aBuf,int nMax)4443 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
4444   int ret;
4445   u32 dummy;
4446   ret = fts5GetVarint32(aBuf, dummy);
4447   if( ret<nMax ){
4448     while( 1 ){
4449       int i = fts5GetVarint32(&aBuf[ret], dummy);
4450       if( (ret + i) > nMax ) break;
4451       ret += i;
4452     }
4453   }
4454   return ret;
4455 }
4456 
/*
** Flush the contents of the in-memory hash table (p->pHash) to a new
** level-0 segment on disk. Also update the corresponding structure record.
**
** Outline:
**
**   1. Read the current structure and allocate a segment-id.
**   2. For each term in the hash table, append the term and then its
**      doclist to the segment writer. A doclist that does not fit on the
**      current leaf is copied one rowid/poslist at a time, flushing leaf
**      pages as they fill. An oversized poslist is split between pages,
**      always on a varint boundary.
**   3. Add the new segment to level 0 of the structure.
**   4. Run automerge and crisis-merge work, then write the structure back.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
  Fts5Hash *pHash = p->pHash;
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p);
  iSegid = fts5AllocateSegid(p, pStruct);
  fts5StructureInvalidate(p);

  /* NOTE(review): a zero iSegid is treated as "could not allocate" -
  ** presumably p->rc is set in that case; verify in fts5AllocateSegid(). */
  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;
    int eDetail = p->pConfig->eDetail;
    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
    Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iSegid);

    pBuf = &writer.writer.buf;
    pPgidx = &writer.writer.pgidx;

    /* fts5WriteInit() should have initialized the buffers to (most likely)
    ** the maximum space required. */
    assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
    assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }
    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;          /* Buffer containing term */
      const u8 *pDoclist;         /* Pointer to doclist for this term */
      int nDoclist;               /* Size of doclist in bytes */

      /* Write the term for this entry to disk. The term is treated as a
      ** nul-terminated string here (its length is taken with strlen()). */
      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);

      assert( writer.bFirstRowidInPage==0 );
      if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;           /* Absolute rowid of the current entry */
        i64 iDelta = 0;           /* Rowid delta as read from the doclist */
        int iOff = 0;             /* Current offset within pDoclist[] */

        /* The entire doclist will not fit on this leaf. The following
        ** loop iterates through the poslists that make up the current
        ** doclist.  */
        while( p->rc==SQLITE_OK && iOff<nDoclist ){
          iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
          iRowid += iDelta;

          /* The first rowid on a leaf is stored as an absolute value, with
          ** its offset recorded in the first two bytes of the page. All
          ** other rowids are stored as the delta read from the doclist. */
          if( writer.bFirstRowidInPage ){
            fts5PutU16(&pBuf->p[0], (u16)pBuf->n);   /* first rowid on page */
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
          }else{
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( eDetail==FTS5_DETAIL_NONE ){
            /* detail=none: there is no position list. A rowid may be
            ** followed by one or two 0x00 bytes, which are copied through
            ** unchanged. */
            if( iOff<nDoclist && pDoclist[iOff]==0 ){
              pBuf->p[pBuf->n++] = 0;
              iOff++;
              if( iOff<nDoclist && pDoclist[iOff]==0 ){
                pBuf->p[pBuf->n++] = 0;
                iOff++;
              }
            }
            if( (pBuf->n + pPgidx->n)>=pgsz ){
              fts5WriteFlushLeaf(p, &writer);
            }
          }else{
            int bDummy;
            int nPos;
            /* nCopy is the total size of this entry's poslist: the size
            ** field plus nPos bytes of position data. */
            int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
            nCopy += nPos;
            if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
              /* The entire poslist will fit on the current leaf. So copy
              ** it in one go. */
              fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
            }else{
              /* The entire poslist will not fit on this leaf. So it needs
              ** to be broken into sections. The only qualification being
              ** that each varint must be stored contiguously.  */
              const u8 *pPoslist = &pDoclist[iOff];
              int iPos = 0;       /* Bytes of the poslist copied so far */
              while( p->rc==SQLITE_OK ){
                int nSpace = pgsz - pBuf->n - pPgidx->n;
                int n = 0;
                if( (nCopy - iPos)<=nSpace ){
                  /* The remainder of the poslist fits on this leaf. */
                  n = nCopy - iPos;
                }else{
                  /* Copy as many whole varints as fit in nSpace bytes. */
                  n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
                }
                assert( n>0 );
                fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
                iPos += n;
                if( (pBuf->n + pPgidx->n)>=pgsz ){
                  fts5WriteFlushLeaf(p, &writer);
                }
                if( iPos>=nCopy ) break;
              }
            }
            iOff += nCopy;
          }
        }
      }

      /* TODO2: Doclist terminator written here. */
      /* pBuf->p[pBuf->n++] = '\0'; */
      assert( pBuf->n<=pBuf->nSpace );
      sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureWrite() call below.  */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }

  /* Perform any merge work made due by the pgnoLast pages just written,
  ** then persist the structure and drop the reference taken above. */
  fts5IndexAutomerge(p, &pStruct, pgnoLast);
  fts5IndexCrisismerge(p, &pStruct);
  fts5StructureWrite(p, pStruct);
  fts5StructureRelease(pStruct);
}
4612 
4613 /*
4614 ** Flush any data stored in the in-memory hash tables to the database.
4615 */
fts5IndexFlush(Fts5Index * p)4616 static void fts5IndexFlush(Fts5Index *p){
4617   /* Unless it is empty, flush the hash table to disk */
4618   if( p->nPendingData ){
4619     assert( p->pHash );
4620     p->nPendingData = 0;
4621     fts5FlushOneHash(p);
4622   }
4623 }
4624 
/*
** Build the structure object used as the starting point for an
** "optimize" style merge of pStruct.
**
** Three outcomes are possible:
**
**   * pStruct has fewer than two segments: nothing to do, return NULL.
**
**   * Some level of pStruct either holds every segment, or holds all but
**     one segment with all of them already inputs to a merge: pStruct is
**     usable as it is. Its ref-count is incremented and pStruct itself is
**     returned.
**
**   * Otherwise a new structure with one more level than pStruct is
**     allocated. Every segment is copied to the new top level, ordered
**     from the highest old level to level 0; all other levels are left
**     empty. The new object has a ref-count of 1.
**
** If an allocation fails NULL is returned (sqlite3Fts5MallocZero() is
** passed &p->rc for error reporting).
*/
static Fts5Structure *fts5IndexOptimizeStruct(
  Fts5Index *p,
  Fts5Structure *pStruct
){
  const int nLvlOld = pStruct->nLevel;     /* Levels in the input */
  const int nSegTotal = pStruct->nSegment; /* Segments in the input */
  Fts5Structure *pOut;                     /* New structure */
  Fts5StructureLevel *pTop;                /* Top level of pOut */
  int nAlloc;                              /* Size of allocation in bytes */
  int iLvl;
  int iSeg;
  int nCopied = 0;                         /* Segments copied so far */

  if( nSegTotal<2 ) return 0;

  /* Check whether the existing structure can be used unmodified. */
  iLvl = 0;
  while( iLvl<nLvlOld ){
    const Fts5StructureLevel *pOld = &pStruct->aLevel[iLvl];
    const int nHere = pOld->nSeg;
    const int bAllHere = (nHere==nSegTotal);
    const int bRestMerging = (nHere==nSegTotal-1 && pOld->nMerge==nHere);
    if( bAllHere || bRestMerging ){
      fts5StructureRef(pStruct);
      return pStruct;
    }
    assert( pOld->nMerge<=nHere );
    iLvl++;
  }

  /* Allocate the new structure with room for the extra level. */
  nAlloc = sizeof(Fts5Structure);
  nAlloc += (nLvlOld+1) * sizeof(Fts5StructureLevel);
  pOut = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nAlloc);
  if( pOut==0 ) return 0;

  pOut->nLevel = nLvlOld+1;
  pOut->nRef = 1;
  pOut->nWriteCounter = pStruct->nWriteCounter;

  /* Allocate the segment array of the new top level. */
  nAlloc = nSegTotal * sizeof(Fts5StructureSegment);
  pTop = &pOut->aLevel[nLvlOld];
  pTop->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nAlloc);
  if( pTop->aSeg==0 ){
    sqlite3_free(pOut);
    return 0;
  }

  /* Copy all segments, highest old level first, so that pTop->aSeg[0] is
  ** the first segment of the highest populated level. */
  for(iLvl=nLvlOld-1; iLvl>=0; iLvl--){
    const Fts5StructureLevel *pFrom = &pStruct->aLevel[iLvl];
    for(iSeg=0; iSeg<pFrom->nSeg; iSeg++){
      pTop->aSeg[nCopied] = pFrom->aSeg[iSeg];
      nCopied++;
    }
  }
  pTop->nSeg = nSegTotal;
  pOut->nSegment = nSegTotal;

  return pOut;
}
4686 
/*
** Merge the whole index down as far as fts5IndexOptimizeStruct() allows.
**
** Pending hash-table data is flushed first. The current structure is then
** read and handed to fts5IndexOptimizeStruct(). If that yields a
** structure, its first non-empty level is merged repeatedly, in steps of
** FTS5_OPT_WORK_UNIT leaf pages, until that level is empty or an error
** occurs. The resulting structure is then written back.
**
** Returns the accumulated error code (and resets p->rc).
*/
int sqlite3Fts5IndexOptimize(Fts5Index *p){
  Fts5Structure *pCur;            /* Structure as currently stored */
  Fts5Structure *pOpt;            /* Structure to run the merge on */

  assert( p->rc==SQLITE_OK );
  fts5IndexFlush(p);
  pCur = fts5StructureRead(p);
  fts5StructureInvalidate(p);

  pOpt = pCur ? fts5IndexOptimizeStruct(p, pCur) : 0;
  fts5StructureRelease(pCur);

  assert( pOpt==0 || pOpt->nSegment>0 );
  if( pOpt!=0 ){
    int iSrc = 0;                 /* First level containing any segments */

    /* pOpt holds at least one segment, so this search terminates. */
    while( pOpt->aLevel[iSrc].nSeg==0 ) iSrc++;

    while( p->rc==SQLITE_OK && pOpt->aLevel[iSrc].nSeg>0 ){
      int nBudget = FTS5_OPT_WORK_UNIT;
      fts5IndexMergeLevel(p, &pOpt, iSrc, &nBudget);
    }

    fts5StructureWrite(p, pOpt);
    fts5StructureRelease(pOpt);
  }

  return fts5IndexReturn(p);
}
4716 
4717 /*
4718 ** This is called to implement the special "VALUES('merge', $nMerge)"
4719 ** INSERT command.
4720 */
sqlite3Fts5IndexMerge(Fts5Index * p,int nMerge)4721 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
4722   Fts5Structure *pStruct = fts5StructureRead(p);
4723   if( pStruct ){
4724     int nMin = p->pConfig->nUsermerge;
4725     fts5StructureInvalidate(p);
4726     if( nMerge<0 ){
4727       Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
4728       fts5StructureRelease(pStruct);
4729       pStruct = pNew;
4730       nMin = 2;
4731       nMerge = nMerge*-1;
4732     }
4733     if( pStruct && pStruct->nLevel ){
4734       if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
4735         fts5StructureWrite(p, pStruct);
4736       }
4737     }
4738     fts5StructureRelease(pStruct);
4739   }
4740   return fts5IndexReturn(p);
4741 }
4742 
/*
** Append callback used when building rowid-only doclists (selected by
** fts5SetupPrefixIter() for detail=none tables): append rowid delta
** iDelta to pBuf as a varint. Allocation errors are reported through
** p->rc. The iterator argument is not used.
*/
static void fts5AppendRowid(
  Fts5Index *p,                   /* FTS5 backend object (for p->rc) */
  i64 iDelta,                     /* Rowid delta to append */
  Fts5Iter *pUnused,              /* Unused */
  Fts5Buffer *pBuf                /* Buffer to append to */
){
  UNUSED_PARAM(pUnused);
  fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
}
4752 
/*
** Append callback used when building doclists that carry position data:
** append one complete entry to pBuf, consisting of
**
**   * the rowid delta iDelta, as a varint,
**   * the size field, as a varint equal to twice the number of bytes of
**     position data, and
**   * the position data currently exposed by the iterator
**     (pMulti->base.pData / nData).
**
** Space for the data plus two maximum-length varints is reserved in
** advance. This is a no-op if p->rc is already set or if the buffer
** cannot be grown.
*/
static void fts5AppendPoslist(
  Fts5Index *p,                   /* FTS5 backend object (for p->rc) */
  i64 iDelta,                     /* Rowid delta to append */
  Fts5Iter *pMulti,               /* Iterator supplying the position data */
  Fts5Buffer *pBuf                /* Buffer to append to */
){
  const int nPoslist = pMulti->base.nData;

  assert( nPoslist>0 );
  if( p->rc!=SQLITE_OK ) return;
  if( fts5BufferGrow(&p->rc, pBuf, nPoslist+9+9) ) return;

  fts5BufferSafeAppendVarint(pBuf, iDelta);
  fts5BufferSafeAppendVarint(pBuf, nPoslist*2);
  fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nPoslist);
}
4767 
4768 
/*
** Advance doclist iterator pIter to the next entry.
**
** The next entry starts immediately after the current entry's size field
** (nSize bytes) and position data (nPoslist bytes). If that point is at
** or beyond aEof, the iterator is marked exhausted by setting aPoslist to
** NULL. Otherwise the rowid delta is read and added to iRowid, the size
** field is decoded (nPoslist is the decoded value shifted right by one
** bit) and aPoslist is left pointing at the size field of the new entry.
*/
static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  u8 *pNext = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
  i64 iDelta;

  assert( pIter->aPoslist );
  if( pNext>=pIter->aEof ){
    pIter->aPoslist = 0;
    return;
  }

  pNext += fts5GetVarint(pNext, (u64*)&iDelta);
  pIter->iRowid += iDelta;

  /* Read the position list size. A single byte with the high bit clear
  ** is decoded directly; anything else goes through the varint reader. */
  if( (pNext[0] & 0x80)==0 ){
    pIter->nSize = 1;
    pIter->nPoslist = ((int)(pNext[0])) >> 1;
  }else{
    int nPos;
    pIter->nSize = fts5GetVarint32(pNext, nPos);
    pIter->nPoslist = (nPos>>1);
  }

  pIter->aPoslist = pNext;
}
4794 
/*
** Initialize pIter to iterate through the doclist stored in pBuf, and
** position it on the first entry. With every other field zeroed and
** aPoslist set to the start of the buffer, a single call to
** fts5DoclistIterNext() loads the first entry, or marks the iterator
** exhausted if the buffer is empty.
*/
static void fts5DoclistIterInit(
  Fts5Buffer *pBuf,               /* Buffer containing the doclist */
  Fts5DoclistIter *pIter          /* Iterator to initialize */
){
  u8 *aStart = pBuf->p;

  memset(pIter, 0, sizeof(*pIter));
  pIter->aEof = &aStart[pBuf->n];
  pIter->aPoslist = aStart;
  fts5DoclistIterNext(pIter);
}
4804 
/* The function below is compiled out. It is the function form of the
** fts5MergeAppendDocid() macro that follows, kept for reference only. */
#if 0
/*
** Append a rowid to the doclist in buffer pBuf, encoded as a varint
** holding the delta from the previously written rowid.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif

/*
** Append rowid iRowid to the doclist in buffer pBuf as a varint holding
** the delta from iLastRowid, then set iLastRowid to iRowid.
**
** iLastRowid must be a modifiable lvalue (it is assigned to) and, along
** with iRowid, is evaluated more than once, so neither argument should
** have side effects. Space in pBuf must already have been allocated. The
** macro expands to a brace-enclosed block, not a single expression.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) {       \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                   \
  fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \
  (iLastRowid) = (iRowid);                                     \
}
4828 
4829 /*
4830 ** Swap the contents of buffer *p1 with that of *p2.
4831 */
fts5BufferSwap(Fts5Buffer * p1,Fts5Buffer * p2)4832 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
4833   Fts5Buffer tmp = *p1;
4834   *p1 = *p2;
4835   *p2 = tmp;
4836 }
4837 
/*
** Read the next entry from the delta-encoded rowid list in pBuf.
**
** *piOff is the offset of the next unread byte. If it is at or past the
** end of the buffer, *piOff is set to -1 to indicate end-of-list and
** *piRowid is left unchanged. Otherwise one varint is read, its value is
** added to *piRowid and *piOff is advanced past it.
*/
static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
  const int iStart = *piOff;
  u64 iDelta;

  if( iStart>=pBuf->n ){
    *piOff = -1;
    return;
  }
  *piOff = iStart + sqlite3Fts5GetVarint(&pBuf->p[iStart], &iDelta);
  *piRowid += iDelta;
}
4848 
4849 /*
4850 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
4851 ** In this case the buffers consist of a delta-encoded list of rowids only.
4852 */
fts5MergeRowidLists(Fts5Index * p,Fts5Buffer * p1,Fts5Buffer * p2)4853 static void fts5MergeRowidLists(
4854   Fts5Index *p,                   /* FTS5 backend object */
4855   Fts5Buffer *p1,                 /* First list to merge */
4856   Fts5Buffer *p2                  /* Second list to merge */
4857 ){
4858   int i1 = 0;
4859   int i2 = 0;
4860   i64 iRowid1 = 0;
4861   i64 iRowid2 = 0;
4862   i64 iOut = 0;
4863 
4864   Fts5Buffer out;
4865   memset(&out, 0, sizeof(out));
4866   sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
4867   if( p->rc ) return;
4868 
4869   fts5NextRowid(p1, &i1, &iRowid1);
4870   fts5NextRowid(p2, &i2, &iRowid2);
4871   while( i1>=0 || i2>=0 ){
4872     if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
4873       assert( iOut==0 || iRowid1>iOut );
4874       fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
4875       iOut = iRowid1;
4876       fts5NextRowid(p1, &i1, &iRowid1);
4877     }else{
4878       assert( iOut==0 || iRowid2>iOut );
4879       fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
4880       iOut = iRowid2;
4881       if( i1>=0 && iRowid1==iRowid2 ){
4882         fts5NextRowid(p1, &i1, &iRowid1);
4883       }
4884       fts5NextRowid(p2, &i2, &iRowid2);
4885     }
4886   }
4887 
4888   fts5BufferSwap(&out, p1);
4889   fts5BufferFree(&out);
4890 }
4891 
4892 /*
4893 ** Buffers p1 and p2 contain doclists. This function merges the content
4894 ** of the two doclists together and sets buffer p1 to the result before
4895 ** returning.
4896 **
4897 ** If an error occurs, an error code is left in p->rc. If an error has
4898 ** already occurred, this function is a no-op.
4899 */
fts5MergePrefixLists(Fts5Index * p,Fts5Buffer * p1,Fts5Buffer * p2)4900 static void fts5MergePrefixLists(
4901   Fts5Index *p,                   /* FTS5 backend object */
4902   Fts5Buffer *p1,                 /* First list to merge */
4903   Fts5Buffer *p2                  /* Second list to merge */
4904 ){
4905   if( p2->n ){
4906     i64 iLastRowid = 0;
4907     Fts5DoclistIter i1;
4908     Fts5DoclistIter i2;
4909     Fts5Buffer out = {0, 0, 0};
4910     Fts5Buffer tmp = {0, 0, 0};
4911 
4912     if( sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n) ) return;
4913     fts5DoclistIterInit(p1, &i1);
4914     fts5DoclistIterInit(p2, &i2);
4915 
4916     while( 1 ){
4917       if( i1.iRowid<i2.iRowid ){
4918         /* Copy entry from i1 */
4919         fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
4920         fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize);
4921         fts5DoclistIterNext(&i1);
4922         if( i1.aPoslist==0 ) break;
4923       }
4924       else if( i2.iRowid!=i1.iRowid ){
4925         /* Copy entry from i2 */
4926         fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
4927         fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize);
4928         fts5DoclistIterNext(&i2);
4929         if( i2.aPoslist==0 ) break;
4930       }
4931       else{
4932         /* Merge the two position lists. */
4933         i64 iPos1 = 0;
4934         i64 iPos2 = 0;
4935         int iOff1 = 0;
4936         int iOff2 = 0;
4937         u8 *a1 = &i1.aPoslist[i1.nSize];
4938         u8 *a2 = &i2.aPoslist[i2.nSize];
4939 
4940         i64 iPrev = 0;
4941         Fts5PoslistWriter writer;
4942         memset(&writer, 0, sizeof(writer));
4943 
4944         fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
4945         fts5BufferZero(&tmp);
4946         sqlite3Fts5BufferSize(&p->rc, &tmp, i1.nPoslist + i2.nPoslist);
4947         if( p->rc ) break;
4948 
4949         sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
4950         sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
4951         assert( iPos1>=0 && iPos2>=0 );
4952 
4953         if( iPos1<iPos2 ){
4954           sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
4955           sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
4956         }else{
4957           sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
4958           sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
4959         }
4960 
4961         if( iPos1>=0 && iPos2>=0 ){
4962           while( 1 ){
4963             if( iPos1<iPos2 ){
4964               if( iPos1!=iPrev ){
4965                 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
4966               }
4967               sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
4968               if( iPos1<0 ) break;
4969             }else{
4970               assert( iPos2!=iPrev );
4971               sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
4972               sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
4973               if( iPos2<0 ) break;
4974             }
4975           }
4976         }
4977 
4978         if( iPos1>=0 ){
4979           if( iPos1!=iPrev ){
4980             sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
4981           }
4982           fts5BufferSafeAppendBlob(&tmp, &a1[iOff1], i1.nPoslist-iOff1);
4983         }else{
4984           assert( iPos2>=0 && iPos2!=iPrev );
4985           sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
4986           fts5BufferSafeAppendBlob(&tmp, &a2[iOff2], i2.nPoslist-iOff2);
4987         }
4988 
4989         /* WRITEPOSLISTSIZE */
4990         fts5BufferSafeAppendVarint(&out, tmp.n * 2);
4991         fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
4992         fts5DoclistIterNext(&i1);
4993         fts5DoclistIterNext(&i2);
4994         if( i1.aPoslist==0 || i2.aPoslist==0 ) break;
4995       }
4996     }
4997 
4998     if( i1.aPoslist ){
4999       fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
5000       fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.aEof - i1.aPoslist);
5001     }
5002     else if( i2.aPoslist ){
5003       fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
5004       fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.aEof - i2.aPoslist);
5005     }
5006 
5007     fts5BufferSet(&p->rc, p1, out.n, out.p);
5008     fts5BufferFree(&tmp);
5009     fts5BufferFree(&out);
5010   }
5011 }
5012 
/*
** Set up an iterator for a prefix query.
**
** Every index entry whose term begins with the nToken byte prefix pToken
** is visited, and the doclists of all such terms are combined into a
** single in-memory doclist. That doclist is then wrapped in an Fts5Data
** object and passed to fts5MultiIterNew2() to create the iterator
** returned via *ppIter.
**
** Entries are appended to a working doclist for as long as rowids keep
** increasing. Each time a rowid arrives that is not larger than the
** previous one, the working doclist is set aside in the aBuf[] array and
** a new one is started. All saved lists are merged at the end.
**
** For detail=none tables the doclists contain rowids only; otherwise
** they contain position lists as well. The xMerge/xAppend function
** pointers select the matching implementations.
**
** If an error occurs, an error code is left in p->rc.
*/
static void fts5SetupPrefixIter(
  Fts5Index *p,                   /* Index to read from */
  int bDesc,                      /* True for "ORDER BY rowid DESC" */
  const u8 *pToken,               /* Buffer containing prefix to match */
  int nToken,                     /* Size of buffer pToken in bytes */
  Fts5Colset *pColset,            /* Restrict matches to these columns */
  Fts5Iter **ppIter          /* OUT: New iterator */
){
  Fts5Structure *pStruct;
  Fts5Buffer *aBuf;               /* Array of nBuf saved doclists */
  const int nBuf = 32;

  /* Select the merge and append routines to suit the detail mode. */
  void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*);
  void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*);
  if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
    xMerge = fts5MergeRowidLists;
    xAppend = fts5AppendRowid;
  }else{
    xMerge = fts5MergePrefixLists;
    xAppend = fts5AppendPoslist;
  }

  /* NOTE(review): the code below relies on every aBuf[i].n starting out
  ** as zero - presumably fts5IdxMalloc() returns zeroed memory; confirm. */
  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p);

  if( aBuf && pStruct ){
    const int flags = FTS5INDEX_QUERY_SCAN
                    | FTS5INDEX_QUERY_SKIPEMPTY
                    | FTS5INDEX_QUERY_NOOUTPUT;
    int i;
    i64 iLastRowid = 0;   /* Last rowid appended to doclist (0 if empty) */
    Fts5Iter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Data *pData;      /* Final doclist, packaged for the new iterator */
    Fts5Buffer doclist;   /* Doclist currently being accumulated */
    int bNewTerm = 1;     /* Set by fts5MultiIterNext2() on a term change */

    memset(&doclist, 0, sizeof(doclist));
    fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
    fts5IterSetOutputCb(&p->rc, p1);
    for( /* no-op */ ;
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext2(p, p1, &bNewTerm)
    ){
      Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
      int nTerm = pSeg->term.n;
      const u8 *pTerm = pSeg->term.p;
      p1->xSetOutputs(p1, pSeg);

      /* Stop at the first term that does not begin with the prefix. The
      ** check is only needed when the iterator has moved to a new term. */
      assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
      if( bNewTerm ){
        if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
      }

      /* Skip entries for which the iterator produced no output data. */
      if( p1->base.nData==0 ) continue;

      /* This rowid is not larger than the last one appended, so it cannot
      ** be added to the current doclist. Set the current doclist aside in
      ** aBuf[]: if slot i is empty the doclist is stored there, otherwise
      ** slot i is merged into the doclist, the slot is emptied and the
      ** next slot is tried. The loop ends once the doclist has been
      ** stored, which leaves it empty. */
      if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
        for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
          assert( i<nBuf );
          if( aBuf[i].n==0 ){
            fts5BufferSwap(&doclist, &aBuf[i]);
            fts5BufferZero(&doclist);
          }else{
            xMerge(p, &doclist, &aBuf[i]);
            fts5BufferZero(&aBuf[i]);
          }
        }
        iLastRowid = 0;
      }

      /* Append this entry, with its rowid stored as a delta from the
      ** previous rowid in the current doclist. */
      xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist);
      iLastRowid = p1->base.iRowid;
    }

    /* Merge all saved lists into the final doclist, freeing each slot. */
    for(i=0; i<nBuf; i++){
      if( p->rc==SQLITE_OK ){
        xMerge(p, &doclist, &aBuf[i]);
      }
      fts5BufferFree(&aBuf[i]);
    }
    fts5MultiIterFree(p1);

    /* Copy the doclist into an Fts5Data object whose data area follows
    ** the header within the same allocation. pData is not freed in this
    ** function - presumably fts5MultiIterNew2() takes ownership of it;
    ** verify against that function. */
    pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
    if( pData ){
      pData->p = (u8*)&pData[1];
      pData->nn = pData->szLeaf = doclist.n;
      if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
      fts5MultiIterNew2(p, pData, bDesc, ppIter);
    }
    fts5BufferFree(&doclist);
  }

  fts5StructureRelease(pStruct);
  sqlite3_free(aBuf);
}
5107 
5108 
5109 /*
5110 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
5111 ** to the document with rowid iRowid.
5112 */
sqlite3Fts5IndexBeginWrite(Fts5Index * p,int bDelete,i64 iRowid)5113 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
5114   assert( p->rc==SQLITE_OK );
5115 
5116   /* Allocate the hash table if it has not already been allocated */
5117   if( p->pHash==0 ){
5118     p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
5119   }
5120 
5121   /* Flush the hash table to disk if required */
5122   if( iRowid<p->iWriteRowid
5123    || (iRowid==p->iWriteRowid && p->bDelete==0)
5124    || (p->nPendingData > p->pConfig->nHashSize)
5125   ){
5126     fts5IndexFlush(p);
5127   }
5128 
5129   p->iWriteRowid = iRowid;
5130   p->bDelete = bDelete;
5131   return fts5IndexReturn(p);
5132 }
5133 
5134 /*
5135 ** Commit data to disk.
5136 */
sqlite3Fts5IndexSync(Fts5Index * p)5137 int sqlite3Fts5IndexSync(Fts5Index *p){
5138   assert( p->rc==SQLITE_OK );
5139   fts5IndexFlush(p);
5140   fts5CloseReader(p);
5141   return fts5IndexReturn(p);
5142 }
5143 
5144 /*
5145 ** Discard any data stored in the in-memory hash tables. Do not write it
5146 ** to the database. Additionally, assume that the contents of the %_data
5147 ** table may have changed on disk. So any in-memory caches of %_data
5148 ** records must be invalidated.
5149 */
sqlite3Fts5IndexRollback(Fts5Index * p)5150 int sqlite3Fts5IndexRollback(Fts5Index *p){
5151   fts5CloseReader(p);
5152   fts5IndexDiscardData(p);
5153   fts5StructureInvalidate(p);
5154   /* assert( p->rc==SQLITE_OK ); */
5155   return SQLITE_OK;
5156 }
5157 
5158 /*
5159 ** The %_data table is completely empty when this function is called. This
5160 ** function populates it with the initial structure objects for each index,
5161 ** and the initial version of the "averages" record (a zero-byte blob).
5162 */
sqlite3Fts5IndexReinit(Fts5Index * p)5163 int sqlite3Fts5IndexReinit(Fts5Index *p){
5164   Fts5Structure s;
5165   fts5StructureInvalidate(p);
5166   memset(&s, 0, sizeof(Fts5Structure));
5167   fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
5168   fts5StructureWrite(p, &s);
5169   return fts5IndexReturn(p);
5170 }
5171 
5172 /*
5173 ** Open a new Fts5Index handle. If the bCreate argument is true, create
5174 ** and initialize the underlying %_data table.
5175 **
5176 ** If successful, set *pp to point to the new object and return SQLITE_OK.
5177 ** Otherwise, set *pp to NULL and return an SQLite error code.
5178 */
sqlite3Fts5IndexOpen(Fts5Config * pConfig,int bCreate,Fts5Index ** pp,char ** pzErr)5179 int sqlite3Fts5IndexOpen(
5180   Fts5Config *pConfig,
5181   int bCreate,
5182   Fts5Index **pp,
5183   char **pzErr
5184 ){
5185   int rc = SQLITE_OK;
5186   Fts5Index *p;                   /* New object */
5187 
5188   *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
5189   if( rc==SQLITE_OK ){
5190     p->pConfig = pConfig;
5191     p->nWorkUnit = FTS5_WORK_UNIT;
5192     p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
5193     if( p->zDataTbl && bCreate ){
5194       rc = sqlite3Fts5CreateTable(
5195           pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
5196       );
5197       if( rc==SQLITE_OK ){
5198         rc = sqlite3Fts5CreateTable(pConfig, "idx",
5199             "segid, term, pgno, PRIMARY KEY(segid, term)",
5200             1, pzErr
5201         );
5202       }
5203       if( rc==SQLITE_OK ){
5204         rc = sqlite3Fts5IndexReinit(p);
5205       }
5206     }
5207   }
5208 
5209   assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
5210   if( rc ){
5211     sqlite3Fts5IndexClose(p);
5212     *pp = 0;
5213   }
5214   return rc;
5215 }
5216 
5217 /*
5218 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
5219 */
sqlite3Fts5IndexClose(Fts5Index * p)5220 int sqlite3Fts5IndexClose(Fts5Index *p){
5221   int rc = SQLITE_OK;
5222   if( p ){
5223     assert( p->pReader==0 );
5224     fts5StructureInvalidate(p);
5225     sqlite3_finalize(p->pWriter);
5226     sqlite3_finalize(p->pDeleter);
5227     sqlite3_finalize(p->pIdxWriter);
5228     sqlite3_finalize(p->pIdxDeleter);
5229     sqlite3_finalize(p->pIdxSelect);
5230     sqlite3_finalize(p->pDataVersion);
5231     sqlite3Fts5HashFree(p->pHash);
5232     sqlite3_free(p->zDataTbl);
5233     sqlite3_free(p);
5234   }
5235   return rc;
5236 }
5237 
/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are fewer than nChar characters in total.
**
** For the purposes of this function a "character" is either a single
** byte with a value less than 0xC0, or a byte with a value of 0xC0 or
** greater together with all continuation bytes (0x80..0xBF) that
** immediately follow it.
**
** No byte at or beyond offset nByte is ever read, and the value returned
** is never larger than nByte. This matters if the buffer ends part way
** through a multi-byte character: the scan for continuation bytes stops
** at the end of the buffer instead of running on into whatever memory
** follows it.
*/
int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  int n = 0;                      /* Bytes consumed so far */
  int i;                          /* Characters consumed so far */
  for(i=0; i<nChar; i++){
    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
    if( (unsigned char)p[n++]>=0xc0 ){
      /* Skip continuation bytes, but never step outside the buffer. */
      while( n<nByte && (p[n] & 0xc0)==0x80 ) n++;
    }
  }
  return n;
}
5258 
/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string.
**
** A byte with value 0xC0 or greater is counted as one character together
** with any continuation bytes (0x80..0xBF) that directly follow it. Every
** other byte is counted as one character by itself.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nChar;                      /* Characters counted so far */
  int iByte = 0;                  /* Offset of next byte to examine */

  for(nChar=0; iByte<nIn; nChar++){
    unsigned char c = (unsigned char)pIn[iByte];
    iByte++;
    if( c<0xc0 ) continue;        /* Single byte character */

    /* Lead byte: swallow the continuation bytes that belong to it. */
    while( iByte<nIn && (pIn[iByte] & 0xc0)==0x80 ){
      iByte++;
    }
  }
  return nChar;
}
5274 
5275 /*
5276 ** Insert or remove data to or from the index. Each time a document is
5277 ** added to or removed from the index, this function is called one or more
5278 ** times.
5279 **
5280 ** For an insert, it must be called once for each token in the new document.
5281 ** If the operation is a delete, it must be called (at least) once for each
5282 ** unique token in the document with an iCol value less than zero. The iPos
5283 ** argument is ignored for a delete.
5284 */
sqlite3Fts5IndexWrite(Fts5Index * p,int iCol,int iPos,const char * pToken,int nToken)5285 int sqlite3Fts5IndexWrite(
5286   Fts5Index *p,                   /* Index to write to */
5287   int iCol,                       /* Column token appears in (-ve -> delete) */
5288   int iPos,                       /* Position of token within column */
5289   const char *pToken, int nToken  /* Token to add or remove to or from index */
5290 ){
5291   int i;                          /* Used to iterate through indexes */
5292   int rc = SQLITE_OK;             /* Return code */
5293   Fts5Config *pConfig = p->pConfig;
5294 
5295   assert( p->rc==SQLITE_OK );
5296   assert( (iCol<0)==p->bDelete );
5297 
5298   /* Add the entry to the main terms index. */
5299   rc = sqlite3Fts5HashWrite(
5300       p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
5301   );
5302 
5303   for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
5304     const int nChar = pConfig->aPrefix[i];
5305     int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
5306     if( nByte ){
5307       rc = sqlite3Fts5HashWrite(p->pHash,
5308           p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
5309           nByte
5310       );
5311     }
5312   }
5313 
5314   return rc;
5315 }
5316 
5317 /*
5318 ** Open a new iterator to iterate though all rowid that match the
5319 ** specified token or token prefix.
5320 */
sqlite3Fts5IndexQuery(Fts5Index * p,const char * pToken,int nToken,int flags,Fts5Colset * pColset,Fts5IndexIter ** ppIter)5321 int sqlite3Fts5IndexQuery(
5322   Fts5Index *p,                   /* FTS index to query */
5323   const char *pToken, int nToken, /* Token (or prefix) to query for */
5324   int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
5325   Fts5Colset *pColset,            /* Match these columns only */
5326   Fts5IndexIter **ppIter          /* OUT: New iterator object */
5327 ){
5328   Fts5Config *pConfig = p->pConfig;
5329   Fts5Iter *pRet = 0;
5330   Fts5Buffer buf = {0, 0, 0};
5331 
5332   /* If the QUERY_SCAN flag is set, all other flags must be clear. */
5333   assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
5334 
5335   if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
5336     int iIdx = 0;                 /* Index to search */
5337     if( nToken ) memcpy(&buf.p[1], pToken, nToken);
5338 
5339     /* Figure out which index to search and set iIdx accordingly. If this
5340     ** is a prefix query for which there is no prefix index, set iIdx to
5341     ** greater than pConfig->nPrefix to indicate that the query will be
5342     ** satisfied by scanning multiple terms in the main index.
5343     **
5344     ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
5345     ** prefix-query. Instead of using a prefix-index (if one exists),
5346     ** evaluate the prefix query using the main FTS index. This is used
5347     ** for internal sanity checking by the integrity-check in debug
5348     ** mode only.  */
5349 #ifdef SQLITE_DEBUG
5350     if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
5351       assert( flags & FTS5INDEX_QUERY_PREFIX );
5352       iIdx = 1+pConfig->nPrefix;
5353     }else
5354 #endif
5355     if( flags & FTS5INDEX_QUERY_PREFIX ){
5356       int nChar = fts5IndexCharlen(pToken, nToken);
5357       for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
5358         if( pConfig->aPrefix[iIdx-1]==nChar ) break;
5359       }
5360     }
5361 
5362     if( iIdx<=pConfig->nPrefix ){
5363       /* Straight index lookup */
5364       Fts5Structure *pStruct = fts5StructureRead(p);
5365       buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
5366       if( pStruct ){
5367         fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
5368             pColset, buf.p, nToken+1, -1, 0, &pRet
5369         );
5370         fts5StructureRelease(pStruct);
5371       }
5372     }else{
5373       /* Scan multiple terms in the main index */
5374       int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
5375       buf.p[0] = FTS5_MAIN_PREFIX;
5376       fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
5377       assert( p->rc!=SQLITE_OK || pRet->pColset==0 );
5378       fts5IterSetOutputCb(&p->rc, pRet);
5379       if( p->rc==SQLITE_OK ){
5380         Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
5381         if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
5382       }
5383     }
5384 
5385     if( p->rc ){
5386       sqlite3Fts5IterClose((Fts5IndexIter*)pRet);
5387       pRet = 0;
5388       fts5CloseReader(p);
5389     }
5390 
5391     *ppIter = &pRet->base;
5392     sqlite3Fts5BufferFree(&buf);
5393   }
5394   return fts5IndexReturn(p);
5395 }
5396 
5397 /*
5398 ** Return true if the iterator passed as the only argument is at EOF.
5399 */
5400 /*
5401 ** Move to the next matching rowid.
5402 */
sqlite3Fts5IterNext(Fts5IndexIter * pIndexIter)5403 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
5404   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5405   assert( pIter->pIndex->rc==SQLITE_OK );
5406   fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
5407   return fts5IndexReturn(pIter->pIndex);
5408 }
5409 
5410 /*
5411 ** Move to the next matching term/rowid. Used by the fts5vocab module.
5412 */
sqlite3Fts5IterNextScan(Fts5IndexIter * pIndexIter)5413 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
5414   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5415   Fts5Index *p = pIter->pIndex;
5416 
5417   assert( pIter->pIndex->rc==SQLITE_OK );
5418 
5419   fts5MultiIterNext(p, pIter, 0, 0);
5420   if( p->rc==SQLITE_OK ){
5421     Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
5422     if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
5423       fts5DataRelease(pSeg->pLeaf);
5424       pSeg->pLeaf = 0;
5425       pIter->base.bEof = 1;
5426     }
5427   }
5428 
5429   return fts5IndexReturn(pIter->pIndex);
5430 }
5431 
5432 /*
5433 ** Move to the next matching rowid that occurs at or after iMatch. The
5434 ** definition of "at or after" depends on whether this iterator iterates
5435 ** in ascending or descending rowid order.
5436 */
sqlite3Fts5IterNextFrom(Fts5IndexIter * pIndexIter,i64 iMatch)5437 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
5438   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5439   fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
5440   return fts5IndexReturn(pIter->pIndex);
5441 }
5442 
5443 /*
5444 ** Return the current term.
5445 */
sqlite3Fts5IterTerm(Fts5IndexIter * pIndexIter,int * pn)5446 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
5447   int n;
5448   const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
5449   *pn = n-1;
5450   return &z[1];
5451 }
5452 
5453 /*
5454 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
5455 */
sqlite3Fts5IterClose(Fts5IndexIter * pIndexIter)5456 void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
5457   if( pIndexIter ){
5458     Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5459     Fts5Index *pIndex = pIter->pIndex;
5460     fts5MultiIterFree(pIter);
5461     fts5CloseReader(pIndex);
5462   }
5463 }
5464 
5465 /*
5466 ** Read and decode the "averages" record from the database.
5467 **
5468 ** Parameter anSize must point to an array of size nCol, where nCol is
5469 ** the number of user defined columns in the FTS table.
5470 */
sqlite3Fts5IndexGetAverages(Fts5Index * p,i64 * pnRow,i64 * anSize)5471 int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
5472   int nCol = p->pConfig->nCol;
5473   Fts5Data *pData;
5474 
5475   *pnRow = 0;
5476   memset(anSize, 0, sizeof(i64) * nCol);
5477   pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
5478   if( p->rc==SQLITE_OK && pData->nn ){
5479     int i = 0;
5480     int iCol;
5481     i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
5482     for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
5483       i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
5484     }
5485   }
5486 
5487   fts5DataRelease(pData);
5488   return fts5IndexReturn(p);
5489 }
5490 
5491 /*
5492 ** Replace the current "averages" record with the contents of the buffer
5493 ** supplied as the second argument.
5494 */
sqlite3Fts5IndexSetAverages(Fts5Index * p,const u8 * pData,int nData)5495 int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
5496   assert( p->rc==SQLITE_OK );
5497   fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
5498   return fts5IndexReturn(p);
5499 }
5500 
5501 /*
5502 ** Return the total number of blocks this module has read from the %_data
5503 ** table since it was created.
5504 */
sqlite3Fts5IndexReads(Fts5Index * p)5505 int sqlite3Fts5IndexReads(Fts5Index *p){
5506   return p->nRead;
5507 }
5508 
5509 /*
5510 ** Set the 32-bit cookie value stored at the start of all structure
5511 ** records to the value passed as the second argument.
5512 **
5513 ** Return SQLITE_OK if successful, or an SQLite error code if an error
5514 ** occurs.
5515 */
sqlite3Fts5IndexSetCookie(Fts5Index * p,int iNew)5516 int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
5517   int rc;                              /* Return code */
5518   Fts5Config *pConfig = p->pConfig;    /* Configuration object */
5519   u8 aCookie[4];                       /* Binary representation of iNew */
5520   sqlite3_blob *pBlob = 0;
5521 
5522   assert( p->rc==SQLITE_OK );
5523   sqlite3Fts5Put32(aCookie, iNew);
5524 
5525   rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
5526       "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
5527   );
5528   if( rc==SQLITE_OK ){
5529     sqlite3_blob_write(pBlob, aCookie, 4, 0);
5530     rc = sqlite3_blob_close(pBlob);
5531   }
5532 
5533   return rc;
5534 }
5535 
/*
** Read the index structure via fts5StructureRead() and immediately
** release the reference obtained. The structure itself is not used here;
** only the side effects of reading it, and any error it leaves on the
** handle, matter. Returns the value of fts5IndexReturn().
*/
int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
  fts5StructureRelease(fts5StructureRead(p));
  return fts5IndexReturn(p);
}
5542 
5543 
5544 /*************************************************************************
5545 **************************************************************************
5546 ** Below this point is the implementation of the integrity-check
5547 ** functionality.
5548 */
5549 
5550 /*
5551 ** Return a simple checksum value based on the arguments.
5552 */
sqlite3Fts5IndexEntryCksum(i64 iRowid,int iCol,int iPos,int iIdx,const char * pTerm,int nTerm)5553 u64 sqlite3Fts5IndexEntryCksum(
5554   i64 iRowid,
5555   int iCol,
5556   int iPos,
5557   int iIdx,
5558   const char *pTerm,
5559   int nTerm
5560 ){
5561   int i;
5562   u64 ret = iRowid;
5563   ret += (ret<<3) + iCol;
5564   ret += (ret<<3) + iPos;
5565   if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
5566   for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
5567   return ret;
5568 }
5569 
5570 #ifdef SQLITE_DEBUG
5571 /*
5572 ** This function is purely an internal test. It does not contribute to
5573 ** FTS functionality, or even the integrity-check, in any way.
5574 **
5575 ** Instead, it tests that the same set of pgno/rowid combinations are
5576 ** visited regardless of whether the doclist-index identified by parameters
5577 ** iSegid/iLeaf is iterated in forwards or reverse order.
5578 */
fts5TestDlidxReverse(Fts5Index * p,int iSegid,int iLeaf)5579 static void fts5TestDlidxReverse(
5580   Fts5Index *p,
5581   int iSegid,                     /* Segment id to load from */
5582   int iLeaf                       /* Load doclist-index for this leaf */
5583 ){
5584   Fts5DlidxIter *pDlidx = 0;
5585   u64 cksum1 = 13;
5586   u64 cksum2 = 13;
5587 
5588   for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
5589       fts5DlidxIterEof(p, pDlidx)==0;
5590       fts5DlidxIterNext(p, pDlidx)
5591   ){
5592     i64 iRowid = fts5DlidxIterRowid(pDlidx);
5593     int pgno = fts5DlidxIterPgno(pDlidx);
5594     assert( pgno>iLeaf );
5595     cksum1 += iRowid + ((i64)pgno<<32);
5596   }
5597   fts5DlidxIterFree(pDlidx);
5598   pDlidx = 0;
5599 
5600   for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
5601       fts5DlidxIterEof(p, pDlidx)==0;
5602       fts5DlidxIterPrev(p, pDlidx)
5603   ){
5604     i64 iRowid = fts5DlidxIterRowid(pDlidx);
5605     int pgno = fts5DlidxIterPgno(pDlidx);
5606     assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
5607     cksum2 += iRowid + ((i64)pgno<<32);
5608   }
5609   fts5DlidxIterFree(pDlidx);
5610   pDlidx = 0;
5611 
5612   if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
5613 }
5614 
/*
** Run a query for the n byte term z using sqlite3Fts5IndexQuery() and the
** flags supplied. For each entry the query returns, XOR the value of
** sqlite3Fts5IndexEntryCksum() into *pCksum. If the table uses
** detail=none there is one entry per rowid visited, with column and
** offset both zero. Otherwise there is one entry for each position in
** the position list of each rowid visited.
**
** Returns SQLITE_OK, or the error code from opening or advancing the
** iterator. *pCksum is updated in either case.
*/
static int fts5QueryCksum(
  Fts5Index *p,                   /* Fts5 index object */
  int iIdx,
  const char *z,                  /* Index key to query for */
  int n,                          /* Size of index key in bytes */
  int flags,                      /* Flags for Fts5IndexQuery */
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  int eDetail = p->pConfig->eDetail;
  u64 cksum = *pCksum;
  Fts5IndexIter *pIter = 0;
  int rc;

  for(rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
      rc==SQLITE_OK && !sqlite3Fts5IterEof(pIter);
      rc = sqlite3Fts5IterNext(pIter)
  ){
    i64 rowid = pIter->iRowid;

    if( eDetail==FTS5_DETAIL_NONE ){
      cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
    }else{
      Fts5PoslistReader sReader;
      sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
      while( sReader.bEof==0 ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
        sqlite3Fts5PoslistReaderNext(&sReader);
      }
    }
  }
  sqlite3Fts5IterClose(pIter);

  *pCksum = cksum;
  return rc;
}
5653 
5654 
5655 /*
5656 ** This function is also purely an internal test. It does not contribute to
5657 ** FTS functionality, or even the integrity-check, in any way.
5658 */
fts5TestTerm(Fts5Index * p,Fts5Buffer * pPrev,const char * z,int n,u64 expected,u64 * pCksum)5659 static void fts5TestTerm(
5660   Fts5Index *p,
5661   Fts5Buffer *pPrev,              /* Previous term */
5662   const char *z, int n,           /* Possibly new term to test */
5663   u64 expected,
5664   u64 *pCksum
5665 ){
5666   int rc = p->rc;
5667   if( pPrev->n==0 ){
5668     fts5BufferSet(&rc, pPrev, n, (const u8*)z);
5669   }else
5670   if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
5671     u64 cksum3 = *pCksum;
5672     const char *zTerm = (const char*)&pPrev->p[1];  /* term sans prefix-byte */
5673     int nTerm = pPrev->n-1;            /* Size of zTerm in bytes */
5674     int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
5675     int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
5676     u64 ck1 = 0;
5677     u64 ck2 = 0;
5678 
5679     /* Check that the results returned for ASC and DESC queries are
5680     ** the same. If not, call this corruption.  */
5681     rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
5682     if( rc==SQLITE_OK ){
5683       int f = flags|FTS5INDEX_QUERY_DESC;
5684       rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5685     }
5686     if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5687 
5688     /* If this is a prefix query, check that the results returned if the
5689     ** the index is disabled are the same. In both ASC and DESC order.
5690     **
5691     ** This check may only be performed if the hash table is empty. This
5692     ** is because the hash table only supports a single scan query at
5693     ** a time, and the multi-iter loop from which this function is called
5694     ** is already performing such a scan. */
5695     if( p->nPendingData==0 ){
5696       if( iIdx>0 && rc==SQLITE_OK ){
5697         int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
5698         ck2 = 0;
5699         rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5700         if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5701       }
5702       if( iIdx>0 && rc==SQLITE_OK ){
5703         int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
5704         ck2 = 0;
5705         rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5706         if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5707       }
5708     }
5709 
5710     cksum3 ^= ck1;
5711     fts5BufferSet(&rc, pPrev, n, (const u8*)z);
5712 
5713     if( rc==SQLITE_OK && cksum3!=expected ){
5714       rc = FTS5_CORRUPT;
5715     }
5716     *pCksum = cksum3;
5717   }
5718   p->rc = rc;
5719 }
5720 
5721 #else
5722 # define fts5TestDlidxReverse(x,y,z)
5723 # define fts5TestTerm(u,v,w,x,y,z)
5724 #endif
5725 
5726 /*
5727 ** Check that:
5728 **
5729 **   1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
5730 **      contain zero terms.
5731 **   2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
5732 **      contain zero rowids.
5733 */
fts5IndexIntegrityCheckEmpty(Fts5Index * p,Fts5StructureSegment * pSeg,int iFirst,int iNoRowid,int iLast)5734 static void fts5IndexIntegrityCheckEmpty(
5735   Fts5Index *p,
5736   Fts5StructureSegment *pSeg,     /* Segment to check internal consistency */
5737   int iFirst,
5738   int iNoRowid,
5739   int iLast
5740 ){
5741   int i;
5742 
5743   /* Now check that the iter.nEmpty leaves following the current leaf
5744   ** (a) exist and (b) contain no terms. */
5745   for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
5746     Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
5747     if( pLeaf ){
5748       if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
5749       if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
5750     }
5751     fts5DataRelease(pLeaf);
5752   }
5753 }
5754 
/*
** Check the page-index of leaf page pLeaf, that is the bytes between
** offsets pLeaf->szLeaf and pLeaf->nn. It is decoded as a series of
** varints, each the distance from the previous term offset to the next.
**
** For each term offset, check that it lies inside the leaf data, that
** the term found there does not extend past the end of the leaf data,
** and (for all but the first) that the number of bytes shared with the
** previous term is no larger than that term and that the term is
** strictly greater than its predecessor. If any check fails, p->rc is
** set to FTS5_CORRUPT.
*/
static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  Fts5Buffer term = {0,0,0};      /* Term currently being checked */
  Fts5Buffer prev = {0,0,0};      /* Copy of the previous term */
  int iTermOff = 0;               /* Offset of current term on page */
  int iPgidx;                     /* Current offset within page-index */

  for(iPgidx=pLeaf->szLeaf; iPgidx<pLeaf->nn && p->rc==SQLITE_OK; ){
    int nIncr;                    /* Distance from previous term offset */
    int iOff;                     /* Read offset within leaf data */

    iPgidx += fts5GetVarint32(&pLeaf->p[iPgidx], nIncr);
    iTermOff += nIncr;
    iOff = iTermOff;

    if( iOff>=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else if( iTermOff==nIncr ){
      /* First term on the page. It is stored in full. */
      int nNew;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nNew);
      if( (iOff+nNew)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        fts5BufferSet(&p->rc, &term, nNew, &pLeaf->p[iOff]);
      }
    }else{
      /* Later terms are stored as (bytes-to-keep, new-suffix). */
      int nKeep;
      int nNew;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nNew);
      if( nKeep>term.n || (iOff+nNew)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        term.n = nKeep;
        fts5BufferAppendBlob(&p->rc, &term, nNew, &pLeaf->p[iOff]);
      }

      /* Terms must appear in strictly ascending order. */
      if( p->rc==SQLITE_OK && fts5BufferCompare(&term, &prev)<=0 ){
        p->rc = FTS5_CORRUPT;
      }
    }
    fts5BufferSet(&p->rc, &prev, term.n, term.p);
  }

  fts5BufferFree(&term);
  fts5BufferFree(&prev);
}
5804 
/*
** Check the internal consistency of segment pSeg. Nothing is checked if
** the segment has no pages (pSeg->pgnoFirst==0).
**
** Each row of the %_idx table that belongs to the segment is visited.
** Rows that refer to a leaf before pSeg->pgnoFirst are ignored. For each
** of the others, the following is checked:
**
**   * that the leaf page named by the row exists and contains at least
**     one term, that the first term is equal to or larger than the
**     split-key stored in the row, and that the page-index of the leaf
**     is well formed (fts5IntegrityCheckPgidx()),
**
**   * that the leaves between the previous such leaf and this one exist
**     and are empty in the sense of fts5IndexIntegrityCheckEmpty(), and
**
**   * if the row is flagged as having a doclist-index, that each entry
**     of the doclist-index names a leaf whose first rowid matches the
**     entry, and that the leaves in between contain no rowids.
**
** The first failed check sets p->rc to FTS5_CORRUPT. Offsets and sizes
** read from a leaf are validated against the size of the leaf data before
** they are used to access it, as the page may be corrupt.
*/
static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5Config *pConfig = p->pConfig;
  sqlite3_stmt *pStmt = 0;
  int rc2;
  int iIdxPrevLeaf = pSeg->pgnoFirst-1;
  int iDlidxPrevLeaf = pSeg->pgnoLast;

  if( pSeg->pgnoFirst==0 ) return;

  fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
      "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d",
      pConfig->zDb, pConfig->zName, pSeg->iSegid
  ));

  /* Iterate through the b-tree hierarchy.  */
  while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */

    /* Fetch the value pointer first and its size second. This is the
    ** order the SQLite documentation recommends for these two calls. */
    const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
    int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
    int iIdxLeaf = sqlite3_column_int(pStmt, 2);
    int bIdxDlidx = sqlite3_column_int(pStmt, 3);

    /* Never compare against a NULL split-key. */
    if( zIdxTerm==0 ) nIdxTerm = 0;

    /* If the leaf in question has already been trimmed from the segment,
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iIdxLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
    pLeaf = fts5LeafRead(p, iRow);
    if( pLeaf==0 ) break;

    /* Check that the leaf contains at least one term, and that it is equal
    ** to or larger than the split-key in zIdxTerm.  Also check that if there
    ** is also a rowid pointer within the leaf page header, it points to a
    ** location before the term.  */
    if( pLeaf->nn<=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else{
      int iOff;                   /* Offset of first term on leaf */
      int iRowidOff;              /* Offset of first rowid on leaf */
      int nTerm;                  /* Size of term on leaf in bytes */
      int res;                    /* Comparison of term and split-key */

      iOff = fts5LeafFirstTermOff(pLeaf);
      iRowidOff = fts5LeafFirstRowidOff(pLeaf);
      if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
        /* The term offset must also lie within the leaf data. */
        p->rc = FTS5_CORRUPT;
      }else{
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
        if( nTerm<0 || nTerm>(pLeaf->szLeaf - iOff) ){
          /* The term would extend past the end of the leaf data. This is
          ** the same condition fts5IntegrityCheckPgidx() rejects for the
          ** first term on a page. */
          p->rc = FTS5_CORRUPT;
        }else{
          int nCmp = MIN(nTerm, nIdxTerm);
          res = (nCmp>0 ? memcmp(&pLeaf->p[iOff], zIdxTerm, nCmp) : 0);
          if( res==0 ) res = nTerm - nIdxTerm;
          if( res<0 ) p->rc = FTS5_CORRUPT;
        }
      }

      fts5IntegrityCheckPgidx(p, pLeaf);
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;

    /* Now check that the leaves between the previous leaf with a b-tree
    ** entry and this one (a) exist and (b) contain no terms. */
    fts5IndexIntegrityCheckEmpty(
        p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
    );
    if( p->rc ) break;

    /* If there is a doclist-index, check that it looks right. */
    if( bIdxDlidx ){
      Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
      int iPrevLeaf = iIdxLeaf;
      int iSegid = pSeg->iSegid;
      int iPg = 0;
      i64 iKey;

      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(p, pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
          iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = fts5DlidxIterPgno(pDlidx);

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
          ASSERT_SZLEAF_OK(pLeaf);
          if( iRowidOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
          }else{
            fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
          }
          fts5DataRelease(pLeaf);
        }
      }

      iDlidxPrevLeaf = iPg;
      fts5DlidxIterFree(pDlidx);
      fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
    }else{
      iDlidxPrevLeaf = pSeg->pgnoLast;
      /* TODO: Check there is no doclist index */
    }

    iIdxPrevLeaf = iIdxLeaf;
  }

  rc2 = sqlite3_finalize(pStmt);
  if( p->rc==SQLITE_OK ) p->rc = rc2;

  /* Page iter.iLeaf must now be the rightmost leaf-page in the segment.
  ** This check is currently disabled. */
#if 0
  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }
#endif
}
5937 
5938 
5939 /*
5940 ** Run internal checks to ensure that the FTS index (a) is internally
5941 ** consistent and (b) contains entries for which the XOR of the checksums
5942 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
5943 **
5944 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
5945 ** checksum does not match. Return SQLITE_OK if all checks pass without
5946 ** error, or some other SQLite error code if another error (e.g. OOM)
5947 ** occurs.
5948 */
sqlite3Fts5IndexIntegrityCheck(Fts5Index * p,u64 cksum)5949 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
5950   int eDetail = p->pConfig->eDetail;
5951   u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
5952   Fts5Buffer poslist = {0,0,0};   /* Buffer used to hold a poslist */
5953   Fts5Iter *pIter;                /* Used to iterate through entire index */
5954   Fts5Structure *pStruct;         /* Index structure */
5955 
5956 #ifdef SQLITE_DEBUG
5957   /* Used by extra internal tests only run if NDEBUG is not defined */
5958   u64 cksum3 = 0;                 /* Checksum based on contents of indexes */
5959   Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */
5960 #endif
5961   const int flags = FTS5INDEX_QUERY_NOOUTPUT;
5962 
5963   /* Load the FTS index structure */
5964   pStruct = fts5StructureRead(p);
5965 
5966   /* Check that the internal nodes of each segment match the leaves */
5967   if( pStruct ){
5968     int iLvl, iSeg;
5969     for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
5970       for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
5971         Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
5972         fts5IndexIntegrityCheckSegment(p, pSeg);
5973       }
5974     }
5975   }
5976 
5977   /* The cksum argument passed to this function is a checksum calculated
5978   ** based on all expected entries in the FTS index (including prefix index
5979   ** entries). This block checks that a checksum calculated based on the
5980   ** actual contents of FTS index is identical.
5981   **
5982   ** Two versions of the same checksum are calculated. The first (stack
5983   ** variable cksum2) based on entries extracted from the full-text index
5984   ** while doing a linear scan of each individual index in turn.
5985   **
5986   ** As each term visited by the linear scans, a separate query for the
5987   ** same term is performed. cksum3 is calculated based on the entries
5988   ** extracted by these queries.
5989   */
5990   for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
5991       fts5MultiIterEof(p, pIter)==0;
5992       fts5MultiIterNext(p, pIter, 0, 0)
5993   ){
5994     int n;                      /* Size of term in bytes */
5995     i64 iPos = 0;               /* Position read from poslist */
5996     int iOff = 0;               /* Offset within poslist */
5997     i64 iRowid = fts5MultiIterRowid(pIter);
5998     char *z = (char*)fts5MultiIterTerm(pIter, &n);
5999 
6000     /* If this is a new term, query for it. Update cksum3 with the results. */
6001     fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
6002 
6003     if( eDetail==FTS5_DETAIL_NONE ){
6004       if( 0==fts5MultiIterIsEmpty(p, pIter) ){
6005         cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
6006       }
6007     }else{
6008       poslist.n = 0;
6009       fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
6010       while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
6011         int iCol = FTS5_POS2COLUMN(iPos);
6012         int iTokOff = FTS5_POS2OFFSET(iPos);
6013         cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
6014       }
6015     }
6016   }
6017   fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
6018 
6019   fts5MultiIterFree(pIter);
6020   if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
6021 
6022   fts5StructureRelease(pStruct);
6023 #ifdef SQLITE_DEBUG
6024   fts5BufferFree(&term);
6025 #endif
6026   fts5BufferFree(&poslist);
6027   return fts5IndexReturn(p);
6028 }
6029 
6030 /*************************************************************************
6031 **************************************************************************
6032 ** Below this point is the implementation of the fts5_decode() scalar
6033 ** function only.
6034 */
6035 
6036 /*
6037 ** Decode a segment-data rowid from the %_data table. This function is
6038 ** the opposite of macro FTS5_SEGMENT_ROWID().
6039 */
fts5DecodeRowid(i64 iRowid,int * piSegid,int * pbDlidx,int * piHeight,int * piPgno)6040 static void fts5DecodeRowid(
6041   i64 iRowid,                     /* Rowid from %_data table */
6042   int *piSegid,                   /* OUT: Segment id */
6043   int *pbDlidx,                   /* OUT: Dlidx flag */
6044   int *piHeight,                  /* OUT: Height */
6045   int *piPgno                     /* OUT: Page number */
6046 ){
6047   *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
6048   iRowid >>= FTS5_DATA_PAGE_B;
6049 
6050   *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
6051   iRowid >>= FTS5_DATA_HEIGHT_B;
6052 
6053   *pbDlidx = (int)(iRowid & 0x0001);
6054   iRowid >>= FTS5_DATA_DLI_B;
6055 
6056   *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
6057 }
6058 
/*
** Append a short text description of %_data rowid iKey to buffer pBuf.
**
** A rowid with a non-zero segment id is rendered as its decoded fields.
** Otherwise the rowid is rendered as "{averages} " if it is equal to
** FTS5_AVERAGES_ROWID, or as "{structure}" if it is not.
*/
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int iSegid, iHeight, iPgno, bDlidx;       /* Rowid components */
  fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);

  if( iSegid!=0 ){
    const char *zDlidx = bDlidx ? "dlidx " : "";
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
        zDlidx, iSegid, iHeight, iPgno
    );
  }else if( iKey==FTS5_AVERAGES_ROWID ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
  }
}
6076 
/*
** Append a text representation of structure object p to buffer pBuf.
** Each level is rendered as " {lvl=.. nMerge=.. nSeg=..", followed by
** one " {id=.. leaves=first..last}" group per segment and a closing "}".
*/
static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,               /* Buffer to append text to */
  Fts5Structure *p                /* Structure to describe */
){
  int iLevel = 0;

  while( iLevel<p->nLevel ){
    Fts5StructureLevel *pLevel = &p->aLevel[iLevel];
    int iSegment = 0;

    sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
        " {lvl=%d nMerge=%d nSeg=%d", iLevel, pLevel->nMerge, pLevel->nSeg
    );
    while( iSegment<pLevel->nSeg ){
      Fts5StructureSegment *pSegment = &pLevel->aSeg[iSegment];
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
          pSegment->iSegid, pSegment->pgnoFirst, pSegment->pgnoLast
      );
      iSegment++;
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
    iLevel++;
  }
}
6098 
6099 /*
6100 ** This is part of the fts5_decode() debugging aid.
6101 **
6102 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
6103 ** function appends a human-readable representation of the same object
6104 ** to the buffer passed as the second argument.
6105 */
fts5DecodeStructure(int * pRc,Fts5Buffer * pBuf,const u8 * pBlob,int nBlob)6106 static void fts5DecodeStructure(
6107   int *pRc,                       /* IN/OUT: error code */
6108   Fts5Buffer *pBuf,
6109   const u8 *pBlob, int nBlob
6110 ){
6111   int rc;                         /* Return code */
6112   Fts5Structure *p = 0;           /* Decoded structure object */
6113 
6114   rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
6115   if( rc!=SQLITE_OK ){
6116     *pRc = rc;
6117     return;
6118   }
6119 
6120   fts5DebugStructure(pRc, pBuf, p);
6121   fts5StructureRelease(p);
6122 }
6123 
6124 /*
6125 ** This is part of the fts5_decode() debugging aid.
6126 **
6127 ** Arguments pBlob/nBlob contain an "averages" record. This function
6128 ** appends a human-readable representation of record to the buffer passed
6129 ** as the second argument.
6130 */
fts5DecodeAverages(int * pRc,Fts5Buffer * pBuf,const u8 * pBlob,int nBlob)6131 static void fts5DecodeAverages(
6132   int *pRc,                       /* IN/OUT: error code */
6133   Fts5Buffer *pBuf,
6134   const u8 *pBlob, int nBlob
6135 ){
6136   int i = 0;
6137   const char *zSpace = "";
6138 
6139   while( i<nBlob ){
6140     u64 iVal;
6141     i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
6142     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
6143     zSpace = " ";
6144   }
6145 }
6146 
6147 /*
6148 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read
6149 ** each varint and append its string representation to buffer pBuf. Return
6150 ** after either the input buffer is exhausted or a 0 value is read.
6151 **
6152 ** The return value is the number of bytes read from the input buffer.
6153 */
fts5DecodePoslist(int * pRc,Fts5Buffer * pBuf,const u8 * a,int n)6154 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6155   int iOff = 0;
6156   while( iOff<n ){
6157     int iVal;
6158     iOff += fts5GetVarint32(&a[iOff], iVal);
6159     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
6160   }
6161   return iOff;
6162 }
6163 
6164 /*
6165 ** The start of buffer (a/n) contains the start of a doclist. The doclist
6166 ** may or may not finish within the buffer. This function appends a text
6167 ** representation of the part of the doclist that is present to buffer
6168 ** pBuf.
6169 **
6170 ** The return value is the number of bytes read from the input buffer.
6171 */
fts5DecodeDoclist(int * pRc,Fts5Buffer * pBuf,const u8 * a,int n)6172 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6173   i64 iDocid = 0;
6174   int iOff = 0;
6175 
6176   if( n>0 ){
6177     iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
6178     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6179   }
6180   while( iOff<n ){
6181     int nPos;
6182     int bDel;
6183     iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
6184     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
6185     iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
6186     if( iOff<n ){
6187       i64 iDelta;
6188       iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
6189       iDocid += iDelta;
6190       sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6191     }
6192   }
6193 
6194   return iOff;
6195 }
6196 
6197 /*
6198 ** This function is part of the fts5_decode() debugging function. It is
6199 ** only ever used with detail=none tables.
6200 **
6201 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
6202 ** tables. This function appends a human-readable version of that list to
6203 ** buffer pBuf.
6204 **
6205 ** If *pRc is other than SQLITE_OK when this function is called, it is a
6206 ** no-op. If an OOM or other error occurs within this function, *pRc is
6207 ** set to an SQLite error code before returning. The final state of buffer
6208 ** pBuf is undefined in this case.
6209 */
fts5DecodeRowidList(int * pRc,Fts5Buffer * pBuf,const u8 * pData,int nData)6210 static void fts5DecodeRowidList(
6211   int *pRc,                       /* IN/OUT: Error code */
6212   Fts5Buffer *pBuf,               /* Buffer to append text to */
6213   const u8 *pData, int nData      /* Data to decode list-of-rowids from */
6214 ){
6215   int i = 0;
6216   i64 iRowid = 0;
6217 
6218   while( i<nData ){
6219     const char *zApp = "";
6220     u64 iVal;
6221     i += sqlite3Fts5GetVarint(&pData[i], &iVal);
6222     iRowid += iVal;
6223 
6224     if( i<nData && pData[i]==0x00 ){
6225       i++;
6226       if( i<nData && pData[i]==0x00 ){
6227         i++;
6228         zApp = "+";
6229       }else{
6230         zApp = "*";
6231       }
6232     }
6233 
6234     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
6235   }
6236 }
6237 
6238 /*
6239 ** The implementation of user-defined scalar function fts5_decode().
6240 */
fts5DecodeFunction(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)6241 static void fts5DecodeFunction(
6242   sqlite3_context *pCtx,          /* Function call context */
6243   int nArg,                       /* Number of args (always 2) */
6244   sqlite3_value **apVal           /* Function arguments */
6245 ){
6246   i64 iRowid;                     /* Rowid for record being decoded */
6247   int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
6248   const u8 *aBlob; int n;         /* Record to decode */
6249   u8 *a = 0;
6250   Fts5Buffer s;                   /* Build up text to return here */
6251   int rc = SQLITE_OK;             /* Return code */
6252   int nSpace = 0;
6253   int eDetailNone = (sqlite3_user_data(pCtx)!=0);
6254 
6255   assert( nArg==2 );
6256   UNUSED_PARAM(nArg);
6257   memset(&s, 0, sizeof(Fts5Buffer));
6258   iRowid = sqlite3_value_int64(apVal[0]);
6259 
6260   /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
6261   ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
6262   ** buffer overreads even if the record is corrupt.  */
6263   n = sqlite3_value_bytes(apVal[1]);
6264   aBlob = sqlite3_value_blob(apVal[1]);
6265   nSpace = n + FTS5_DATA_ZERO_PADDING;
6266   a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
6267   if( a==0 ) goto decode_out;
6268   memcpy(a, aBlob, n);
6269 
6270 
6271   fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
6272 
6273   fts5DebugRowid(&rc, &s, iRowid);
6274   if( bDlidx ){
6275     Fts5Data dlidx;
6276     Fts5DlidxLvl lvl;
6277 
6278     dlidx.p = a;
6279     dlidx.nn = n;
6280 
6281     memset(&lvl, 0, sizeof(Fts5DlidxLvl));
6282     lvl.pData = &dlidx;
6283     lvl.iLeafPgno = iPgno;
6284 
6285     for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
6286       sqlite3Fts5BufferAppendPrintf(&rc, &s,
6287           " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
6288       );
6289     }
6290   }else if( iSegid==0 ){
6291     if( iRowid==FTS5_AVERAGES_ROWID ){
6292       fts5DecodeAverages(&rc, &s, a, n);
6293     }else{
6294       fts5DecodeStructure(&rc, &s, a, n);
6295     }
6296   }else if( eDetailNone ){
6297     Fts5Buffer term;              /* Current term read from page */
6298     int szLeaf;
6299     int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
6300     int iTermOff;
6301     int nKeep = 0;
6302     int iOff;
6303 
6304     memset(&term, 0, sizeof(Fts5Buffer));
6305 
6306     /* Decode any entries that occur before the first term. */
6307     if( szLeaf<n ){
6308       iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
6309     }else{
6310       iTermOff = szLeaf;
6311     }
6312     fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
6313 
6314     iOff = iTermOff;
6315     while( iOff<szLeaf ){
6316       int nAppend;
6317 
6318       /* Read the term data for the next term*/
6319       iOff += fts5GetVarint32(&a[iOff], nAppend);
6320       term.n = nKeep;
6321       fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
6322       sqlite3Fts5BufferAppendPrintf(
6323           &rc, &s, " term=%.*s", term.n, (const char*)term.p
6324       );
6325       iOff += nAppend;
6326 
6327       /* Figure out where the doclist for this term ends */
6328       if( iPgidxOff<n ){
6329         int nIncr;
6330         iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
6331         iTermOff += nIncr;
6332       }else{
6333         iTermOff = szLeaf;
6334       }
6335 
6336       fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
6337       iOff = iTermOff;
6338       if( iOff<szLeaf ){
6339         iOff += fts5GetVarint32(&a[iOff], nKeep);
6340       }
6341     }
6342 
6343     fts5BufferFree(&term);
6344   }else{
6345     Fts5Buffer term;              /* Current term read from page */
6346     int szLeaf;                   /* Offset of pgidx in a[] */
6347     int iPgidxOff;
6348     int iPgidxPrev = 0;           /* Previous value read from pgidx */
6349     int iTermOff = 0;
6350     int iRowidOff = 0;
6351     int iOff;
6352     int nDoclist;
6353 
6354     memset(&term, 0, sizeof(Fts5Buffer));
6355 
6356     if( n<4 ){
6357       sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
6358       goto decode_out;
6359     }else{
6360       iRowidOff = fts5GetU16(&a[0]);
6361       iPgidxOff = szLeaf = fts5GetU16(&a[2]);
6362       if( iPgidxOff<n ){
6363         fts5GetVarint32(&a[iPgidxOff], iTermOff);
6364       }
6365     }
6366 
6367     /* Decode the position list tail at the start of the page */
6368     if( iRowidOff!=0 ){
6369       iOff = iRowidOff;
6370     }else if( iTermOff!=0 ){
6371       iOff = iTermOff;
6372     }else{
6373       iOff = szLeaf;
6374     }
6375     fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
6376 
6377     /* Decode any more doclist data that appears on the page before the
6378     ** first term. */
6379     nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
6380     fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
6381 
6382     while( iPgidxOff<n ){
6383       int bFirst = (iPgidxOff==szLeaf);     /* True for first term on page */
6384       int nByte;                            /* Bytes of data */
6385       int iEnd;
6386 
6387       iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
6388       iPgidxPrev += nByte;
6389       iOff = iPgidxPrev;
6390 
6391       if( iPgidxOff<n ){
6392         fts5GetVarint32(&a[iPgidxOff], nByte);
6393         iEnd = iPgidxPrev + nByte;
6394       }else{
6395         iEnd = szLeaf;
6396       }
6397 
6398       if( bFirst==0 ){
6399         iOff += fts5GetVarint32(&a[iOff], nByte);
6400         term.n = nByte;
6401       }
6402       iOff += fts5GetVarint32(&a[iOff], nByte);
6403       fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
6404       iOff += nByte;
6405 
6406       sqlite3Fts5BufferAppendPrintf(
6407           &rc, &s, " term=%.*s", term.n, (const char*)term.p
6408       );
6409       iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
6410     }
6411 
6412     fts5BufferFree(&term);
6413   }
6414 
6415  decode_out:
6416   sqlite3_free(a);
6417   if( rc==SQLITE_OK ){
6418     sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
6419   }else{
6420     sqlite3_result_error_code(pCtx, rc);
6421   }
6422   fts5BufferFree(&s);
6423 }
6424 
6425 /*
6426 ** The implementation of user-defined scalar function fts5_rowid().
6427 */
fts5RowidFunction(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)6428 static void fts5RowidFunction(
6429   sqlite3_context *pCtx,          /* Function call context */
6430   int nArg,                       /* Number of args (always 2) */
6431   sqlite3_value **apVal           /* Function arguments */
6432 ){
6433   const char *zArg;
6434   if( nArg==0 ){
6435     sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
6436   }else{
6437     zArg = (const char*)sqlite3_value_text(apVal[0]);
6438     if( 0==sqlite3_stricmp(zArg, "segment") ){
6439       i64 iRowid;
6440       int segid, pgno;
6441       if( nArg!=3 ){
6442         sqlite3_result_error(pCtx,
6443             "should be: fts5_rowid('segment', segid, pgno))", -1
6444         );
6445       }else{
6446         segid = sqlite3_value_int(apVal[1]);
6447         pgno = sqlite3_value_int(apVal[2]);
6448         iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
6449         sqlite3_result_int64(pCtx, iRowid);
6450       }
6451     }else{
6452       sqlite3_result_error(pCtx,
6453         "first arg to fts5_rowid() must be 'segment'" , -1
6454       );
6455     }
6456   }
6457 }
6458 
6459 /*
6460 ** This is called as part of registering the FTS5 module with database
6461 ** connection db. It registers several user-defined scalar functions useful
6462 ** with FTS5.
6463 **
6464 ** If successful, SQLITE_OK is returned. If an error occurs, some other
6465 ** SQLite error code is returned instead.
6466 */
sqlite3Fts5IndexInit(sqlite3 * db)6467 int sqlite3Fts5IndexInit(sqlite3 *db){
6468   int rc = sqlite3_create_function(
6469       db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
6470   );
6471 
6472   if( rc==SQLITE_OK ){
6473     rc = sqlite3_create_function(
6474         db, "fts5_decode_none", 2,
6475         SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
6476     );
6477   }
6478 
6479   if( rc==SQLITE_OK ){
6480     rc = sqlite3_create_function(
6481         db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
6482     );
6483   }
6484   return rc;
6485 }
6486 
6487 
/*
** If the value returned by fts5IndexDataVersion() no longer matches the
** version recorded in p->iStructVersion, discard the structure held by
** the handle by calling fts5StructureInvalidate(). The return value is
** whatever fts5IndexReturn() reports for the handle.
*/
int sqlite3Fts5IndexReset(Fts5Index *p){
  /* A structure may only be held if a version was recorded along with it */
  assert( p->pStruct==0 || p->iStructVersion!=0 );

  if( fts5IndexDataVersion(p)==p->iStructVersion ){
    /* Structure (if any) is still current - nothing to do */
  }else{
    fts5StructureInvalidate(p);
  }
  return fts5IndexReturn(p);
}
6495