1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** Low level access to the FTS index stored in the database file. The
14 ** routines in this file implement all read and write access to the
15 ** %_data table. Other parts of the system access this functionality via
16 ** the interface defined in fts5Int.h.
17 */
18 
19 
20 #include "fts5Int.h"
21 
22 /*
23 ** Overview:
24 **
25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
26 ** As well as the main term index, there may be up to 31 prefix indexes.
27 ** The format is similar to FTS3/4, except that:
28 **
29 **   * all segment b-tree leaf data is stored in fixed size page records
30 **     (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
31 **     taken to ensure it is possible to iterate in either direction through
32 **     the entries in a doclist, or to seek to a specific entry within a
33 **     doclist, without loading it into memory.
34 **
35 **   * large doclists that span many pages have associated "doclist index"
36 **     records that contain a copy of the first rowid on each page spanned by
37 **     the doclist. This is used to speed up seek operations, and merges of
38 **     large doclists with very small doclists.
39 **
40 **   * extra fields in the "structure record" record the state of ongoing
41 **     incremental merge operations.
42 **
43 */
44 
45 
/* Work-unit sizes, both measured in leaf pages. */
#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/* NOTE(review): presumably the byte used to tag terms that belong to the
** main (non-prefix) term index - confirm against the code that uses it. */
#define FTS5_MAIN_PREFIX '0'

/* The overview comment above allows for at most 31 prefix indexes, so
** refuse to build if the configured maximum is larger than that. */
#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif
56 
57 /*
58 ** Details:
59 **
60 ** The %_data table managed by this module,
61 **
62 **     CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
63 **
64 ** , contains the following 4 types of records. See the comments surrounding
65 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
66 ** assigned to each of them.
67 **
68 ** 1. Structure Records:
69 **
70 **   The set of segments that make up an index - the index structure - are
71 **   recorded in a single record within the %_data table. The record consists
72 **   of a single 32-bit configuration cookie value followed by a list of
73 **   SQLite varints. If the FTS table features more than one index (because
74 **   there are one or more prefix indexes), it is guaranteed that all share
75 **   the same cookie value.
76 **
77 **   Immediately following the configuration cookie, the record begins with
78 **   three varints:
79 **
80 **     + number of levels,
81 **     + total number of segments on all levels,
82 **     + value of write counter.
83 **
84 **   Then, for each level from 0 to nMax:
85 **
86 **     + number of input segments in ongoing merge.
87 **     + total number of segments in level.
88 **     + for each segment from oldest to newest:
89 **         + segment id (always > 0)
90 **         + first leaf page number (often 1, always greater than 0)
91 **         + final leaf page number
92 **
93 ** 2. The Averages Record:
94 **
95 **   A single record within the %_data table. The data is a list of varints.
96 **   The first value is the number of rows in the index. Then, for each column
97 **   from left to right, the total number of tokens in the column for all
98 **   rows of the table.
99 **
100 ** 3. Segment leaves:
101 **
102 **   TERM/DOCLIST FORMAT:
103 **
104 **     Most of each segment leaf is taken up by term/doclist data. The
105 **     general format of term/doclist, starting with the first term
106 **     on the leaf page, is:
107 **
108 **         varint : size of first term
109 **         blob:    first term data
110 **         doclist: first doclist
111 **         zero-or-more {
112 **           varint:  number of bytes in common with previous term
113 **           varint:  number of bytes of new term data (nNew)
114 **           blob:    nNew bytes of new term data
115 **           doclist: next doclist
116 **         }
117 **
118 **     doclist format:
119 **
120 **         varint:  first rowid
121 **         poslist: first poslist
122 **         zero-or-more {
123 **           varint:  rowid delta (always > 0)
124 **           poslist: next poslist
125 **         }
126 **
127 **     poslist format:
128 **
129 **         varint: size of poslist in bytes multiplied by 2, not including
130 **                 this field. Plus 1 if this entry carries the "delete" flag.
131 **         collist: collist for column 0
132 **         zero-or-more {
133 **           0x01 byte
134 **           varint: column number (I)
135 **           collist: collist for column I
136 **         }
137 **
138 **     collist format:
139 **
140 **         varint: first offset + 2
141 **         zero-or-more {
142 **           varint: offset delta + 2
143 **         }
144 **
145 **   PAGE FORMAT
146 **
147 **     Each leaf page begins with a 4-byte header containing 2 16-bit
148 **     unsigned integer fields in big-endian format. They are:
149 **
150 **       * The byte offset of the first rowid on the page, if it exists
151 **         and occurs before the first term (otherwise 0).
152 **
153 **       * The byte offset of the start of the page footer. If the page
154 **         footer is 0 bytes in size, then this field is the same as the
155 **         size of the leaf page in bytes.
156 **
157 **     The page footer consists of a single varint for each term located
158 **     on the page. Each varint is the byte offset of the current term
159 **     within the page, delta-compressed against the previous value. In
160 **     other words, the first varint in the footer is the byte offset of
161 **     the first term, the second is the byte offset of the second less that
162 **     of the first, and so on.
163 **
164 **     The term/doclist format described above is accurate if the entire
165 **     term/doclist data fits on a single leaf page. If this is not the case,
166 **     the format is changed in two ways:
167 **
168 **       + if the first rowid on a page occurs before the first term, it
169 **         is stored as a literal value:
170 **
171 **             varint:  first rowid
172 **
173 **       + the first term on each page is stored in the same way as the
174 **         very first term of the segment:
175 **
176 **             varint : size of first term
177 **             blob:    first term data
178 **
179 ** 4. Segment doclist indexes:
180 **
181 **   Doclist indexes are themselves b-trees, however they usually consist of
182 **   a single leaf record only. The format of each doclist index leaf page
183 **   is:
184 **
185 **     * Flags byte. Bits are:
186 **         0x01: Clear if leaf is also the root page, otherwise set.
187 **
188 **     * Page number of fts index leaf page. As a varint.
189 **
190 **     * First rowid on page indicated by previous field. As a varint.
191 **
192 **     * A list of varints, one for each subsequent termless page. A
193 **       positive delta if the termless page contains at least one rowid,
194 **       or an 0x00 byte otherwise.
195 **
196 **   Internal doclist index nodes are:
197 **
198 **     * Flags byte. Bits are:
199 **         0x01: Clear for root page, otherwise set.
200 **
201 **     * Page number of first child page. As a varint.
202 **
203 **     * Copy of first rowid on page indicated by previous field. As a varint.
204 **
205 **     * A list of delta-encoded varints - the first rowid on each subsequent
206 **       child page.
207 **
208 */
209 
/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID   10    /* The structure record */

/*
** Macros determining the rowids used by segment leaves and dlidx leaves
** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.
**
** Each segment has a unique non-zero 16-bit id.
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
**
** Bit layout of a rowid produced by fts5_dri(), least significant bit first:
**
**     bits  0..30   page number            (FTS5_DATA_PAGE_B bits)
**     bits 31..35   dlidx tree height      (FTS5_DATA_HEIGHT_B bits)
**     bit  36       doclist-index flag     (FTS5_DATA_DLI_B bits)
**     bits 37..52   segment id             (FTS5_DATA_ID_B bits)
**
** fts5_dri() adds the shifted fields together without masking them, so
** each argument must already fit within its field width.
*/
#define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* 5-bit dlidx tree height (values 0..31) */
#define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483647 (2^31-1) */

#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

/* Rowid of leaf page pgno of segment segid (dlidx flag and height are 0). */
#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)
/* Rowid of a doclist-index page (dlidx flag set) at the given height. */
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
241 
#ifdef SQLITE_DEBUG
/*
** Return SQLITE_CORRUPT_VTAB. This function is only compiled when
** SQLITE_DEBUG is defined.
**
** NOTE(review): presumably exists so that a debugger breakpoint can be set
** at the point where corruption is first reported - confirm.
**
** The parameter list is spelled (void) so that the definition is a proper
** prototype; an empty "()" list does not check the arguments at call sites.
*/
int sqlite3Fts5Corrupt(void){ return SQLITE_CORRUPT_VTAB; }
#endif
245 
246 
/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
*/
#define FTS5_DATA_ZERO_PADDING 8
/* Number of spare bytes that fts5DataRead() allocates beyond the end of
** each record it loads. */
#define FTS5_DATA_PADDING 20
254 
/* Forward typedefs for the structures defined later in this file. */
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
267 
/*
** An in-memory copy of a single record loaded from the %_data table by
** fts5DataRead(). The structure and the record bytes are obtained with a
** single allocation: p points at the byte immediately following the
** structure. Release with fts5DataRelease().
**
** szLeaf is loaded from the 16-bit big-endian value at byte offset 2 of the
** record. It is only meaningful if the record is a segment leaf page.
*/
struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int nn;                         /* Size of record in bytes */
  int szLeaf;                     /* Size of leaf without page-index */
};
273 
/*
** One object per %_data table.
**
** The rc field is a sticky error code: the fts5DataXXX() routines in this
** file return without doing anything if it is not SQLITE_OK when they are
** called, and store any new error in it.
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
  char *zDataTbl;                 /* Name of %_data table */
  int nWorkUnit;                  /* Leaf pages in a "unit" of work */

  /*
  ** Variables related to the accumulation of tokens and doclists within the
  ** in-memory hash tables before they are flushed to disk.
  */
  Fts5Hash *pHash;                /* Hash table for in-memory data */
  int nPendingData;               /* Current bytes of pending data */
  i64 iWriteRowid;                /* Rowid for current doc being written */
  int bDelete;                    /* Current write is a delete */

  /* Error state. */
  int rc;                         /* Current error code */

  /* State used by the fts5DataXXX() functions. The statement handles are
  ** prepared lazily, the first time each one is needed. */
  sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter;          /* "REPLACE INTO %_data(id,block) VALUES(?,?)" */
  sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
  sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
  sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=?" */
  sqlite3_stmt *pIdxSelect;       /* NOTE(review): presumably a SELECT on
                                  ** the %_idx table - confirm */
  int nRead;                      /* Total number of blocks read */

  sqlite3_stmt *pDataVersion;     /* NOTE(review): presumably the statement
                                  ** used to read data_version - confirm */
  i64 iStructVersion;             /* data_version when pStruct read */
  Fts5Structure *pStruct;         /* Current db structure (or NULL) */
};
307 
/*
** Iterator over the entries of a single doclist held in memory. The
** iterator is at EOF when aPoslist is NULL.
*/
struct Fts5DoclistIter {
  u8 *aEof;                       /* Pointer to 1 byte past end of doclist */

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;                     /* Rowid of the current entry */
  u8 *aPoslist;                   /* Position list of current entry, or NULL */
  int nPoslist;                   /* Size of aPoslist[] in bytes */
  int nSize;                      /* NOTE(review): presumably the size in bytes
                                  ** of the poslist-size varint - confirm */
};
317 
/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the
** other Fts5StructureXXX types as components.
**
** Fts5Structure objects are reference counted. See fts5StructureRef() and
** fts5StructureRelease(). Each Fts5StructureLevel.aSeg array is a separate
** allocation that is freed along with the Fts5Structure that owns it.
**
** aLevel[] is declared with a single element but is used as a
** variable-length trailing array holding nLevel entries.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */
  int pgnoFirst;                  /* First leaf page number in segment */
  int pgnoLast;                   /* Last leaf page number in segment */
};
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
};
struct Fts5Structure {
  int nRef;                       /* Object reference count */
  u64 nWriteCounter;              /* Total leaves written to level 0 */
  int nSegment;                   /* Total segments in this structure */
  int nLevel;                     /* Number of levels in this index */
  Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
};
340 
/*
** An object of type Fts5SegWriter is used to write to segments. It embeds
** one Fts5PageWriter, which holds the leaf page currently being assembled,
** and owns an array of Fts5DlidxWriter objects, each of which holds one
** doclist-index page under construction.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous rowid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  Fts5PageWriter writer;          /* PageWriter object */
  i64 iPrevRowid;                 /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */

  /* Values to insert into the %_idx table */
  Fts5Buffer btterm;              /* Next term to insert into %_idx table */
  int iBtPage;                    /* Page number corresponding to btterm */
};
375 
/*
** The result of comparing two segment iterators. An array of these objects
** (Fts5Iter.aFirst[]) records the current state of a multi-segment merge.
*/
typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
  u16 iFirst;                     /* aSeg[] index of firstest iterator */
  u8 bTermEq;                     /* True if the terms are equal */
};
381 
/*
** Object for iterating through a single segment, visiting each term/rowid
** pair in the segment.
**
** pSeg:
**   The segment to iterate through.
**
** iLeafPgno:
**   Current leaf page number within segment.
**
** iLeafOffset:
**   Byte offset within the current leaf that is the first byte of the
**   position list data (one byte past the position-list size field).
**   rowid field of the current entry. Usually this is the size field of the
**   position list data. The exception is if the rowid for the current entry
**   is the last thing on the leaf page.
**
**   NOTE(review): the paragraph above appears to mix two different
**   definitions of iLeafOffset (first byte of the position list versus the
**   position-list size field) - confirm which one the code implements.
**
** pLeaf:
**   Buffer containing current leaf page data. Set to NULL at EOF.
**
** iTermLeafPgno, iTermLeafOffset:
**   Leaf page number containing the last term read from the segment. And
**   the offset immediately following the term data.
**
** flags:
**   Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
**
**   FTS5_SEGITER_ONETERM:
**     If set, set the iterator to point to EOF after the current doclist
**     has been exhausted. Do not proceed to the next term in the segment.
**
**   FTS5_SEGITER_REVERSE:
**     This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
**     it is set, iterate through rowid in descending order instead of the
**     default ascending order.
**
** iRowidOffset/nRowidOffset/aRowidOffset:
**     These are used if the FTS5_SEGITER_REVERSE flag is set.
**
**     For each rowid on the page corresponding to the current term, the
**     corresponding aRowidOffset[] entry is set to the byte offset of the
**     start of the "position-list-size" field within the page.
**
** iTermIdx:
**     Index of current term on iTermLeafPgno.
*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  int iLeafOffset;                /* Byte offset within current leaf */

  /* Next method */
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*);

  /* The page and offset from which the current term was read. The offset
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff;                  /* Next offset in pgidx */
  int iEndofDoclist;              /* NOTE(review): presumably the offset within
                                  ** pLeaf at which the current doclist ends -
                                  ** confirm */

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  u8 bDel;                        /* True if the delete flag is set */
};
460 
/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page. Assert that its szLeaf field is equal to either the total
** record size (nn) or the 16-bit value stored at byte offset 2 of the page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

/* Bits for the Fts5SegIter.flags mask. See the comment above the
** Fts5SegIter structure for a description of each. */
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02

/*
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
** false if it contains at least one term.
**
** A leaf is considered termless when nothing follows the first szLeaf
** bytes of the record - that is, when the page footer is empty.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/*
** Evaluates to the 16-bit big-endian value stored (i*2) bytes after offset
** szLeaf of leaf x.
**
** NOTE(review): the page format comment near the top of this file describes
** the page footer as a list of varints, whereas this macro reads fixed-size
** 16-bit fields. Confirm whether this macro is still in use.
*/
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/*
** Evaluates to the first 16-bit field of the leaf page header. According to
** the page format comment, this is the byte offset of the first rowid on
** the page, or 0.
*/
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
482 
/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/rowid pair in the merged data.
**
** nSeg is always a power of two greater than or equal to the number of
** segments that this object is merging data from. Both the aSeg[] and
** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
** with zeroed objects - these are handled as if they were iterators opened
** on empty segments.
**
** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
** comparison in this context is the index of the iterator that currently
** points to the smaller term/rowid combination. Iterators at EOF are
** considered to be greater than all other iterators.
**
** aFirst[1] contains the index in aSeg[] of the iterator that points to
** the smallest key overall. aFirst[0] is unused.
**
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
**
** aSeg[] is declared with a single element but is used as a
** variable-length trailing array of nSeg entries. It must therefore remain
** the final member of the structure.
*/
struct Fts5Iter {
  Fts5IndexIter base;             /* Base class containing output vars */

  Fts5Index *pIndex;              /* Index that owns this iterator */
  Fts5Buffer poslist;             /* Buffer containing current poslist */
  Fts5Colset *pColset;            /* Restrict matches to these columns */

  /* Invoked to set output variables. */
  void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);

  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  u8 bSkipEmpty;                  /* True to skip deleted entries */

  i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
  Fts5SegIter aSeg[1];            /* Array of segment iterators */
};
524 
525 
/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
**   Record containing the doclist-index data.
**
** bEof:
**   Set to true once iterator has reached EOF.
**
** iOff:
**   Set to the current offset within record pData.
**
** A doclist-index is a b-tree (see the file header comment). An
** Fts5DlidxIter holds an array of Fts5DlidxLvl objects in aLvl[], which is
** declared with one element but used as a variable-length trailing array.
*/
struct Fts5DlidxLvl {
  Fts5Data *pData;              /* Data for current page of this level */
  int iOff;                     /* Current offset into pData */
  int bEof;                     /* At EOF already */
  int iFirstOff;                /* Used by reverse iterators */

  /* Output variables */
  int iLeafPgno;                /* Page number of current leaf page */
  i64 iRowid;                   /* First rowid on leaf iLeafPgno */
};
struct Fts5DlidxIter {
  int nLvl;                     /* NOTE(review): presumably the number of
                                ** entries in aLvl[] - confirm */
  int iSegid;                   /* NOTE(review): presumably the id of the
                                ** segment the doclist-index belongs to -
                                ** confirm */
  Fts5DlidxLvl aLvl[1];         /* One object per level of the dlidx tree */
};
554 
/*
** Serialize the 16-bit value iVal into the two bytes at aOut[0] and
** aOut[1], most significant byte first (big-endian).
*/
static void fts5PutU16(u8 *aOut, u16 iVal){
  u8 *pHigh = &aOut[0];
  u8 *pLow = &aOut[1];
  *pHigh = (iVal>>8);
  *pLow = (iVal&0xFF);
}
559 
fts5GetU16(const u8 * aIn)560 static u16 fts5GetU16(const u8 *aIn){
561   return ((u16)aIn[0] << 8) + aIn[1];
562 }
563 
564 /*
565 ** Allocate and return a buffer at least nByte bytes in size.
566 **
567 ** If an OOM error is encountered, return NULL and set the error code in
568 ** the Fts5Index handle passed as the first argument.
569 */
fts5IdxMalloc(Fts5Index * p,sqlite3_int64 nByte)570 static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
571   return sqlite3Fts5MallocZero(&p->rc, nByte);
572 }
573 
/*
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
** If one is a prefix of the other, the shorter of the two is considered
** the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
**
** This function is only compiled when SQLITE_DEBUG is defined.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  /* Do not call memcmp() unless there is at least one byte to compare. An
  ** empty buffer may have a NULL data pointer, and passing NULL to memcmp()
  ** is undefined behavior even if the length argument is zero. */
  int res = (nCmp>0 ? memcmp(pLeft->p, pRight, nCmp) : 0);
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
592 
593 /*
594 ** Compare the contents of the two buffers using memcmp(). If one buffer
595 ** is a prefix of the other, it is considered the lesser.
596 **
597 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
598 ** +ve if pRight is smaller than pLeft. In other words:
599 **
600 **     res = *pLeft - *pRight
601 */
fts5BufferCompare(Fts5Buffer * pLeft,Fts5Buffer * pRight)602 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
603   int nCmp = MIN(pLeft->n, pRight->n);
604   int res = fts5Memcmp(pLeft->p, pRight->p, nCmp);
605   return (res==0 ? (pLeft->n - pRight->n) : res);
606 }
607 
/*
** Return the value of the varint stored at byte offset pLeaf->szLeaf of
** leaf page pLeaf. According to the page format, this is the first entry
** of the page footer - the byte offset of the first term on the page.
*/
static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
  u8 *aFooter = &pLeaf->p[pLeaf->szLeaf];
  int iTermOff;
  fts5GetVarint32(aFooter, iTermOff);
  return iTermOff;
}
613 
614 /*
615 ** Close the read-only blob handle, if it is open.
616 */
sqlite3Fts5IndexCloseReader(Fts5Index * p)617 void sqlite3Fts5IndexCloseReader(Fts5Index *p){
618   if( p->pReader ){
619     sqlite3_blob *pReader = p->pReader;
620     p->pReader = 0;
621     sqlite3_blob_close(pReader);
622   }
623 }
624 
625 /*
626 ** Retrieve a record from the %_data table.
627 **
628 ** If an error occurs, NULL is returned and an error left in the
629 ** Fts5Index object.
630 */
fts5DataRead(Fts5Index * p,i64 iRowid)631 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
632   Fts5Data *pRet = 0;
633   if( p->rc==SQLITE_OK ){
634     int rc = SQLITE_OK;
635 
636     if( p->pReader ){
637       /* This call may return SQLITE_ABORT if there has been a savepoint
638       ** rollback since it was last used. In this case a new blob handle
639       ** is required.  */
640       sqlite3_blob *pBlob = p->pReader;
641       p->pReader = 0;
642       rc = sqlite3_blob_reopen(pBlob, iRowid);
643       assert( p->pReader==0 );
644       p->pReader = pBlob;
645       if( rc!=SQLITE_OK ){
646         sqlite3Fts5IndexCloseReader(p);
647       }
648       if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
649     }
650 
651     /* If the blob handle is not open at this point, open it and seek
652     ** to the requested entry.  */
653     if( p->pReader==0 && rc==SQLITE_OK ){
654       Fts5Config *pConfig = p->pConfig;
655       rc = sqlite3_blob_open(pConfig->db,
656           pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
657       );
658     }
659 
660     /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
661     ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
662     ** All the reasons those functions might return SQLITE_ERROR - missing
663     ** table, missing row, non-blob/text in block column - indicate
664     ** backing store corruption.  */
665     if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
666 
667     if( rc==SQLITE_OK ){
668       u8 *aOut = 0;               /* Read blob data into this buffer */
669       int nByte = sqlite3_blob_bytes(p->pReader);
670       sqlite3_int64 nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
671       pRet = (Fts5Data*)sqlite3_malloc64(nAlloc);
672       if( pRet ){
673         pRet->nn = nByte;
674         aOut = pRet->p = (u8*)&pRet[1];
675       }else{
676         rc = SQLITE_NOMEM;
677       }
678 
679       if( rc==SQLITE_OK ){
680         rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
681       }
682       if( rc!=SQLITE_OK ){
683         sqlite3_free(pRet);
684         pRet = 0;
685       }else{
686         /* TODO1: Fix this */
687         pRet->p[nByte] = 0x00;
688         pRet->p[nByte+1] = 0x00;
689         pRet->szLeaf = fts5GetU16(&pRet->p[2]);
690       }
691     }
692     p->rc = rc;
693     p->nRead++;
694   }
695 
696   assert( (pRet==0)==(p->rc!=SQLITE_OK) );
697   return pRet;
698 }
699 
700 /*
701 ** Release a reference to data record returned by an earlier call to
702 ** fts5DataRead().
703 */
fts5DataRelease(Fts5Data * pData)704 static void fts5DataRelease(Fts5Data *pData){
705   sqlite3_free(pData);
706 }
707 
fts5LeafRead(Fts5Index * p,i64 iRowid)708 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
709   Fts5Data *pRet = fts5DataRead(p, iRowid);
710   if( pRet ){
711     if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){
712       p->rc = FTS5_CORRUPT;
713       fts5DataRelease(pRet);
714       pRet = 0;
715     }
716   }
717   return pRet;
718 }
719 
/*
** Compile SQL statement zSql into *ppStmt as a persistent statement.
**
** Nothing is compiled if p->rc is not SQLITE_OK on entry. Otherwise, if
** zSql is NULL, p->rc is set to SQLITE_NOMEM. This allows the result of
** sqlite3_mprintf() to be passed directly as the third argument.
**
** In all cases zSql is freed using sqlite3_free() before returning. The
** return value is the final value of p->rc.
*/
static int fts5IndexPrepareStmt(
  Fts5Index *p,
  sqlite3_stmt **ppStmt,
  char *zSql
){
  if( p->rc==SQLITE_OK ){
    if( zSql==0 ){
      p->rc = SQLITE_NOMEM;
    }else{
      sqlite3 *db = p->pConfig->db;
      p->rc = sqlite3_prepare_v3(
          db, zSql, -1,
          SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB,
          ppStmt, 0
      );
    }
  }
  sqlite3_free(zSql);
  return p->rc;
}
737 
738 
739 /*
740 ** INSERT OR REPLACE a record into the %_data table.
741 */
fts5DataWrite(Fts5Index * p,i64 iRowid,const u8 * pData,int nData)742 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
743   if( p->rc!=SQLITE_OK ) return;
744 
745   if( p->pWriter==0 ){
746     Fts5Config *pConfig = p->pConfig;
747     fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
748           "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
749           pConfig->zDb, pConfig->zName
750     ));
751     if( p->rc ) return;
752   }
753 
754   sqlite3_bind_int64(p->pWriter, 1, iRowid);
755   sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
756   sqlite3_step(p->pWriter);
757   p->rc = sqlite3_reset(p->pWriter);
758   sqlite3_bind_null(p->pWriter, 2);
759 }
760 
761 /*
762 ** Execute the following SQL:
763 **
764 **     DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
765 */
fts5DataDelete(Fts5Index * p,i64 iFirst,i64 iLast)766 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
767   if( p->rc!=SQLITE_OK ) return;
768 
769   if( p->pDeleter==0 ){
770     Fts5Config *pConfig = p->pConfig;
771     char *zSql = sqlite3_mprintf(
772         "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
773           pConfig->zDb, pConfig->zName
774     );
775     if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
776   }
777 
778   sqlite3_bind_int64(p->pDeleter, 1, iFirst);
779   sqlite3_bind_int64(p->pDeleter, 2, iLast);
780   sqlite3_step(p->pDeleter);
781   p->rc = sqlite3_reset(p->pDeleter);
782 }
783 
784 /*
785 ** Remove all records associated with segment iSegid.
786 */
fts5DataRemoveSegment(Fts5Index * p,int iSegid)787 static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
788   i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
789   i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
790   fts5DataDelete(p, iFirst, iLast);
791   if( p->pIdxDeleter==0 ){
792     Fts5Config *pConfig = p->pConfig;
793     fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
794           "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
795           pConfig->zDb, pConfig->zName
796     ));
797   }
798   if( p->rc==SQLITE_OK ){
799     sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
800     sqlite3_step(p->pIdxDeleter);
801     p->rc = sqlite3_reset(p->pIdxDeleter);
802   }
803 }
804 
805 /*
806 ** Release a reference to an Fts5Structure object returned by an earlier
807 ** call to fts5StructureRead() or fts5StructureDecode().
808 */
fts5StructureRelease(Fts5Structure * pStruct)809 static void fts5StructureRelease(Fts5Structure *pStruct){
810   if( pStruct && 0>=(--pStruct->nRef) ){
811     int i;
812     assert( pStruct->nRef==0 );
813     for(i=0; i<pStruct->nLevel; i++){
814       sqlite3_free(pStruct->aLevel[i].aSeg);
815     }
816     sqlite3_free(pStruct);
817   }
818 }
819 
/*
** Add one reference to structure object pStruct. pStruct must not be NULL.
*/
static void fts5StructureRef(Fts5Structure *pStruct){
  pStruct->nRef += 1;
}
823 
824 /*
825 ** Deserialize and return the structure record currently stored in serialized
826 ** form within buffer pData/nData.
827 **
828 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
829 ** are over-allocated by one slot. This allows the structure contents
830 ** to be more easily edited.
831 **
832 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
833 ** returned. Otherwise, *ppOut is set to point to the new object and
834 ** SQLITE_OK returned.
835 */
fts5StructureDecode(const u8 * pData,int nData,int * piCookie,Fts5Structure ** ppOut)836 static int fts5StructureDecode(
837   const u8 *pData,                /* Buffer containing serialized structure */
838   int nData,                      /* Size of buffer pData in bytes */
839   int *piCookie,                  /* Configuration cookie value */
840   Fts5Structure **ppOut           /* OUT: Deserialized object */
841 ){
842   int rc = SQLITE_OK;
843   int i = 0;
844   int iLvl;
845   int nLevel = 0;
846   int nSegment = 0;
847   sqlite3_int64 nByte;            /* Bytes of space to allocate at pRet */
848   Fts5Structure *pRet = 0;        /* Structure object to return */
849 
850   /* Grab the cookie value */
851   if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
852   i = 4;
853 
854   /* Read the total number of levels and segments from the start of the
855   ** structure record.  */
856   i += fts5GetVarint32(&pData[i], nLevel);
857   i += fts5GetVarint32(&pData[i], nSegment);
858   if( nLevel>FTS5_MAX_SEGMENT   || nLevel<0
859    || nSegment>FTS5_MAX_SEGMENT || nSegment<0
860   ){
861     return FTS5_CORRUPT;
862   }
863   nByte = (
864       sizeof(Fts5Structure) +                    /* Main structure */
865       sizeof(Fts5StructureLevel) * (nLevel-1)    /* aLevel[] array */
866   );
867   pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
868 
869   if( pRet ){
870     pRet->nRef = 1;
871     pRet->nLevel = nLevel;
872     pRet->nSegment = nSegment;
873     i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
874 
875     for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
876       Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
877       int nTotal = 0;
878       int iSeg;
879 
880       if( i>=nData ){
881         rc = FTS5_CORRUPT;
882       }else{
883         i += fts5GetVarint32(&pData[i], pLvl->nMerge);
884         i += fts5GetVarint32(&pData[i], nTotal);
885         if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT;
886         pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
887             nTotal * sizeof(Fts5StructureSegment)
888         );
889         nSegment -= nTotal;
890       }
891 
892       if( rc==SQLITE_OK ){
893         pLvl->nSeg = nTotal;
894         for(iSeg=0; iSeg<nTotal; iSeg++){
895           Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
896           if( i>=nData ){
897             rc = FTS5_CORRUPT;
898             break;
899           }
900           i += fts5GetVarint32(&pData[i], pSeg->iSegid);
901           i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
902           i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
903           if( pSeg->pgnoLast<pSeg->pgnoFirst ){
904             rc = FTS5_CORRUPT;
905             break;
906           }
907         }
908         if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT;
909         if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT;
910       }
911     }
912     if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT;
913 
914     if( rc!=SQLITE_OK ){
915       fts5StructureRelease(pRet);
916       pRet = 0;
917     }
918   }
919 
920   *ppOut = pRet;
921   return rc;
922 }
923 
924 /*
925 **
926 */
fts5StructureAddLevel(int * pRc,Fts5Structure ** ppStruct)927 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
928   if( *pRc==SQLITE_OK ){
929     Fts5Structure *pStruct = *ppStruct;
930     int nLevel = pStruct->nLevel;
931     sqlite3_int64 nByte = (
932         sizeof(Fts5Structure) +                  /* Main structure */
933         sizeof(Fts5StructureLevel) * (nLevel+1)  /* aLevel[] array */
934     );
935 
936     pStruct = sqlite3_realloc64(pStruct, nByte);
937     if( pStruct ){
938       memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
939       pStruct->nLevel++;
940       *ppStruct = pStruct;
941     }else{
942       *pRc = SQLITE_NOMEM;
943     }
944   }
945 }
946 
947 /*
948 ** Extend level iLvl so that there is room for at least nExtra more
949 ** segments.
950 */
fts5StructureExtendLevel(int * pRc,Fts5Structure * pStruct,int iLvl,int nExtra,int bInsert)951 static void fts5StructureExtendLevel(
952   int *pRc,
953   Fts5Structure *pStruct,
954   int iLvl,
955   int nExtra,
956   int bInsert
957 ){
958   if( *pRc==SQLITE_OK ){
959     Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
960     Fts5StructureSegment *aNew;
961     sqlite3_int64 nByte;
962 
963     nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
964     aNew = sqlite3_realloc64(pLvl->aSeg, nByte);
965     if( aNew ){
966       if( bInsert==0 ){
967         memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
968       }else{
969         int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
970         memmove(&aNew[nExtra], aNew, nMove);
971         memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
972       }
973       pLvl->aSeg = aNew;
974     }else{
975       *pRc = SQLITE_NOMEM;
976     }
977   }
978 }
979 
fts5StructureReadUncached(Fts5Index * p)980 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
981   Fts5Structure *pRet = 0;
982   Fts5Config *pConfig = p->pConfig;
983   int iCookie;                    /* Configuration cookie */
984   Fts5Data *pData;
985 
986   pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
987   if( p->rc==SQLITE_OK ){
988     /* TODO: Do we need this if the leaf-index is appended? Probably... */
989     memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
990     p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
991     if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){
992       p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
993     }
994     fts5DataRelease(pData);
995     if( p->rc!=SQLITE_OK ){
996       fts5StructureRelease(pRet);
997       pRet = 0;
998     }
999   }
1000 
1001   return pRet;
1002 }
1003 
fts5IndexDataVersion(Fts5Index * p)1004 static i64 fts5IndexDataVersion(Fts5Index *p){
1005   i64 iVersion = 0;
1006 
1007   if( p->rc==SQLITE_OK ){
1008     if( p->pDataVersion==0 ){
1009       p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
1010           sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
1011           );
1012       if( p->rc ) return 0;
1013     }
1014 
1015     if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
1016       iVersion = sqlite3_column_int64(p->pDataVersion, 0);
1017     }
1018     p->rc = sqlite3_reset(p->pDataVersion);
1019   }
1020 
1021   return iVersion;
1022 }
1023 
1024 /*
1025 ** Read, deserialize and return the structure record.
1026 **
1027 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1028 ** are over-allocated as described for function fts5StructureDecode()
1029 ** above.
1030 **
1031 ** If an error occurs, NULL is returned and an error code left in the
1032 ** Fts5Index handle. If an error has already occurred when this function
1033 ** is called, it is a no-op.
1034 */
fts5StructureRead(Fts5Index * p)1035 static Fts5Structure *fts5StructureRead(Fts5Index *p){
1036 
1037   if( p->pStruct==0 ){
1038     p->iStructVersion = fts5IndexDataVersion(p);
1039     if( p->rc==SQLITE_OK ){
1040       p->pStruct = fts5StructureReadUncached(p);
1041     }
1042   }
1043 
1044 #if 0
1045   else{
1046     Fts5Structure *pTest = fts5StructureReadUncached(p);
1047     if( pTest ){
1048       int i, j;
1049       assert_nc( p->pStruct->nSegment==pTest->nSegment );
1050       assert_nc( p->pStruct->nLevel==pTest->nLevel );
1051       for(i=0; i<pTest->nLevel; i++){
1052         assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
1053         assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
1054         for(j=0; j<pTest->aLevel[i].nSeg; j++){
1055           Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
1056           Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
1057           assert_nc( p1->iSegid==p2->iSegid );
1058           assert_nc( p1->pgnoFirst==p2->pgnoFirst );
1059           assert_nc( p1->pgnoLast==p2->pgnoLast );
1060         }
1061       }
1062       fts5StructureRelease(pTest);
1063     }
1064   }
1065 #endif
1066 
1067   if( p->rc!=SQLITE_OK ) return 0;
1068   assert( p->iStructVersion!=0 );
1069   assert( p->pStruct!=0 );
1070   fts5StructureRef(p->pStruct);
1071   return p->pStruct;
1072 }
1073 
/*
** Discard the cached structure object held in p->pStruct, if any, by
** releasing the cache's reference to it and clearing the pointer.
*/
static void fts5StructureInvalidate(Fts5Index *p){
  Fts5Structure *pCached = p->pStruct;
  if( pCached==0 ) return;
  p->pStruct = 0;
  fts5StructureRelease(pCached);
}
1080 
/*
** Return the sum of Fts5StructureLevel.nSeg over all levels of index
** structure pStruct, or 0 if pStruct is NULL. This function is only
** compiled when SQLITE_DEBUG is defined, as it is only ever used as
** part of assert() conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Running total of segments */
  int i = 0;                      /* Current level */

  if( pStruct==0 ) return 0;
  while( i<pStruct->nLevel ){
    nTotal += pStruct->aLevel[i].nSeg;
    i++;
  }
  return nTotal;
}
#endif
1098 
/*
** Append nBlob bytes from pBlob to buffer pBuf without growing it. The
** caller must already have reserved enough space; this is checked with
** an assert() only. Every macro argument is parenthesized so that
** compound expressions (e.g. "b ? x : y") may be passed safely.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {     \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );           \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));         \
  (pBuf)->n += (nBlob);                                    \
}

/*
** Append integer iVal to buffer pBuf as a varint without growing the
** buffer. The caller must already have reserved enough space; this is
** checked, after the write, with an assert() only.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));  \
  assert( (pBuf)->nSpace>=(pBuf)->n );                          \
}
1109 
1110 
1111 /*
1112 ** Serialize and store the "structure" record.
1113 **
1114 ** If an error occurs, leave an error code in the Fts5Index object. If an
1115 ** error has already occurred, this function is a no-op.
1116 */
fts5StructureWrite(Fts5Index * p,Fts5Structure * pStruct)1117 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
1118   if( p->rc==SQLITE_OK ){
1119     Fts5Buffer buf;               /* Buffer to serialize record into */
1120     int iLvl;                     /* Used to iterate through levels */
1121     int iCookie;                  /* Cookie value to store */
1122 
1123     assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
1124     memset(&buf, 0, sizeof(Fts5Buffer));
1125 
1126     /* Append the current configuration cookie */
1127     iCookie = p->pConfig->iCookie;
1128     if( iCookie<0 ) iCookie = 0;
1129 
1130     if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){
1131       sqlite3Fts5Put32(buf.p, iCookie);
1132       buf.n = 4;
1133       fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
1134       fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
1135       fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
1136     }
1137 
1138     for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
1139       int iSeg;                     /* Used to iterate through segments */
1140       Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1141       fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
1142       fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
1143       assert( pLvl->nMerge<=pLvl->nSeg );
1144 
1145       for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
1146         fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
1147         fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
1148         fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
1149       }
1150     }
1151 
1152     fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
1153     fts5BufferFree(&buf);
1154   }
1155 }
1156 
/*
** Debugging aid. The implementation of fts5PrintStructure() below, which
** formats pStruct using fts5DebugStructure() and prints it to stdout
** prefixed by zCaption, is disabled by the "#if 0". In the build as
** written, the macro in the #else branch is used instead, so that each
** fts5PrintStructure(x,y) invocation expands to nothing.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(buf));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif
1171 
/*
** Return the number of pages spanned by segment pSeg, counting both the
** first (pgnoFirst) and last (pgnoLast) page.
*/
static int fts5SegmentSize(Fts5StructureSegment *pSeg){
  int nPage = 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
  return nPage;
}
1175 
1176 /*
1177 ** Return a copy of index structure pStruct. Except, promote as many
1178 ** segments as possible to level iPromote. If an OOM occurs, NULL is
1179 ** returned.
1180 */
fts5StructurePromoteTo(Fts5Index * p,int iPromote,int szPromote,Fts5Structure * pStruct)1181 static void fts5StructurePromoteTo(
1182   Fts5Index *p,
1183   int iPromote,
1184   int szPromote,
1185   Fts5Structure *pStruct
1186 ){
1187   int il, is;
1188   Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
1189 
1190   if( pOut->nMerge==0 ){
1191     for(il=iPromote+1; il<pStruct->nLevel; il++){
1192       Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
1193       if( pLvl->nMerge ) return;
1194       for(is=pLvl->nSeg-1; is>=0; is--){
1195         int sz = fts5SegmentSize(&pLvl->aSeg[is]);
1196         if( sz>szPromote ) return;
1197         fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
1198         if( p->rc ) return;
1199         memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
1200         pOut->nSeg++;
1201         pLvl->nSeg--;
1202       }
1203     }
1204   }
1205 }
1206 
1207 /*
1208 ** A new segment has just been written to level iLvl of index structure
1209 ** pStruct. This function determines if any segments should be promoted
1210 ** as a result. Segments are promoted in two scenarios:
1211 **
1212 **   a) If the segment just written is smaller than one or more segments
1213 **      within the previous populated level, it is promoted to the previous
1214 **      populated level.
1215 **
1216 **   b) If the segment just written is larger than the newest segment on
1217 **      the next populated level, then that segment, and any other adjacent
1218 **      segments that are also smaller than the one just written, are
1219 **      promoted.
1220 **
1221 ** If one or more segments are promoted, the structure object is updated
1222 ** to reflect this.
1223 */
fts5StructurePromote(Fts5Index * p,int iLvl,Fts5Structure * pStruct)1224 static void fts5StructurePromote(
1225   Fts5Index *p,                   /* FTS5 backend object */
1226   int iLvl,                       /* Index level just updated */
1227   Fts5Structure *pStruct          /* Index structure */
1228 ){
1229   if( p->rc==SQLITE_OK ){
1230     int iTst;
1231     int iPromote = -1;
1232     int szPromote = 0;            /* Promote anything this size or smaller */
1233     Fts5StructureSegment *pSeg;   /* Segment just written */
1234     int szSeg;                    /* Size of segment just written */
1235     int nSeg = pStruct->aLevel[iLvl].nSeg;
1236 
1237     if( nSeg==0 ) return;
1238     pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
1239     szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
1240 
1241     /* Check for condition (a) */
1242     for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
1243     if( iTst>=0 ){
1244       int i;
1245       int szMax = 0;
1246       Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
1247       assert( pTst->nMerge==0 );
1248       for(i=0; i<pTst->nSeg; i++){
1249         int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
1250         if( sz>szMax ) szMax = sz;
1251       }
1252       if( szMax>=szSeg ){
1253         /* Condition (a) is true. Promote the newest segment on level
1254         ** iLvl to level iTst.  */
1255         iPromote = iTst;
1256         szPromote = szMax;
1257       }
1258     }
1259 
1260     /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
1261     ** is a no-op if it is not.  */
1262     if( iPromote<0 ){
1263       iPromote = iLvl;
1264       szPromote = szSeg;
1265     }
1266     fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
1267   }
1268 }
1269 
1270 
1271 /*
1272 ** Advance the iterator passed as the only argument. If the end of the
1273 ** doclist-index page is reached, return non-zero.
1274 */
fts5DlidxLvlNext(Fts5DlidxLvl * pLvl)1275 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
1276   Fts5Data *pData = pLvl->pData;
1277 
1278   if( pLvl->iOff==0 ){
1279     assert( pLvl->bEof==0 );
1280     pLvl->iOff = 1;
1281     pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
1282     pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
1283     pLvl->iFirstOff = pLvl->iOff;
1284   }else{
1285     int iOff;
1286     for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
1287       if( pData->p[iOff] ) break;
1288     }
1289 
1290     if( iOff<pData->nn ){
1291       i64 iVal;
1292       pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
1293       iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
1294       pLvl->iRowid += iVal;
1295       pLvl->iOff = iOff;
1296     }else{
1297       pLvl->bEof = 1;
1298     }
1299   }
1300 
1301   return pLvl->bEof;
1302 }
1303 
1304 /*
1305 ** Advance the iterator passed as the only argument.
1306 */
fts5DlidxIterNextR(Fts5Index * p,Fts5DlidxIter * pIter,int iLvl)1307 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1308   Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1309 
1310   assert( iLvl<pIter->nLvl );
1311   if( fts5DlidxLvlNext(pLvl) ){
1312     if( (iLvl+1) < pIter->nLvl ){
1313       fts5DlidxIterNextR(p, pIter, iLvl+1);
1314       if( pLvl[1].bEof==0 ){
1315         fts5DataRelease(pLvl->pData);
1316         memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1317         pLvl->pData = fts5DataRead(p,
1318             FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1319         );
1320         if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
1321       }
1322     }
1323   }
1324 
1325   return pIter->aLvl[0].bEof;
1326 }
/*
** Advance doclist-index iterator pIter to the next entry, starting the
** operation at level 0. Returns the EOF flag of level 0.
*/
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
  int bEof = fts5DlidxIterNextR(p, pIter, 0);
  return bEof;
}
1330 
1331 /*
1332 ** The iterator passed as the first argument has the following fields set
1333 ** as follows. This function sets up the rest of the iterator so that it
1334 ** points to the first rowid in the doclist-index.
1335 **
1336 **   pData:
1337 **     pointer to doclist-index record,
1338 **
1339 ** When this function is called pIter->iLeafPgno is the page number the
1340 ** doclist is associated with (the one featuring the term).
1341 */
fts5DlidxIterFirst(Fts5DlidxIter * pIter)1342 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
1343   int i;
1344   for(i=0; i<pIter->nLvl; i++){
1345     fts5DlidxLvlNext(&pIter->aLvl[i]);
1346   }
1347   return pIter->aLvl[0].bEof;
1348 }
1349 
1350 
/*
** Return 1 if an error has occurred (p->rc is set) or if level 0 of
** doclist-index iterator pIter is at EOF. Otherwise return 0.
*/
static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  if( p->rc!=SQLITE_OK ) return 1;
  return pIter->aLvl[0].bEof ? 1 : 0;
}
1354 
/*
** Position doclist-index iterator pIter on the last entry of the
** doclist-index.
**
** Levels are processed from the highest down to level 0. Each level is
** stepped forward until it hits EOF, after which its EOF flag is cleared
** so that it is left on its last entry. The page for the level below
** is then replaced by the one that last entry points to. Processing
** stops early if an error is stored in p->rc.
*/
static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
  int iLvl;

  for(iLvl=pIter->nLvl-1; p->rc==SQLITE_OK && iLvl>=0; iLvl--){
    Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

    /* Advance this level to the last entry on its page */
    while( fts5DlidxLvlNext(pLvl)==0 ){}
    pLvl->bEof = 0;

    if( iLvl>0 ){
      Fts5DlidxLvl *pChild = &pIter->aLvl[iLvl-1];
      fts5DataRelease(pChild->pData);
      memset(pChild, 0, sizeof(Fts5DlidxLvl));
      pChild->pData = fts5DataRead(p,
          FTS5_DLIDX_ROWID(pIter->iSegid, iLvl-1, pLvl->iLeafPgno)
      );
    }
  }
}
1374 
/*
** Move the iterator passed as the only argument to the previous entry.
**
** If the iterator is already at (or before) the first entry on the page,
** i.e. iOff<=iFirstOff, the bEof flag is set and nothing else changes.
** Otherwise iOff is moved back to the start of the varint preceding the
** current position, that varint is subtracted from iRowid, and iLeafPgno
** is decremented - once for the entry itself and once more for each
** 0x00 varint found immediately before it.
**
** The return value is the (possibly updated) bEof flag.
*/
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
  int iOff = pLvl->iOff;

  assert( pLvl->bEof==0 );
  if( iOff<=pLvl->iFirstOff ){
    pLvl->bEof = 1;
  }else{
    u8 *a = pLvl->pData->p;
    i64 iVal;
    int iLimit;
    int ii;
    int nZero = 0;

    /* Currently iOff points to the first byte of a varint. This block
    ** decrements iOff until it points to the first byte of the previous
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }

    /* Undo the rowid delta stored in the varint just located */
    fts5GetVarint(&a[iOff], (u64*)&iVal);
    pLvl->iRowid -= iVal;
    pLvl->iLeafPgno--;

    /* Skip backwards past any 0x00 varints. */
    for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
      nZero++;
    }
    if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
      /* The byte immediately before the last 0x00 byte has the 0x80 bit
      ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
      ** bytes before a[ii]. */
      int bZero = 0;              /* True if last 0x00 counts */
      if( (ii-8)>=pLvl->iFirstOff ){
        int j;
        for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
        bZero = (j>8);
      }
      if( bZero==0 ) nZero--;
    }
    /* Each 0x00 varint skipped accounts for one leaf page and one byte */
    pLvl->iLeafPgno -= nZero;
    pLvl->iOff = iOff - nZero;
  }

  return pLvl->bEof;
}
1426 
/*
** Move level iLvl of doclist-index iterator pIter to its previous entry.
**
** If level iLvl is already on the first entry of its page and there is
** a parent level (iLvl+1), the parent is moved back recursively. If the
** parent is then not at EOF, the page for level iLvl is released and
** replaced by the one identified by the parent's current leaf page
** number, and level iLvl is positioned on the last entry of that page.
**
** The return value is the EOF flag of level 0 of the iterator.
*/
static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlPrev(pLvl) && (iLvl+1)<pIter->nLvl ){
    Fts5DlidxLvl *pParent = &pLvl[1];

    fts5DlidxIterPrevR(p, pIter, iLvl+1);
    if( pParent->bEof==0 ){
      fts5DataRelease(pLvl->pData);
      memset(pLvl, 0, sizeof(Fts5DlidxLvl));
      pLvl->pData = fts5DataRead(p,
          FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pParent->iLeafPgno)
      );
      if( pLvl->pData ){
        /* Run to the end of the new page, then clear the EOF flag so
        ** that the level is left on its last entry. */
        while( fts5DlidxLvlNext(pLvl)==0 ){}
        pLvl->bEof = 0;
      }
    }
  }

  return pIter->aLvl[0].bEof;
}
/*
** Move doclist-index iterator pIter to the previous entry, starting the
** operation at level 0. Returns the EOF flag of level 0.
*/
static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
  int bEof = fts5DlidxIterPrevR(p, pIter, 0);
  return bEof;
}
1453 
1454 /*
1455 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1456 */
fts5DlidxIterFree(Fts5DlidxIter * pIter)1457 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
1458   if( pIter ){
1459     int i;
1460     for(i=0; i<pIter->nLvl; i++){
1461       fts5DataRelease(pIter->aLvl[i].pData);
1462     }
1463     sqlite3_free(pIter);
1464   }
1465 }
1466 
fts5DlidxIterInit(Fts5Index * p,int bRev,int iSegid,int iLeafPg)1467 static Fts5DlidxIter *fts5DlidxIterInit(
1468   Fts5Index *p,                   /* Fts5 Backend to iterate within */
1469   int bRev,                       /* True for ORDER BY ASC */
1470   int iSegid,                     /* Segment id */
1471   int iLeafPg                     /* Leaf page number to load dlidx for */
1472 ){
1473   Fts5DlidxIter *pIter = 0;
1474   int i;
1475   int bDone = 0;
1476 
1477   for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
1478     sqlite3_int64 nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
1479     Fts5DlidxIter *pNew;
1480 
1481     pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte);
1482     if( pNew==0 ){
1483       p->rc = SQLITE_NOMEM;
1484     }else{
1485       i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
1486       Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
1487       pIter = pNew;
1488       memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1489       pLvl->pData = fts5DataRead(p, iRowid);
1490       if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
1491         bDone = 1;
1492       }
1493       pIter->nLvl = i+1;
1494     }
1495   }
1496 
1497   if( p->rc==SQLITE_OK ){
1498     pIter->iSegid = iSegid;
1499     if( bRev==0 ){
1500       fts5DlidxIterFirst(pIter);
1501     }else{
1502       fts5DlidxIterLast(p, pIter);
1503     }
1504   }
1505 
1506   if( p->rc!=SQLITE_OK ){
1507     fts5DlidxIterFree(pIter);
1508     pIter = 0;
1509   }
1510 
1511   return pIter;
1512 }
1513 
fts5DlidxIterRowid(Fts5DlidxIter * pIter)1514 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
1515   return pIter->aLvl[0].iRowid;
1516 }
/*
** Return the leaf page number that level 0 of doclist-index iterator
** pIter currently points to.
*/
static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
  Fts5DlidxLvl *pLvl0 = &pIter->aLvl[0];
  return pLvl0->iLeafPgno;
}
1520 
1521 /*
1522 ** Load the next leaf page into the segment iterator.
1523 */
fts5SegIterNextPage(Fts5Index * p,Fts5SegIter * pIter)1524 static void fts5SegIterNextPage(
1525   Fts5Index *p,                   /* FTS5 backend object */
1526   Fts5SegIter *pIter              /* Iterator to advance to next page */
1527 ){
1528   Fts5Data *pLeaf;
1529   Fts5StructureSegment *pSeg = pIter->pSeg;
1530   fts5DataRelease(pIter->pLeaf);
1531   pIter->iLeafPgno++;
1532   if( pIter->pNextLeaf ){
1533     pIter->pLeaf = pIter->pNextLeaf;
1534     pIter->pNextLeaf = 0;
1535   }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
1536     pIter->pLeaf = fts5LeafRead(p,
1537         FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
1538     );
1539   }else{
1540     pIter->pLeaf = 0;
1541   }
1542   pLeaf = pIter->pLeaf;
1543 
1544   if( pLeaf ){
1545     pIter->iPgidxOff = pLeaf->szLeaf;
1546     if( fts5LeafIsTermless(pLeaf) ){
1547       pIter->iEndofDoclist = pLeaf->nn+1;
1548     }else{
1549       pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
1550           pIter->iEndofDoclist
1551       );
1552     }
1553   }
1554 }
1555 
1556 /*
1557 ** Argument p points to a buffer containing a varint to be interpreted as a
1558 ** position list size field. Read the varint and return the number of bytes
1559 ** read. Before returning, set *pnSz to the number of bytes in the position
1560 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
1561 */
fts5GetPoslistSize(const u8 * p,int * pnSz,int * pbDel)1562 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
1563   int nSz;
1564   int n = 0;
1565   fts5FastGetVarint32(p, n, nSz);
1566   assert_nc( nSz>=0 );
1567   *pnSz = nSz/2;
1568   *pbDel = nSz & 0x0001;
1569   return n;
1570 }
1571 
1572 /*
1573 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
1574 ** position-list size field. Read the value of the field and store it
1575 ** in the following variables:
1576 **
1577 **   Fts5SegIter.nPos
1578 **   Fts5SegIter.bDel
1579 **
1580 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1581 ** position list content (if any).
1582 */
fts5SegIterLoadNPos(Fts5Index * p,Fts5SegIter * pIter)1583 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
1584   if( p->rc==SQLITE_OK ){
1585     int iOff = pIter->iLeafOffset;  /* Offset to read at */
1586     ASSERT_SZLEAF_OK(pIter->pLeaf);
1587     if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1588       int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
1589       pIter->bDel = 0;
1590       pIter->nPos = 1;
1591       if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1592         pIter->bDel = 1;
1593         iOff++;
1594         if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1595           pIter->nPos = 1;
1596           iOff++;
1597         }else{
1598           pIter->nPos = 0;
1599         }
1600       }
1601     }else{
1602       int nSz;
1603       fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
1604       pIter->bDel = (nSz & 0x0001);
1605       pIter->nPos = nSz>>1;
1606       assert_nc( pIter->nPos>=0 );
1607     }
1608     pIter->iLeafOffset = iOff;
1609   }
1610 }
1611 
/*
** Read the rowid varint stored at Fts5SegIter.iLeafOffset into
** Fts5SegIter.iRowid, and leave iLeafOffset pointing to the byte that
** follows it.
**
** If iLeafOffset is at or past the end of the current leaf (szLeaf), the
** next leaf page is loaded first and the rowid is read from byte offset 4
** of that page. If there is no next page, p->rc is set to FTS5_CORRUPT
** (unless it already holds some other error) and the function returns.
*/
static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  int iOff = pIter->iLeafOffset;
  u8 *a;                          /* Buffer to read data from */

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  if( iOff>=pIter->pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);
    if( pIter->pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    iOff = 4;
  }

  a = pIter->pLeaf->p;
  iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iOff;
}
1629 
1630 /*
1631 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
1632 ** "nSuffix" field of a term. Function parameter nKeep contains the value
1633 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
1634 ** the first term in the segment).
1635 **
1636 ** This function populates:
1637 **
1638 **   Fts5SegIter.term
1639 **   Fts5SegIter.rowid
1640 **
1641 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1642 ** the first position list. The position list belonging to document
1643 ** (Fts5SegIter.iRowid).
1644 */
fts5SegIterLoadTerm(Fts5Index * p,Fts5SegIter * pIter,int nKeep)1645 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
1646   u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
1647   int iOff = pIter->iLeafOffset;  /* Offset to read at */
1648   int nNew;                       /* Bytes of new data */
1649 
1650   iOff += fts5GetVarint32(&a[iOff], nNew);
1651   if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){
1652     p->rc = FTS5_CORRUPT;
1653     return;
1654   }
1655   pIter->term.n = nKeep;
1656   fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
1657   assert( pIter->term.n<=pIter->term.nSpace );
1658   iOff += nNew;
1659   pIter->iTermLeafOffset = iOff;
1660   pIter->iTermLeafPgno = pIter->iLeafPgno;
1661   pIter->iLeafOffset = iOff;
1662 
1663   if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
1664     pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1665   }else{
1666     int nExtra;
1667     pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
1668     pIter->iEndofDoclist += nExtra;
1669   }
1670 
1671   fts5SegIterLoadRowid(p, pIter);
1672 }
1673 
1674 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
1675 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
1676 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
1677 
fts5SegIterSetNext(Fts5Index * p,Fts5SegIter * pIter)1678 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
1679   if( pIter->flags & FTS5_SEGITER_REVERSE ){
1680     pIter->xNext = fts5SegIterNext_Reverse;
1681   }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1682     pIter->xNext = fts5SegIterNext_None;
1683   }else{
1684     pIter->xNext = fts5SegIterNext;
1685   }
1686 }
1687 
1688 /*
1689 ** Initialize the iterator object pIter to iterate through the entries in
1690 ** segment pSeg. The iterator is left pointing to the first entry when
1691 ** this function returns.
1692 **
1693 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1694 ** an error has already occurred when this function is called, it is a no-op.
1695 */
fts5SegIterInit(Fts5Index * p,Fts5StructureSegment * pSeg,Fts5SegIter * pIter)1696 static void fts5SegIterInit(
1697   Fts5Index *p,                   /* FTS index object */
1698   Fts5StructureSegment *pSeg,     /* Description of segment */
1699   Fts5SegIter *pIter              /* Object to populate */
1700 ){
1701   if( pSeg->pgnoFirst==0 ){
1702     /* This happens if the segment is being used as an input to an incremental
1703     ** merge and all data has already been "trimmed". See function
1704     ** fts5TrimSegments() for details. In this case leave the iterator empty.
1705     ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
1706     ** at EOF already. */
1707     assert( pIter->pLeaf==0 );
1708     return;
1709   }
1710 
1711   if( p->rc==SQLITE_OK ){
1712     memset(pIter, 0, sizeof(*pIter));
1713     fts5SegIterSetNext(p, pIter);
1714     pIter->pSeg = pSeg;
1715     pIter->iLeafPgno = pSeg->pgnoFirst-1;
1716     fts5SegIterNextPage(p, pIter);
1717   }
1718 
1719   if( p->rc==SQLITE_OK ){
1720     pIter->iLeafOffset = 4;
1721     assert_nc( pIter->pLeaf->nn>4 );
1722     assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
1723     pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
1724     fts5SegIterLoadTerm(p, pIter, 0);
1725     fts5SegIterLoadNPos(p, pIter);
1726   }
1727 }
1728 
1729 /*
1730 ** This function is only ever called on iterators created by calls to
1731 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
1732 **
1733 ** The iterator is in an unusual state when this function is called: the
1734 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
1735 ** the position-list size field for the first relevant rowid on the page.
1736 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
1737 **
1738 ** This function advances the iterator so that it points to the last
1739 ** relevant rowid on the page and, if necessary, initializes the
1740 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
1741 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
1742 ** byte of the position list content associated with said rowid.
1743 */
fts5SegIterReverseInitPage(Fts5Index * p,Fts5SegIter * pIter)1744 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
1745   int eDetail = p->pConfig->eDetail;
1746   int n = pIter->pLeaf->szLeaf;
1747   int i = pIter->iLeafOffset;
1748   u8 *a = pIter->pLeaf->p;
1749   int iRowidOffset = 0;
1750 
1751   if( n>pIter->iEndofDoclist ){
1752     n = pIter->iEndofDoclist;
1753   }
1754 
1755   ASSERT_SZLEAF_OK(pIter->pLeaf);
1756   while( 1 ){
1757     i64 iDelta = 0;
1758 
1759     if( eDetail==FTS5_DETAIL_NONE ){
1760       /* todo */
1761       if( i<n && a[i]==0 ){
1762         i++;
1763         if( i<n && a[i]==0 ) i++;
1764       }
1765     }else{
1766       int nPos;
1767       int bDummy;
1768       i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
1769       i += nPos;
1770     }
1771     if( i>=n ) break;
1772     i += fts5GetVarint(&a[i], (u64*)&iDelta);
1773     pIter->iRowid += iDelta;
1774 
1775     /* If necessary, grow the pIter->aRowidOffset[] array. */
1776     if( iRowidOffset>=pIter->nRowidOffset ){
1777       int nNew = pIter->nRowidOffset + 8;
1778       int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int));
1779       if( aNew==0 ){
1780         p->rc = SQLITE_NOMEM;
1781         break;
1782       }
1783       pIter->aRowidOffset = aNew;
1784       pIter->nRowidOffset = nNew;
1785     }
1786 
1787     pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
1788     pIter->iLeafOffset = i;
1789   }
1790   pIter->iRowidOffset = iRowidOffset;
1791   fts5SegIterLoadNPos(p, pIter);
1792 }
1793 
1794 /*
1795 **
1796 */
fts5SegIterReverseNewPage(Fts5Index * p,Fts5SegIter * pIter)1797 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
1798   assert( pIter->flags & FTS5_SEGITER_REVERSE );
1799   assert( pIter->flags & FTS5_SEGITER_ONETERM );
1800 
1801   fts5DataRelease(pIter->pLeaf);
1802   pIter->pLeaf = 0;
1803   while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
1804     Fts5Data *pNew;
1805     pIter->iLeafPgno--;
1806     pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
1807           pIter->pSeg->iSegid, pIter->iLeafPgno
1808     ));
1809     if( pNew ){
1810       /* iTermLeafOffset may be equal to szLeaf if the term is the last
1811       ** thing on the page - i.e. the first rowid is on the following page.
1812       ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
1813       if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
1814         assert( pIter->pLeaf==0 );
1815         if( pIter->iTermLeafOffset<pNew->szLeaf ){
1816           pIter->pLeaf = pNew;
1817           pIter->iLeafOffset = pIter->iTermLeafOffset;
1818         }
1819       }else{
1820         int iRowidOff;
1821         iRowidOff = fts5LeafFirstRowidOff(pNew);
1822         if( iRowidOff ){
1823           pIter->pLeaf = pNew;
1824           pIter->iLeafOffset = iRowidOff;
1825         }
1826       }
1827 
1828       if( pIter->pLeaf ){
1829         u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
1830         pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
1831         break;
1832       }else{
1833         fts5DataRelease(pNew);
1834       }
1835     }
1836   }
1837 
1838   if( pIter->pLeaf ){
1839     pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1840     fts5SegIterReverseInitPage(p, pIter);
1841   }
1842 }
1843 
1844 /*
1845 ** Return true if the iterator passed as the second argument currently
1846 ** points to a delete marker. A delete marker is an entry with a 0 byte
1847 ** position-list.
1848 */
fts5MultiIterIsEmpty(Fts5Index * p,Fts5Iter * pIter)1849 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
1850   Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
1851   return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
1852 }
1853 
1854 /*
1855 ** Advance iterator pIter to the next entry.
1856 **
1857 ** This version of fts5SegIterNext() is only used by reverse iterators.
1858 */
fts5SegIterNext_Reverse(Fts5Index * p,Fts5SegIter * pIter,int * pbUnused)1859 static void fts5SegIterNext_Reverse(
1860   Fts5Index *p,                   /* FTS5 backend object */
1861   Fts5SegIter *pIter,             /* Iterator to advance */
1862   int *pbUnused                   /* Unused */
1863 ){
1864   assert( pIter->flags & FTS5_SEGITER_REVERSE );
1865   assert( pIter->pNextLeaf==0 );
1866   UNUSED_PARAM(pbUnused);
1867 
1868   if( pIter->iRowidOffset>0 ){
1869     u8 *a = pIter->pLeaf->p;
1870     int iOff;
1871     i64 iDelta;
1872 
1873     pIter->iRowidOffset--;
1874     pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
1875     fts5SegIterLoadNPos(p, pIter);
1876     iOff = pIter->iLeafOffset;
1877     if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
1878       iOff += pIter->nPos;
1879     }
1880     fts5GetVarint(&a[iOff], (u64*)&iDelta);
1881     pIter->iRowid -= iDelta;
1882   }else{
1883     fts5SegIterReverseNewPage(p, pIter);
1884   }
1885 }
1886 
1887 /*
1888 ** Advance iterator pIter to the next entry.
1889 **
1890 ** This version of fts5SegIterNext() is only used if detail=none and the
1891 ** iterator is not a reverse direction iterator.
1892 */
fts5SegIterNext_None(Fts5Index * p,Fts5SegIter * pIter,int * pbNewTerm)1893 static void fts5SegIterNext_None(
1894   Fts5Index *p,                   /* FTS5 backend object */
1895   Fts5SegIter *pIter,             /* Iterator to advance */
1896   int *pbNewTerm                  /* OUT: Set for new term */
1897 ){
1898   int iOff;
1899 
1900   assert( p->rc==SQLITE_OK );
1901   assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
1902   assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
1903 
1904   ASSERT_SZLEAF_OK(pIter->pLeaf);
1905   iOff = pIter->iLeafOffset;
1906 
1907   /* Next entry is on the next page */
1908   if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
1909     fts5SegIterNextPage(p, pIter);
1910     if( p->rc || pIter->pLeaf==0 ) return;
1911     pIter->iRowid = 0;
1912     iOff = 4;
1913   }
1914 
1915   if( iOff<pIter->iEndofDoclist ){
1916     /* Next entry is on the current page */
1917     i64 iDelta;
1918     iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
1919     pIter->iLeafOffset = iOff;
1920     pIter->iRowid += iDelta;
1921   }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
1922     if( pIter->pSeg ){
1923       int nKeep = 0;
1924       if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
1925         iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
1926       }
1927       pIter->iLeafOffset = iOff;
1928       fts5SegIterLoadTerm(p, pIter, nKeep);
1929     }else{
1930       const u8 *pList = 0;
1931       const char *zTerm = 0;
1932       int nList;
1933       sqlite3Fts5HashScanNext(p->pHash);
1934       sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
1935       if( pList==0 ) goto next_none_eof;
1936       pIter->pLeaf->p = (u8*)pList;
1937       pIter->pLeaf->nn = nList;
1938       pIter->pLeaf->szLeaf = nList;
1939       pIter->iEndofDoclist = nList;
1940       sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
1941       pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
1942     }
1943 
1944     if( pbNewTerm ) *pbNewTerm = 1;
1945   }else{
1946     goto next_none_eof;
1947   }
1948 
1949   fts5SegIterLoadNPos(p, pIter);
1950 
1951   return;
1952  next_none_eof:
1953   fts5DataRelease(pIter->pLeaf);
1954   pIter->pLeaf = 0;
1955 }
1956 
1957 
/*
** Advance iterator pIter to the next entry.
**
** This is the xNext implementation installed by fts5SegIterSetNext() for
** forward iterators when the detail level is not FTS5_DETAIL_NONE.
**
** If the iterator moves on to a new term and pbNewTerm is not NULL,
** *pbNewTerm is set to 1. A FTS5_SEGITER_ONETERM iterator is instead left
** at EOF (pIter->pLeaf==0) when its doclist is exhausted.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
** is not considered an error if the iterator reaches EOF. If an error has
** already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterNext(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  Fts5Data *pLeaf = pIter->pLeaf;
  int iOff;
  int bNewTerm = 0;               /* True once a new term has been reached */
  int nKeep = 0;                  /* Prefix bytes shared with previous term */
  u8 *a;
  int n;

  assert( pbNewTerm==0 || *pbNewTerm==0 );
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  /* Search for the end of the position list within the current page. */
  a = pLeaf->p;
  n = pLeaf->szLeaf;

  ASSERT_SZLEAF_OK(pLeaf);
  iOff = pIter->iLeafOffset + pIter->nPos;

  if( iOff<n ){
    /* The next entry is on the current page. */
    assert_nc( iOff<=pIter->iEndofDoclist );
    if( iOff>=pIter->iEndofDoclist ){
      /* End of the current doclist: a term follows. Every term except the
      ** first on the page begins with a prefix-length varint. */
      bNewTerm = 1;
      if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
        iOff += fts5GetVarint32(&a[iOff], nKeep);
      }
    }else{
      /* Another rowid in the same doclist, stored as a delta. */
      u64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
      pIter->iRowid += iDelta;
      assert_nc( iDelta>0 );
    }
    pIter->iLeafOffset = iOff;

  }else if( pIter->pSeg==0 ){
    /* No segment: the iterator is reading from the in-memory hash table.
    ** Unless this is a one-term iterator, fetch the next scan entry. */
    const u8 *pList = 0;
    const char *zTerm = 0;
    int nList = 0;
    assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
    if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
      sqlite3Fts5HashScanNext(p->pHash);
      sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
    }
    if( pList==0 ){
      /* EOF */
      fts5DataRelease(pIter->pLeaf);
      pIter->pLeaf = 0;
    }else{
      /* Re-point the pseudo leaf at the doclist of the new hash entry.
      ** pList is only non-NULL for non-ONETERM iterators, for which the
      ** assert() above requires pbNewTerm to be non-NULL. */
      pIter->pLeaf->p = (u8*)pList;
      pIter->pLeaf->nn = nList;
      pIter->pLeaf->szLeaf = nList;
      pIter->iEndofDoclist = nList+1;
      sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
          (u8*)zTerm);
      pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
      *pbNewTerm = 1;
    }
  }else{
    iOff = 0;
    /* Next entry is not on the current page */
    while( iOff==0 ){
      fts5SegIterNextPage(p, pIter);
      pLeaf = pIter->pLeaf;
      if( pLeaf==0 ) break;
      ASSERT_SZLEAF_OK(pLeaf);
      if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
        /* The page contains a rowid that continues the current doclist. */
        iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
        pIter->iLeafOffset = iOff;

        /* If the page has a page-index (nn>szLeaf), its first entry gives
        ** the offset at which the current doclist ends. */
        if( pLeaf->nn>pLeaf->szLeaf ){
          pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
              &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
          );
        }
      }
      else if( pLeaf->nn>pLeaf->szLeaf ){
        /* No usable rowid offset, but there is a page-index: position the
        ** iterator on the first term of the page. */
        pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
            &pLeaf->p[pLeaf->szLeaf], iOff
        );
        pIter->iLeafOffset = iOff;
        pIter->iEndofDoclist = iOff;
        bNewTerm = 1;
      }
      assert_nc( iOff<pLeaf->szLeaf );
      if( iOff>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
        return;
      }
    }
  }

  /* Check if the iterator is now at EOF. If so, return early. */
  if( pIter->pLeaf ){
    if( bNewTerm ){
      if( pIter->flags & FTS5_SEGITER_ONETERM ){
        /* A one-term iterator never moves past its term: go to EOF. */
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
      }else{
        fts5SegIterLoadTerm(p, pIter, nKeep);
        fts5SegIterLoadNPos(p, pIter);
        if( pbNewTerm ) *pbNewTerm = 1;
      }
    }else{
      /* Read the position-list size field for the new entry directly
      ** instead of calling fts5SegIterLoadNPos(): the low bit of the varint
      ** is the delete flag and the remaining bits are the size in bytes.
      ** This function is never used with detail=none (see the assert() at
      ** the top), so no detail=none handling is needed here.
      */
      int nSz;
      assert( p->rc==SQLITE_OK );
      assert( pIter->iLeafOffset<=pIter->pLeaf->nn );
      fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
      pIter->bDel = (nSz & 0x0001);
      pIter->nPos = nSz>>1;
      assert_nc( pIter->nPos>=0 );
    }
  }
}
2088 
/* Exchange the values of lvalues a and b, both of type T. */
#define SWAPVAL(T, a, b) {  \
  T tmp = (a);              \
  (a) = (b);                \
  (b) = tmp;                \
}
2090 
/* Advance integer lvalue iOff past the varint stored at a[iOff]. Bytes are
** consumed up to and including the first one without the 0x80 bit set, but
** never more than 9 bytes in total. */
#define fts5IndexSkipVarint(a, iOff) {            \
  int iEnd = (iOff)+9;                            \
  for(;;){                                        \
    if( ((a)[(iOff)++] & 0x80)==0 ) break;        \
    if( (iOff)>=iEnd ) break;                     \
  }                                               \
}
2095 
2096 /*
2097 ** Iterator pIter currently points to the first rowid in a doclist. This
2098 ** function sets the iterator up so that iterates in reverse order through
2099 ** the doclist.
2100 */
fts5SegIterReverse(Fts5Index * p,Fts5SegIter * pIter)2101 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
2102   Fts5DlidxIter *pDlidx = pIter->pDlidx;
2103   Fts5Data *pLast = 0;
2104   int pgnoLast = 0;
2105 
2106   if( pDlidx ){
2107     int iSegid = pIter->pSeg->iSegid;
2108     pgnoLast = fts5DlidxIterPgno(pDlidx);
2109     pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
2110   }else{
2111     Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */
2112 
2113     /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
2114     ** position-list content for the current rowid. Back it up so that it
2115     ** points to the start of the position-list size field. */
2116     int iPoslist;
2117     if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
2118       iPoslist = pIter->iTermLeafOffset;
2119     }else{
2120       iPoslist = 4;
2121     }
2122     fts5IndexSkipVarint(pLeaf->p, iPoslist);
2123     pIter->iLeafOffset = iPoslist;
2124 
2125     /* If this condition is true then the largest rowid for the current
2126     ** term may not be stored on the current page. So search forward to
2127     ** see where said rowid really is.  */
2128     if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
2129       int pgno;
2130       Fts5StructureSegment *pSeg = pIter->pSeg;
2131 
2132       /* The last rowid in the doclist may not be on the current page. Search
2133       ** forward to find the page containing the last rowid.  */
2134       for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
2135         i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
2136         Fts5Data *pNew = fts5DataRead(p, iAbs);
2137         if( pNew ){
2138           int iRowid, bTermless;
2139           iRowid = fts5LeafFirstRowidOff(pNew);
2140           bTermless = fts5LeafIsTermless(pNew);
2141           if( iRowid ){
2142             SWAPVAL(Fts5Data*, pNew, pLast);
2143             pgnoLast = pgno;
2144           }
2145           fts5DataRelease(pNew);
2146           if( bTermless==0 ) break;
2147         }
2148       }
2149     }
2150   }
2151 
2152   /* If pLast is NULL at this point, then the last rowid for this doclist
2153   ** lies on the page currently indicated by the iterator. In this case
2154   ** pIter->iLeafOffset is already set to point to the position-list size
2155   ** field associated with the first relevant rowid on the page.
2156   **
2157   ** Or, if pLast is non-NULL, then it is the page that contains the last
2158   ** rowid. In this case configure the iterator so that it points to the
2159   ** first rowid on this page.
2160   */
2161   if( pLast ){
2162     int iOff;
2163     fts5DataRelease(pIter->pLeaf);
2164     pIter->pLeaf = pLast;
2165     pIter->iLeafPgno = pgnoLast;
2166     iOff = fts5LeafFirstRowidOff(pLast);
2167     iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
2168     pIter->iLeafOffset = iOff;
2169 
2170     if( fts5LeafIsTermless(pLast) ){
2171       pIter->iEndofDoclist = pLast->nn+1;
2172     }else{
2173       pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
2174     }
2175 
2176   }
2177 
2178   fts5SegIterReverseInitPage(p, pIter);
2179 }
2180 
2181 /*
2182 ** Iterator pIter currently points to the first rowid of a doclist.
2183 ** There is a doclist-index associated with the final term on the current
2184 ** page. If the current term is the last term on the page, load the
2185 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
2186 */
fts5SegIterLoadDlidx(Fts5Index * p,Fts5SegIter * pIter)2187 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
2188   int iSeg = pIter->pSeg->iSegid;
2189   int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2190   Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2191 
2192   assert( pIter->flags & FTS5_SEGITER_ONETERM );
2193   assert( pIter->pDlidx==0 );
2194 
2195   /* Check if the current doclist ends on this page. If it does, return
2196   ** early without loading the doclist-index (as it belongs to a different
2197   ** term. */
2198   if( pIter->iTermLeafPgno==pIter->iLeafPgno
2199    && pIter->iEndofDoclist<pLeaf->szLeaf
2200   ){
2201     return;
2202   }
2203 
2204   pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
2205 }
2206 
2207 /*
2208 ** The iterator object passed as the second argument currently contains
2209 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
2210 ** function searches the leaf page for a term matching (pTerm/nTerm).
2211 **
2212 ** If the specified term is found on the page, then the iterator is left
2213 ** pointing to it. If argument bGe is zero and the term is not found,
2214 ** the iterator is left pointing at EOF.
2215 **
2216 ** If bGe is non-zero and the specified term is not found, then the
2217 ** iterator is left pointing to the smallest term in the segment that
2218 ** is larger than the specified term, even if this term is not on the
2219 ** current page.
2220 */
fts5LeafSeek(Fts5Index * p,int bGe,Fts5SegIter * pIter,const u8 * pTerm,int nTerm)2221 static void fts5LeafSeek(
2222   Fts5Index *p,                   /* Leave any error code here */
2223   int bGe,                        /* True for a >= search */
2224   Fts5SegIter *pIter,             /* Iterator to seek */
2225   const u8 *pTerm, int nTerm      /* Term to search for */
2226 ){
2227   int iOff;
2228   const u8 *a = pIter->pLeaf->p;
2229   int szLeaf = pIter->pLeaf->szLeaf;
2230   int n = pIter->pLeaf->nn;
2231 
2232   u32 nMatch = 0;
2233   u32 nKeep = 0;
2234   u32 nNew = 0;
2235   u32 iTermOff;
2236   int iPgidx;                     /* Current offset in pgidx */
2237   int bEndOfPage = 0;
2238 
2239   assert( p->rc==SQLITE_OK );
2240 
2241   iPgidx = szLeaf;
2242   iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
2243   iOff = iTermOff;
2244   if( iOff>n ){
2245     p->rc = FTS5_CORRUPT;
2246     return;
2247   }
2248 
2249   while( 1 ){
2250 
2251     /* Figure out how many new bytes are in this term */
2252     fts5FastGetVarint32(a, iOff, nNew);
2253     if( nKeep<nMatch ){
2254       goto search_failed;
2255     }
2256 
2257     assert( nKeep>=nMatch );
2258     if( nKeep==nMatch ){
2259       u32 nCmp;
2260       u32 i;
2261       nCmp = (u32)MIN(nNew, nTerm-nMatch);
2262       for(i=0; i<nCmp; i++){
2263         if( a[iOff+i]!=pTerm[nMatch+i] ) break;
2264       }
2265       nMatch += i;
2266 
2267       if( (u32)nTerm==nMatch ){
2268         if( i==nNew ){
2269           goto search_success;
2270         }else{
2271           goto search_failed;
2272         }
2273       }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
2274         goto search_failed;
2275       }
2276     }
2277 
2278     if( iPgidx>=n ){
2279       bEndOfPage = 1;
2280       break;
2281     }
2282 
2283     iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
2284     iTermOff += nKeep;
2285     iOff = iTermOff;
2286 
2287     if( iOff>=n ){
2288       p->rc = FTS5_CORRUPT;
2289       return;
2290     }
2291 
2292     /* Read the nKeep field of the next term. */
2293     fts5FastGetVarint32(a, iOff, nKeep);
2294   }
2295 
2296  search_failed:
2297   if( bGe==0 ){
2298     fts5DataRelease(pIter->pLeaf);
2299     pIter->pLeaf = 0;
2300     return;
2301   }else if( bEndOfPage ){
2302     do {
2303       fts5SegIterNextPage(p, pIter);
2304       if( pIter->pLeaf==0 ) return;
2305       a = pIter->pLeaf->p;
2306       if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
2307         iPgidx = pIter->pLeaf->szLeaf;
2308         iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
2309         if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
2310           p->rc = FTS5_CORRUPT;
2311           return;
2312         }else{
2313           nKeep = 0;
2314           iTermOff = iOff;
2315           n = pIter->pLeaf->nn;
2316           iOff += fts5GetVarint32(&a[iOff], nNew);
2317           break;
2318         }
2319       }
2320     }while( 1 );
2321   }
2322 
2323  search_success:
2324   pIter->iLeafOffset = iOff + nNew;
2325   if( pIter->iLeafOffset>n || nNew<1 ){
2326     p->rc = FTS5_CORRUPT;
2327     return;
2328   }
2329   pIter->iTermLeafOffset = pIter->iLeafOffset;
2330   pIter->iTermLeafPgno = pIter->iLeafPgno;
2331 
2332   fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
2333   fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
2334 
2335   if( iPgidx>=n ){
2336     pIter->iEndofDoclist = pIter->pLeaf->nn+1;
2337   }else{
2338     int nExtra;
2339     iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
2340     pIter->iEndofDoclist = iTermOff + nExtra;
2341   }
2342   pIter->iPgidxOff = iPgidx;
2343 
2344   fts5SegIterLoadRowid(p, pIter);
2345   fts5SegIterLoadNPos(p, pIter);
2346 }
2347 
fts5IdxSelectStmt(Fts5Index * p)2348 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
2349   if( p->pIdxSelect==0 ){
2350     Fts5Config *pConfig = p->pConfig;
2351     fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
2352           "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2353           "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2354           pConfig->zDb, pConfig->zName
2355     ));
2356   }
2357   return p->pIdxSelect;
2358 }
2359 
2360 /*
2361 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2362 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2363 **
2364 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2365 ** an error has already occurred when this function is called, it is a no-op.
2366 */
fts5SegIterSeekInit(Fts5Index * p,const u8 * pTerm,int nTerm,int flags,Fts5StructureSegment * pSeg,Fts5SegIter * pIter)2367 static void fts5SegIterSeekInit(
2368   Fts5Index *p,                   /* FTS5 backend */
2369   const u8 *pTerm, int nTerm,     /* Term to seek to */
2370   int flags,                      /* Mask of FTS5INDEX_XXX flags */
2371   Fts5StructureSegment *pSeg,     /* Description of segment */
2372   Fts5SegIter *pIter              /* Object to populate */
2373 ){
2374   int iPg = 1;
2375   int bGe = (flags & FTS5INDEX_QUERY_SCAN);
2376   int bDlidx = 0;                 /* True if there is a doclist-index */
2377   sqlite3_stmt *pIdxSelect = 0;
2378 
2379   assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
2380   assert( pTerm && nTerm );
2381   memset(pIter, 0, sizeof(*pIter));
2382   pIter->pSeg = pSeg;
2383 
2384   /* This block sets stack variable iPg to the leaf page number that may
2385   ** contain term (pTerm/nTerm), if it is present in the segment. */
2386   pIdxSelect = fts5IdxSelectStmt(p);
2387   if( p->rc ) return;
2388   sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
2389   sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
2390   if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
2391     i64 val = sqlite3_column_int(pIdxSelect, 0);
2392     iPg = (int)(val>>1);
2393     bDlidx = (val & 0x0001);
2394   }
2395   p->rc = sqlite3_reset(pIdxSelect);
2396   sqlite3_bind_null(pIdxSelect, 2);
2397 
2398   if( iPg<pSeg->pgnoFirst ){
2399     iPg = pSeg->pgnoFirst;
2400     bDlidx = 0;
2401   }
2402 
2403   pIter->iLeafPgno = iPg - 1;
2404   fts5SegIterNextPage(p, pIter);
2405 
2406   if( pIter->pLeaf ){
2407     fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
2408   }
2409 
2410   if( p->rc==SQLITE_OK && bGe==0 ){
2411     pIter->flags |= FTS5_SEGITER_ONETERM;
2412     if( pIter->pLeaf ){
2413       if( flags & FTS5INDEX_QUERY_DESC ){
2414         pIter->flags |= FTS5_SEGITER_REVERSE;
2415       }
2416       if( bDlidx ){
2417         fts5SegIterLoadDlidx(p, pIter);
2418       }
2419       if( flags & FTS5INDEX_QUERY_DESC ){
2420         fts5SegIterReverse(p, pIter);
2421       }
2422     }
2423   }
2424 
2425   fts5SegIterSetNext(p, pIter);
2426 
2427   /* Either:
2428   **
2429   **   1) an error has occurred, or
2430   **   2) the iterator points to EOF, or
2431   **   3) the iterator points to an entry with term (pTerm/nTerm), or
2432   **   4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2433   **      to an entry with a term greater than or equal to (pTerm/nTerm).
2434   */
2435   assert_nc( p->rc!=SQLITE_OK                                       /* 1 */
2436    || pIter->pLeaf==0                                               /* 2 */
2437    || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0          /* 3 */
2438    || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0)  /* 4 */
2439   );
2440 }
2441 
2442 /*
2443 ** Initialize the object pIter to point to term pTerm/nTerm within the
2444 ** in-memory hash table. If there is no such term in the hash-table, the
2445 ** iterator is set to EOF.
2446 **
2447 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2448 ** an error has already occurred when this function is called, it is a no-op.
2449 */
fts5SegIterHashInit(Fts5Index * p,const u8 * pTerm,int nTerm,int flags,Fts5SegIter * pIter)2450 static void fts5SegIterHashInit(
2451   Fts5Index *p,                   /* FTS5 backend */
2452   const u8 *pTerm, int nTerm,     /* Term to seek to */
2453   int flags,                      /* Mask of FTS5INDEX_XXX flags */
2454   Fts5SegIter *pIter              /* Object to populate */
2455 ){
2456   int nList = 0;
2457   const u8 *z = 0;
2458   int n = 0;
2459   Fts5Data *pLeaf = 0;
2460 
2461   assert( p->pHash );
2462   assert( p->rc==SQLITE_OK );
2463 
2464   if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
2465     const u8 *pList = 0;
2466 
2467     p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
2468     sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
2469     n = (z ? (int)strlen((const char*)z) : 0);
2470     if( pList ){
2471       pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
2472       if( pLeaf ){
2473         pLeaf->p = (u8*)pList;
2474       }
2475     }
2476   }else{
2477     p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
2478         (const char*)pTerm, nTerm, (void**)&pLeaf, &nList
2479     );
2480     if( pLeaf ){
2481       pLeaf->p = (u8*)&pLeaf[1];
2482     }
2483     z = pTerm;
2484     n = nTerm;
2485     pIter->flags |= FTS5_SEGITER_ONETERM;
2486   }
2487 
2488   if( pLeaf ){
2489     sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
2490     pLeaf->nn = pLeaf->szLeaf = nList;
2491     pIter->pLeaf = pLeaf;
2492     pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
2493     pIter->iEndofDoclist = pLeaf->nn;
2494 
2495     if( flags & FTS5INDEX_QUERY_DESC ){
2496       pIter->flags |= FTS5_SEGITER_REVERSE;
2497       fts5SegIterReverseInitPage(p, pIter);
2498     }else{
2499       fts5SegIterLoadNPos(p, pIter);
2500     }
2501   }
2502 
2503   fts5SegIterSetNext(p, pIter);
2504 }
2505 
2506 /*
2507 ** Zero the iterator passed as the only argument.
2508 */
fts5SegIterClear(Fts5SegIter * pIter)2509 static void fts5SegIterClear(Fts5SegIter *pIter){
2510   fts5BufferFree(&pIter->term);
2511   fts5DataRelease(pIter->pLeaf);
2512   fts5DataRelease(pIter->pNextLeaf);
2513   fts5DlidxIterFree(pIter->pDlidx);
2514   sqlite3_free(pIter->aRowidOffset);
2515   memset(pIter, 0, sizeof(Fts5SegIter));
2516 }
2517 
2518 #ifdef SQLITE_DEBUG
2519 
2520 /*
2521 ** This function is used as part of the big assert() procedure implemented by
2522 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
2523 ** in *pRes is the correct result of comparing the current positions of the
2524 ** two iterators.
2525 */
fts5AssertComparisonResult(Fts5Iter * pIter,Fts5SegIter * p1,Fts5SegIter * p2,Fts5CResult * pRes)2526 static void fts5AssertComparisonResult(
2527   Fts5Iter *pIter,
2528   Fts5SegIter *p1,
2529   Fts5SegIter *p2,
2530   Fts5CResult *pRes
2531 ){
2532   int i1 = p1 - pIter->aSeg;
2533   int i2 = p2 - pIter->aSeg;
2534 
2535   if( p1->pLeaf || p2->pLeaf ){
2536     if( p1->pLeaf==0 ){
2537       assert( pRes->iFirst==i2 );
2538     }else if( p2->pLeaf==0 ){
2539       assert( pRes->iFirst==i1 );
2540     }else{
2541       int nMin = MIN(p1->term.n, p2->term.n);
2542       int res = fts5Memcmp(p1->term.p, p2->term.p, nMin);
2543       if( res==0 ) res = p1->term.n - p2->term.n;
2544 
2545       if( res==0 ){
2546         assert( pRes->bTermEq==1 );
2547         assert( p1->iRowid!=p2->iRowid );
2548         res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
2549       }else{
2550         assert( pRes->bTermEq==0 );
2551       }
2552 
2553       if( res<0 ){
2554         assert( pRes->iFirst==i1 );
2555       }else{
2556         assert( pRes->iFirst==i2 );
2557       }
2558     }
2559   }
2560 }
2561 
2562 /*
2563 ** This function is a no-op unless SQLITE_DEBUG is defined when this module
2564 ** is compiled. In that case, this function is essentially an assert()
2565 ** statement used to verify that the contents of the pIter->aFirst[] array
2566 ** are correct.
2567 */
fts5AssertMultiIterSetup(Fts5Index * p,Fts5Iter * pIter)2568 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
2569   if( p->rc==SQLITE_OK ){
2570     Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2571     int i;
2572 
2573     assert( (pFirst->pLeaf==0)==pIter->base.bEof );
2574 
2575     /* Check that pIter->iSwitchRowid is set correctly. */
2576     for(i=0; i<pIter->nSeg; i++){
2577       Fts5SegIter *p1 = &pIter->aSeg[i];
2578       assert( p1==pFirst
2579            || p1->pLeaf==0
2580            || fts5BufferCompare(&pFirst->term, &p1->term)
2581            || p1->iRowid==pIter->iSwitchRowid
2582            || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
2583       );
2584     }
2585 
2586     for(i=0; i<pIter->nSeg; i+=2){
2587       Fts5SegIter *p1 = &pIter->aSeg[i];
2588       Fts5SegIter *p2 = &pIter->aSeg[i+1];
2589       Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
2590       fts5AssertComparisonResult(pIter, p1, p2, pRes);
2591     }
2592 
2593     for(i=1; i<(pIter->nSeg / 2); i+=2){
2594       Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
2595       Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
2596       Fts5CResult *pRes = &pIter->aFirst[i];
2597       fts5AssertComparisonResult(pIter, p1, p2, pRes);
2598     }
2599   }
2600 }
2601 #else
2602 # define fts5AssertMultiIterSetup(x,y)
2603 #endif
2604 
2605 /*
2606 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2607 **
2608 ** If the returned value is non-zero, then it is the index of an entry
2609 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2610 ** to a key that is a duplicate of another, higher priority,
2611 ** segment-iterator in the pSeg->aSeg[] array.
2612 */
fts5MultiIterDoCompare(Fts5Iter * pIter,int iOut)2613 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
2614   int i1;                         /* Index of left-hand Fts5SegIter */
2615   int i2;                         /* Index of right-hand Fts5SegIter */
2616   int iRes;
2617   Fts5SegIter *p1;                /* Left-hand Fts5SegIter */
2618   Fts5SegIter *p2;                /* Right-hand Fts5SegIter */
2619   Fts5CResult *pRes = &pIter->aFirst[iOut];
2620 
2621   assert( iOut<pIter->nSeg && iOut>0 );
2622   assert( pIter->bRev==0 || pIter->bRev==1 );
2623 
2624   if( iOut>=(pIter->nSeg/2) ){
2625     i1 = (iOut - pIter->nSeg/2) * 2;
2626     i2 = i1 + 1;
2627   }else{
2628     i1 = pIter->aFirst[iOut*2].iFirst;
2629     i2 = pIter->aFirst[iOut*2+1].iFirst;
2630   }
2631   p1 = &pIter->aSeg[i1];
2632   p2 = &pIter->aSeg[i2];
2633 
2634   pRes->bTermEq = 0;
2635   if( p1->pLeaf==0 ){           /* If p1 is at EOF */
2636     iRes = i2;
2637   }else if( p2->pLeaf==0 ){     /* If p2 is at EOF */
2638     iRes = i1;
2639   }else{
2640     int res = fts5BufferCompare(&p1->term, &p2->term);
2641     if( res==0 ){
2642       assert_nc( i2>i1 );
2643       assert_nc( i2!=0 );
2644       pRes->bTermEq = 1;
2645       if( p1->iRowid==p2->iRowid ){
2646         p1->bDel = p2->bDel;
2647         return i2;
2648       }
2649       res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
2650     }
2651     assert( res!=0 );
2652     if( res<0 ){
2653       iRes = i1;
2654     }else{
2655       iRes = i2;
2656     }
2657   }
2658 
2659   pRes->iFirst = (u16)iRes;
2660   return 0;
2661 }
2662 
2663 /*
2664 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
2665 ** It is an error if leaf iLeafPgno does not exist or contains no rowids.
2666 */
fts5SegIterGotoPage(Fts5Index * p,Fts5SegIter * pIter,int iLeafPgno)2667 static void fts5SegIterGotoPage(
2668   Fts5Index *p,                   /* FTS5 backend object */
2669   Fts5SegIter *pIter,             /* Iterator to advance */
2670   int iLeafPgno
2671 ){
2672   assert( iLeafPgno>pIter->iLeafPgno );
2673 
2674   if( iLeafPgno>pIter->pSeg->pgnoLast ){
2675     p->rc = FTS5_CORRUPT;
2676   }else{
2677     fts5DataRelease(pIter->pNextLeaf);
2678     pIter->pNextLeaf = 0;
2679     pIter->iLeafPgno = iLeafPgno-1;
2680     fts5SegIterNextPage(p, pIter);
2681     assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
2682 
2683     if( p->rc==SQLITE_OK ){
2684       int iOff;
2685       u8 *a = pIter->pLeaf->p;
2686       int n = pIter->pLeaf->szLeaf;
2687 
2688       iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
2689       if( iOff<4 || iOff>=n ){
2690         p->rc = FTS5_CORRUPT;
2691       }else{
2692         iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
2693         pIter->iLeafOffset = iOff;
2694         fts5SegIterLoadNPos(p, pIter);
2695       }
2696     }
2697   }
2698 }
2699 
2700 /*
2701 ** Advance the iterator passed as the second argument until it is at or
2702 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
2703 ** always advanced at least once.
2704 */
fts5SegIterNextFrom(Fts5Index * p,Fts5SegIter * pIter,i64 iMatch)2705 static void fts5SegIterNextFrom(
2706   Fts5Index *p,                   /* FTS5 backend object */
2707   Fts5SegIter *pIter,             /* Iterator to advance */
2708   i64 iMatch                      /* Advance iterator at least this far */
2709 ){
2710   int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2711   Fts5DlidxIter *pDlidx = pIter->pDlidx;
2712   int iLeafPgno = pIter->iLeafPgno;
2713   int bMove = 1;
2714 
2715   assert( pIter->flags & FTS5_SEGITER_ONETERM );
2716   assert( pIter->pDlidx );
2717   assert( pIter->pLeaf );
2718 
2719   if( bRev==0 ){
2720     while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
2721       iLeafPgno = fts5DlidxIterPgno(pDlidx);
2722       fts5DlidxIterNext(p, pDlidx);
2723     }
2724     assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
2725     if( iLeafPgno>pIter->iLeafPgno ){
2726       fts5SegIterGotoPage(p, pIter, iLeafPgno);
2727       bMove = 0;
2728     }
2729   }else{
2730     assert( pIter->pNextLeaf==0 );
2731     assert( iMatch<pIter->iRowid );
2732     while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
2733       fts5DlidxIterPrev(p, pDlidx);
2734     }
2735     iLeafPgno = fts5DlidxIterPgno(pDlidx);
2736 
2737     assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
2738 
2739     if( iLeafPgno<pIter->iLeafPgno ){
2740       pIter->iLeafPgno = iLeafPgno+1;
2741       fts5SegIterReverseNewPage(p, pIter);
2742       bMove = 0;
2743     }
2744   }
2745 
2746   do{
2747     if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
2748     if( pIter->pLeaf==0 ) break;
2749     if( bRev==0 && pIter->iRowid>=iMatch ) break;
2750     if( bRev!=0 && pIter->iRowid<=iMatch ) break;
2751     bMove = 1;
2752   }while( p->rc==SQLITE_OK );
2753 }
2754 
2755 
2756 /*
2757 ** Free the iterator object passed as the second argument.
2758 */
fts5MultiIterFree(Fts5Iter * pIter)2759 static void fts5MultiIterFree(Fts5Iter *pIter){
2760   if( pIter ){
2761     int i;
2762     for(i=0; i<pIter->nSeg; i++){
2763       fts5SegIterClear(&pIter->aSeg[i]);
2764     }
2765     fts5BufferFree(&pIter->poslist);
2766     sqlite3_free(pIter);
2767   }
2768 }
2769 
/*
** Sub-iterator iChanged of pIter has just been moved. Redo the comparisons
** for the aFirst[] slots on the path from that sub-iterator towards slot 1,
** stopping once the slot index drops below iMinset or an error occurs.
**
** If a comparison reports a duplicate entry (fts5MultiIterDoCompare()
** returns non-zero), the indicated sub-iterator is advanced with xNext()
** and the walk restarts from the slot directly above that sub-iterator.
*/
static void fts5MultiIterAdvanced(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  int iMinset                     /* Minimum entry in aFirst[] to set */
){
  int iSlot = (pIter->nSeg+iChanged)/2;

  while( iSlot>=iMinset && p->rc==SQLITE_OK ){
    int iDup = fts5MultiIterDoCompare(pIter, iSlot);
    if( iDup ){
      Fts5SegIter *pDup = &pIter->aSeg[iDup];
      assert( p->rc==SQLITE_OK );
      pDup->xNext(p, pDup, 0);
      iSlot = pIter->nSeg + iDup;
    }
    iSlot = iSlot/2;
  }
}
2787 
2788 /*
2789 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
2790 ** points to the same term though - just a different rowid. This function
2791 ** attempts to update the contents of the pIter->aFirst[] accordingly.
2792 ** If it does so successfully, 0 is returned. Otherwise 1.
2793 **
2794 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
2795 ** on the iterator instead. That function does the same as this one, except
2796 ** that it deals with more complicated cases as well.
2797 */
fts5MultiIterAdvanceRowid(Fts5Iter * pIter,int iChanged,Fts5SegIter ** ppFirst)2798 static int fts5MultiIterAdvanceRowid(
2799   Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
2800   int iChanged,                   /* Index of sub-iterator just advanced */
2801   Fts5SegIter **ppFirst
2802 ){
2803   Fts5SegIter *pNew = &pIter->aSeg[iChanged];
2804 
2805   if( pNew->iRowid==pIter->iSwitchRowid
2806    || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
2807   ){
2808     int i;
2809     Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
2810     pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
2811     for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
2812       Fts5CResult *pRes = &pIter->aFirst[i];
2813 
2814       assert( pNew->pLeaf );
2815       assert( pRes->bTermEq==0 || pOther->pLeaf );
2816 
2817       if( pRes->bTermEq ){
2818         if( pNew->iRowid==pOther->iRowid ){
2819           return 1;
2820         }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
2821           pIter->iSwitchRowid = pOther->iRowid;
2822           pNew = pOther;
2823         }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
2824           pIter->iSwitchRowid = pOther->iRowid;
2825         }
2826       }
2827       pRes->iFirst = (u16)(pNew - pIter->aSeg);
2828       if( i==1 ) break;
2829 
2830       pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
2831     }
2832   }
2833 
2834   *ppFirst = pNew;
2835   return 0;
2836 }
2837 
2838 /*
2839 ** Set the pIter->bEof variable based on the state of the sub-iterators.
2840 */
fts5MultiIterSetEof(Fts5Iter * pIter)2841 static void fts5MultiIterSetEof(Fts5Iter *pIter){
2842   Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2843   pIter->base.bEof = pSeg->pLeaf==0;
2844   pIter->iSwitchRowid = pSeg->iRowid;
2845 }
2846 
2847 /*
2848 ** Move the iterator to the next entry.
2849 **
2850 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
2851 ** considered an error if the iterator reaches EOF, or if it is already at
2852 ** EOF when this function is called.
2853 */
fts5MultiIterNext(Fts5Index * p,Fts5Iter * pIter,int bFrom,i64 iFrom)2854 static void fts5MultiIterNext(
2855   Fts5Index *p,
2856   Fts5Iter *pIter,
2857   int bFrom,                      /* True if argument iFrom is valid */
2858   i64 iFrom                       /* Advance at least as far as this */
2859 ){
2860   int bUseFrom = bFrom;
2861   assert( pIter->base.bEof==0 );
2862   while( p->rc==SQLITE_OK ){
2863     int iFirst = pIter->aFirst[1].iFirst;
2864     int bNewTerm = 0;
2865     Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2866     assert( p->rc==SQLITE_OK );
2867     if( bUseFrom && pSeg->pDlidx ){
2868       fts5SegIterNextFrom(p, pSeg, iFrom);
2869     }else{
2870       pSeg->xNext(p, pSeg, &bNewTerm);
2871     }
2872 
2873     if( pSeg->pLeaf==0 || bNewTerm
2874      || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
2875     ){
2876       fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2877       fts5MultiIterSetEof(pIter);
2878       pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
2879       if( pSeg->pLeaf==0 ) return;
2880     }
2881 
2882     fts5AssertMultiIterSetup(p, pIter);
2883     assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
2884     if( pIter->bSkipEmpty==0 || pSeg->nPos ){
2885       pIter->xSetOutputs(pIter, pSeg);
2886       return;
2887     }
2888     bUseFrom = 0;
2889   }
2890 }
2891 
/*
** Variant of fts5MultiIterNext() for iterators that have bSkipEmpty set.
** The iterator is stepped at least once, and then for as long as
** fts5MultiIterIsEmpty() reports that the current entry is empty. The
** xSetOutputs() callback is not invoked.
**
** (*pbNewTerm) is set to 0 on entry and to 1 if, during any step, the
** aFirst[] array had to be rebuilt with fts5MultiIterAdvanced() - meaning
** the iterator *might* now point to a different term.
**
** This function is a no-op (and *pbNewTerm is not written) if p->rc
** already holds an error code when it is called.
*/
static void fts5MultiIterNext2(
  Fts5Index *p,
  Fts5Iter *pIter,
  int *pbNewTerm                  /* OUT: True if *might* be new term */
){
  assert( pIter->bSkipEmpty );
  if( p->rc!=SQLITE_OK ) return;

  *pbNewTerm = 0;
  do{
    int iFirst = pIter->aFirst[1].iFirst;
    Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
    int bNewTerm = 0;
    int bRebuild;

    assert( p->rc==SQLITE_OK );
    pSeg->xNext(p, pSeg, &bNewTerm);

    bRebuild = (pSeg->pLeaf==0 || bNewTerm);
    if( bRebuild==0 ){
      bRebuild = fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg);
    }
    if( bRebuild ){
      fts5MultiIterAdvanced(p, pIter, iFirst, 1);
      fts5MultiIterSetEof(pIter);
      *pbNewTerm = 1;
    }
    fts5AssertMultiIterSetup(p, pIter);
  }while( fts5MultiIterIsEmpty(p, pIter) );
}
2919 
/*
** xSetOutputs callback that does nothing at all. This is the value
** installed in Fts5Iter.xSetOutputs by fts5MultiIterAlloc().
*/
static void fts5IterSetOutputs_Noop(
  Fts5Iter *pUnused1,
  Fts5SegIter *pUnused2
){
  UNUSED_PARAM2(pUnused1, pUnused2);
}
2923 
fts5MultiIterAlloc(Fts5Index * p,int nSeg)2924 static Fts5Iter *fts5MultiIterAlloc(
2925   Fts5Index *p,                   /* FTS5 backend to iterate within */
2926   int nSeg
2927 ){
2928   Fts5Iter *pNew;
2929   int nSlot;                      /* Power of two >= nSeg */
2930 
2931   for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
2932   pNew = fts5IdxMalloc(p,
2933       sizeof(Fts5Iter) +                  /* pNew */
2934       sizeof(Fts5SegIter) * (nSlot-1) +   /* pNew->aSeg[] */
2935       sizeof(Fts5CResult) * nSlot         /* pNew->aFirst[] */
2936   );
2937   if( pNew ){
2938     pNew->nSeg = nSlot;
2939     pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
2940     pNew->pIndex = p;
2941     pNew->xSetOutputs = fts5IterSetOutputs_Noop;
2942   }
2943   return pNew;
2944 }
2945 
/*
** Chunk callback for fts5ChunkIterate() that copies position list data
** verbatim. pContext must point to an Fts5Buffer that already has enough
** space allocated; the nChunk bytes at pChunk are appended to it. Chunks
** with nChunk<=0 are ignored.
*/
static void fts5PoslistCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  Fts5Buffer *pBuf = (Fts5Buffer*)pContext;

  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk<=0 ) return;

  fts5BufferSafeAppendBlob(pBuf, pChunk, nChunk);
}
2957 
/*
** Context object passed to fts5PoslistFilterCallback(), which copies only
** the parts of a detail=full position list that belong to columns in
** pColset. Because the position list may be delivered in several chunks,
** eState carries the filter state from one chunk to the next:
**
**   0: Data for the current column is being discarded.
**   1: Data for the current column is being copied to pBuf.
**   2: The previous chunk ended immediately after an 0x01 byte, so the
**      column number is the first varint of the next chunk.
*/
typedef struct PoslistCallbackCtx PoslistCallbackCtx;
struct PoslistCallbackCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to these columns */
  int eState;                     /* 0, 1 or 2. See above */
};
2964 
/*
** Context object passed to fts5PoslistOffsetsCallback(), which filters a
** delta-encoded list of values (each stored as a varint holding the
** difference from the previous value, plus 2) down to those accepted by
** pColset, re-encoding the deltas as it goes.
*/
typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
struct PoslistOffsetsCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to these columns */
  int iRead;                      /* Most recent value decoded from input */
  int iWrite;                     /* Most recent value written to pBuf */
};
2972 
2973 /*
2974 ** TODO: Make this more efficient!
2975 */
fts5IndexColsetTest(Fts5Colset * pColset,int iCol)2976 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
2977   int i;
2978   for(i=0; i<pColset->nCol; i++){
2979     if( pColset->aiCol[i]==iCol ) return 1;
2980   }
2981   return 0;
2982 }
2983 
/*
** Chunk callback for fts5ChunkIterate(). pContext points to a
** PoslistOffsetsCtx object.
**
** The chunk is read as a series of varints, each holding the difference
** between a value and its predecessor, plus 2. Each decoded value that is
** accepted by fts5IndexColsetTest() is appended to the output buffer,
** encoded the same way relative to the previous value written. The
** running "last read" and "last written" values are kept in the context
** object so that a list may span several chunks.
*/
static void fts5PoslistOffsetsCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
  int iOff;                       /* Current offset within pChunk[] */

  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );

  for(iOff=0; iOff<nChunk; ){
    int iCol;
    iOff += fts5GetVarint32(&pChunk[iOff], iCol);
    iCol += pCtx->iRead - 2;
    pCtx->iRead = iCol;
    if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
      fts5BufferSafeAppendVarint(pCtx->pBuf, iCol + 2 - pCtx->iWrite);
      pCtx->iWrite = iCol;
    }
  }
}
3006 
/*
** Chunk callback for fts5ChunkIterate(). pContext points to a
** PoslistCallbackCtx object.
**
** The chunk is scanned for 0x01 bytes that are not part of a larger
** varint. Each such byte is followed by a column number and starts the
** data for that column. Data belonging to columns accepted by
** fts5IndexColsetTest() is appended to the output buffer, including the
** 0x01 marker and column number; all other data is dropped.
**
** pCtx->eState records whether the column currently being read is being
** copied (1) or dropped (0). It is set to 2 if the chunk ends directly
** after an 0x01 marker, in which case the column number is read from the
** start of the next chunk.
*/
static void fts5PoslistFilterCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
  int iOff = 0;                   /* Current offset within pChunk[] */
  int iCopy = 0;                  /* Offset of first byte not yet copied */

  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk<=0 ) return;

  /* Finish processing a column marker split across two chunks */
  if( pCtx->eState==2 ){
    int iCol;
    fts5FastGetVarint32(pChunk, iOff, iCol);
    if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
      pCtx->eState = 1;
      fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
    }else{
      pCtx->eState = 0;
    }
  }

  for(;;){
    /* Search through to find the next varint with value 1. This is the
    ** start of the next column's hits. */
    while( iOff<nChunk && pChunk[iOff]!=0x01 ){
      while( pChunk[iOff] & 0x80 ) iOff++;
      iOff++;
    }
    if( pCtx->eState ){
      fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iCopy], iOff-iCopy);
    }
    if( iOff>=nChunk ) break;

    /* pChunk[iOff] is an 0x01 marker. Step over it. */
    iCopy = iOff;
    iOff++;
    if( iOff>=nChunk ){
      pCtx->eState = 2;
    }else{
      int iCol;
      fts5FastGetVarint32(pChunk, iOff, iCol);
      pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
      if( pCtx->eState ){
        fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iCopy], iOff-iCopy);
        iCopy = iOff;
      }
    }
    if( iOff>=nChunk ) break;
  }
}
3058 
/*
** Invoke callback xChunk once for each contiguous chunk of the position
** list that segment iterator pSeg currently points to. The first chunk
** lies on the current leaf page, starting at pSeg->iLeafOffset. If the
** position list (pSeg->nPos bytes in total) does not fit on that page,
** following leaf pages are loaded one at a time and the data starting at
** byte offset 4 of each is passed to the callback.
**
** For iterators that do not have the FTS5_SEGITER_REVERSE flag set, the
** first continuation page loaded is not released. It is stored in
** pSeg->pNextLeaf instead.
**
** Iteration stops early if a page cannot be loaded. If more data is
** required but the iterator has no associated segment (pSeg->pSeg==0),
** p->rc is set to FTS5_CORRUPT.
*/
static void fts5ChunkIterate(
  Fts5Index *p,                   /* Index object */
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = 0;               /* Page to keep in pSeg->pNextLeaf, or 0 */

  /* This function does not work with detail=none databases. */
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
    pgnoSave = pgno+1;
  }

  while( 1 ){
    xChunk(p, pCtx, pChunk, nChunk);
    nRem -= nChunk;
    fts5DataRelease(pData);
    if( nRem<=0 ){
      break;
    }else if( pSeg->pSeg==0 ){
      /* NOTE(review): presumably only iterators over the in-memory hash
      ** table have no segment - confirm against fts5SegIterHashInit().
      ** There is no following page to read in that case, so report
      ** corruption instead of dereferencing a NULL pointer below. */
      p->rc = FTS5_CORRUPT;
      break;
    }else{
      pgno++;
      pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
      if( pData==0 ) break;
      pChunk = &pData->p[4];
      nChunk = MIN(nRem, pData->szLeaf - 4);
      if( pgno==pgnoSave ){
        /* Ownership of this page passes to the segment iterator */
        assert( pSeg->pNextLeaf==0 );
        pSeg->pNextLeaf = pData;
        pData = 0;
      }
    }
  }
}
3099 
3100 /*
3101 ** Iterator pIter currently points to a valid entry (not EOF). This
3102 ** function appends the position list data for the current entry to
3103 ** buffer pBuf. It does not make a copy of the position-list size
3104 ** field.
3105 */
fts5SegiterPoslist(Fts5Index * p,Fts5SegIter * pSeg,Fts5Colset * pColset,Fts5Buffer * pBuf)3106 static void fts5SegiterPoslist(
3107   Fts5Index *p,
3108   Fts5SegIter *pSeg,
3109   Fts5Colset *pColset,
3110   Fts5Buffer *pBuf
3111 ){
3112   if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){
3113     memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING);
3114     if( pColset==0 ){
3115       fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
3116     }else{
3117       if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
3118         PoslistCallbackCtx sCtx;
3119         sCtx.pBuf = pBuf;
3120         sCtx.pColset = pColset;
3121         sCtx.eState = fts5IndexColsetTest(pColset, 0);
3122         assert( sCtx.eState==0 || sCtx.eState==1 );
3123         fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
3124       }else{
3125         PoslistOffsetsCtx sCtx;
3126         memset(&sCtx, 0, sizeof(sCtx));
3127         sCtx.pBuf = pBuf;
3128         sCtx.pColset = pColset;
3129         fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
3130       }
3131     }
3132   }
3133 }
3134 
3135 /*
3136 ** IN/OUT parameter (*pa) points to a position list n bytes in size. If
3137 ** the position list contains entries for column iCol, then (*pa) is set
3138 ** to point to the sub-position-list for that column and the number of
3139 ** bytes in it returned. Or, if the argument position list does not
3140 ** contain any entries for column iCol, return 0.
3141 */
fts5IndexExtractCol(const u8 ** pa,int n,int iCol)3142 static int fts5IndexExtractCol(
3143   const u8 **pa,                  /* IN/OUT: Pointer to poslist */
3144   int n,                          /* IN: Size of poslist in bytes */
3145   int iCol                        /* Column to extract from poslist */
3146 ){
3147   int iCurrent = 0;               /* Anything before the first 0x01 is col 0 */
3148   const u8 *p = *pa;
3149   const u8 *pEnd = &p[n];         /* One byte past end of position list */
3150 
3151   while( iCol>iCurrent ){
3152     /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3153     ** not part of a varint. Note that it is not possible for a negative
3154     ** or extremely large varint to occur within an uncorrupted position
3155     ** list. So the last byte of each varint may be assumed to have a clear
3156     ** 0x80 bit.  */
3157     while( *p!=0x01 ){
3158       while( *p++ & 0x80 );
3159       if( p>=pEnd ) return 0;
3160     }
3161     *pa = p++;
3162     iCurrent = *p++;
3163     if( iCurrent & 0x80 ){
3164       p--;
3165       p += fts5GetVarint32(p, iCurrent);
3166     }
3167   }
3168   if( iCol!=iCurrent ) return 0;
3169 
3170   /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3171   ** not part of a varint */
3172   while( p<pEnd && *p!=0x01 ){
3173     while( *p++ & 0x80 );
3174   }
3175 
3176   return p - (*pa);
3177 }
3178 
/*
** Reset buffer pBuf and then, for each column listed in pColset (in the
** order they are stored in pColset->aiCol[]), append the part of position
** list pPos/nPos that belongs to that column, if any. Each part is
** located with fts5IndexExtractCol().
**
** This is a no-op if (*pRc) is not SQLITE_OK when this function is
** called. Errors raised while appending are reported through (*pRc).
*/
static void fts5IndexExtractColset(
  int *pRc,
  Fts5Colset *pColset,            /* Colset to filter on */
  const u8 *pPos, int nPos,       /* Position list */
  Fts5Buffer *pBuf                /* Output buffer */
){
  int iCol;                       /* Index into pColset->aiCol[] */

  if( *pRc!=SQLITE_OK ) return;

  fts5BufferZero(pBuf);
  for(iCol=0; iCol<pColset->nCol; iCol++){
    const u8 *aCol = pPos;
    int nCol = fts5IndexExtractCol(&aCol, nPos, pColset->aiCol[iCol]);
    if( nCol!=0 ){
      fts5BufferAppendBlob(pRc, pBuf, nCol, aCol);
    }
  }
}
3197 
3198 /*
3199 ** xSetOutputs callback used by detail=none tables.
3200 */
fts5IterSetOutputs_None(Fts5Iter * pIter,Fts5SegIter * pSeg)3201 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
3202   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
3203   pIter->base.iRowid = pSeg->iRowid;
3204   pIter->base.nData = pSeg->nPos;
3205 }
3206 
3207 /*
3208 ** xSetOutputs callback used by detail=full and detail=col tables when no
3209 ** column filters are specified.
3210 */
fts5IterSetOutputs_Nocolset(Fts5Iter * pIter,Fts5SegIter * pSeg)3211 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3212   pIter->base.iRowid = pSeg->iRowid;
3213   pIter->base.nData = pSeg->nPos;
3214 
3215   assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
3216   assert( pIter->pColset==0 );
3217 
3218   if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3219     /* All data is stored on the current page. Populate the output
3220     ** variables to point into the body of the page object. */
3221     pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3222   }else{
3223     /* The data is distributed over two or more pages. Copy it into the
3224     ** Fts5Iter.poslist buffer and then set the output pointer to point
3225     ** to this buffer.  */
3226     fts5BufferZero(&pIter->poslist);
3227     fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
3228     pIter->base.pData = pIter->poslist.p;
3229   }
3230 }
3231 
3232 /*
3233 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
3234 ** against no columns at all).
3235 */
fts5IterSetOutputs_ZeroColset(Fts5Iter * pIter,Fts5SegIter * pSeg)3236 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3237   UNUSED_PARAM(pSeg);
3238   pIter->base.nData = 0;
3239 }
3240 
3241 /*
3242 ** xSetOutputs callback used by detail=col when there is a column filter
3243 ** and there are 100 or more columns. Also called as a fallback from
3244 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
3245 */
fts5IterSetOutputs_Col(Fts5Iter * pIter,Fts5SegIter * pSeg)3246 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
3247   fts5BufferZero(&pIter->poslist);
3248   fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
3249   pIter->base.iRowid = pSeg->iRowid;
3250   pIter->base.pData = pIter->poslist.p;
3251   pIter->base.nData = pIter->poslist.n;
3252 }
3253 
3254 /*
3255 ** xSetOutputs callback used when:
3256 **
3257 **   * detail=col,
3258 **   * there is a column filter, and
3259 **   * the table contains 100 or fewer columns.
3260 **
3261 ** The last point is to ensure all column numbers are stored as
3262 ** single-byte varints.
3263 */
fts5IterSetOutputs_Col100(Fts5Iter * pIter,Fts5SegIter * pSeg)3264 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
3265 
3266   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3267   assert( pIter->pColset );
3268 
3269   if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
3270     fts5IterSetOutputs_Col(pIter, pSeg);
3271   }else{
3272     u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
3273     u8 *pEnd = (u8*)&a[pSeg->nPos];
3274     int iPrev = 0;
3275     int *aiCol = pIter->pColset->aiCol;
3276     int *aiColEnd = &aiCol[pIter->pColset->nCol];
3277 
3278     u8 *aOut = pIter->poslist.p;
3279     int iPrevOut = 0;
3280 
3281     pIter->base.iRowid = pSeg->iRowid;
3282 
3283     while( a<pEnd ){
3284       iPrev += (int)a++[0] - 2;
3285       while( *aiCol<iPrev ){
3286         aiCol++;
3287         if( aiCol==aiColEnd ) goto setoutputs_col_out;
3288       }
3289       if( *aiCol==iPrev ){
3290         *aOut++ = (u8)((iPrev - iPrevOut) + 2);
3291         iPrevOut = iPrev;
3292       }
3293     }
3294 
3295 setoutputs_col_out:
3296     pIter->base.pData = pIter->poslist.p;
3297     pIter->base.nData = aOut - pIter->poslist.p;
3298   }
3299 }
3300 
3301 /*
3302 ** xSetOutputs callback used by detail=full when there is a column filter.
3303 */
fts5IterSetOutputs_Full(Fts5Iter * pIter,Fts5SegIter * pSeg)3304 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
3305   Fts5Colset *pColset = pIter->pColset;
3306   pIter->base.iRowid = pSeg->iRowid;
3307 
3308   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
3309   assert( pColset );
3310 
3311   if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3312     /* All data is stored on the current page. Populate the output
3313     ** variables to point into the body of the page object. */
3314     const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3315     if( pColset->nCol==1 ){
3316       pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]);
3317       pIter->base.pData = a;
3318     }else{
3319       int *pRc = &pIter->pIndex->rc;
3320       fts5BufferZero(&pIter->poslist);
3321       fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, &pIter->poslist);
3322       pIter->base.pData = pIter->poslist.p;
3323       pIter->base.nData = pIter->poslist.n;
3324     }
3325   }else{
3326     /* The data is distributed over two or more pages. Copy it into the
3327     ** Fts5Iter.poslist buffer and then set the output pointer to point
3328     ** to this buffer.  */
3329     fts5BufferZero(&pIter->poslist);
3330     fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
3331     pIter->base.pData = pIter->poslist.p;
3332     pIter->base.nData = pIter->poslist.n;
3333   }
3334 }
3335 
/*
** Select the xSetOutputs callback for iterator pIter based on the detail
** mode of the table and on pIter->pColset:
**
**   detail=none                         -> fts5IterSetOutputs_None
**   no column filter                    -> fts5IterSetOutputs_Nocolset
**   column filter with zero columns     -> fts5IterSetOutputs_ZeroColset
**   detail=full                         -> fts5IterSetOutputs_Full
**   detail=col, 100 or fewer columns    -> fts5IterSetOutputs_Col100
**   detail=col, more than 100 columns   -> fts5IterSetOutputs_Col
**
** In the Col100 case sqlite3Fts5BufferSize() is also called on
** pIter->poslist with the number of table columns, as that callback
** writes to the buffer without growing it.
**
** This function is a no-op if (*pRc) is not SQLITE_OK when it is called.
*/
static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
  Fts5Config *pConfig;
  Fts5Colset *pColset;

  if( *pRc!=SQLITE_OK ) return;

  pConfig = pIter->pIndex->pConfig;
  pColset = pIter->pColset;

  if( pConfig->eDetail==FTS5_DETAIL_NONE ){
    pIter->xSetOutputs = fts5IterSetOutputs_None;
    return;
  }
  if( pColset==0 ){
    pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
    return;
  }
  if( pColset->nCol==0 ){
    pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
    return;
  }
  if( pConfig->eDetail==FTS5_DETAIL_FULL ){
    pIter->xSetOutputs = fts5IterSetOutputs_Full;
    return;
  }

  assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
  if( pConfig->nCol>100 ){
    pIter->xSetOutputs = fts5IterSetOutputs_Col;
  }else{
    pIter->xSetOutputs = fts5IterSetOutputs_Col100;
    sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
  }
}
3366 
3367 
3368 /*
3369 ** Allocate a new Fts5Iter object.
3370 **
3371 ** The new object will be used to iterate through data in structure pStruct.
3372 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
3373 ** is zero or greater, data from the first nSegment segments on level iLevel
3374 ** is merged.
3375 **
3376 ** The iterator initially points to the first term/rowid entry in the
3377 ** iterated data.
3378 */
static void fts5MultiIterNew(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Structure *pStruct,         /* Structure of specific index */
  int flags,                      /* FTS5INDEX_QUERY_XXX flags */
  Fts5Colset *pColset,            /* Colset to filter on (or NULL) */
  const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
  int iLevel,                     /* Level to iterate (-1 for all) */
  int nSegment,                   /* Number of segments to merge (iLevel>=0) */
  Fts5Iter **ppOut                /* New object */
){
  int nIter = 0;                  /* Number of segment-iters required */
  int iSlot = 0;                  /* Next unused entry in pNew->aSeg[] */
  int iNode;                      /* Used to populate pNew->aFirst[] */
  Fts5Iter *pNew;                 /* The new multi-segment iterator */

  /* A seek term may only be passed when iterating through all levels. */
  assert( iLevel<0 || (pTerm==0 && nTerm==0) );

  /* Figure out how many segment iterators are required. If an error has
  ** already occurred, nIter is left set to zero.  */
  if( p->rc==SQLITE_OK ){
    if( iLevel>=0 ){
      nIter = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
    }else{
      assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
      nIter = pStruct->nSegment;
      if( p->pHash ) nIter++;     /* One extra iterator for the hash table */
    }
  }

  /* Allocate the new object and configure it according to the flags. */
  pNew = fts5MultiIterAlloc(p, nIter);
  *ppOut = pNew;
  if( pNew==0 ) return;
  pNew->bRev = ((flags & FTS5INDEX_QUERY_DESC)!=0);
  pNew->bSkipEmpty = ((flags & FTS5INDEX_QUERY_SKIPEMPTY)!=0);
  pNew->pColset = pColset;
  if( 0==(flags & FTS5INDEX_QUERY_NOOUTPUT) ){
    fts5IterSetOutputCb(&p->rc, pNew);
  }

  /* Initialize each of the component segment iterators. Within each level
  ** the segments are visited from the highest array index to the lowest. */
  if( p->rc==SQLITE_OK ){
    if( iLevel>=0 ){
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLevel];
      int iSeg;
      for(iSeg=nIter-1; iSeg>=0; iSeg--){
        fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iSlot++]);
      }
    }else{
      int iLvl;
      if( p->pHash ){
        /* The first slot iterates the current contents of the hash table. */
        Fts5SegIter *pHashIter = &pNew->aSeg[iSlot++];
        fts5SegIterHashInit(p, pTerm, nTerm, flags, pHashIter);
      }
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
        int iSeg;
        for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
          Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
          Fts5SegIter *pSegIter = &pNew->aSeg[iSlot++];
          if( pTerm ){
            fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pSegIter);
          }else{
            fts5SegIterInit(p, pSeg, pSegIter);
          }
        }
      }
    }
    assert( iSlot==nIter );
  }

  /* If an error has occurred, free the iterator object and set the output
  ** variable to NULL.  */
  if( p->rc!=SQLITE_OK ){
    fts5MultiIterFree(pNew);
    *ppOut = 0;
    return;
  }

  /* Populate the aFirst[] array. Whenever fts5MultiIterDoCompare() returns
  ** a non-zero index iEq, segment iterator aSeg[iEq] is stepped with its
  ** xNext() method and fts5MultiIterAdvanced() is invoked for it.  */
  for(iNode=pNew->nSeg-1; iNode>0; iNode--){
    int iEq = fts5MultiIterDoCompare(pNew, iNode);
    if( iEq ){
      Fts5SegIter *pAdvance = &pNew->aSeg[iEq];
      if( p->rc==SQLITE_OK ) pAdvance->xNext(p, pAdvance, 0);
      fts5MultiIterAdvanced(p, pNew, iEq, iNode);
    }
  }
  fts5MultiIterSetEof(pNew);
  fts5AssertMultiIterSetup(p, pNew);

  /* Either skip past an initial empty entry (if requested by the
  ** SKIPEMPTY flag) or else, unless at EOF, set up the outputs for the
  ** first entry.  */
  if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
    fts5MultiIterNext(p, pNew, 0, 0);
  }else if( pNew->base.bEof==0 ){
    pNew->xSetOutputs(pNew, &pNew->aSeg[pNew->aFirst[1].iFirst]);
  }
}
3473 
3474 /*
3475 ** Create an Fts5Iter that iterates through the doclist provided
3476 ** as the second argument.
3477 */
fts5MultiIterNew2(Fts5Index * p,Fts5Data * pData,int bDesc,Fts5Iter ** ppOut)3478 static void fts5MultiIterNew2(
3479   Fts5Index *p,                   /* FTS5 backend to iterate within */
3480   Fts5Data *pData,                /* Doclist to iterate through */
3481   int bDesc,                      /* True for descending rowid order */
3482   Fts5Iter **ppOut                /* New object */
3483 ){
3484   Fts5Iter *pNew;
3485   pNew = fts5MultiIterAlloc(p, 2);
3486   if( pNew ){
3487     Fts5SegIter *pIter = &pNew->aSeg[1];
3488 
3489     pIter->flags = FTS5_SEGITER_ONETERM;
3490     if( pData->szLeaf>0 ){
3491       pIter->pLeaf = pData;
3492       pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
3493       pIter->iEndofDoclist = pData->nn;
3494       pNew->aFirst[1].iFirst = 1;
3495       if( bDesc ){
3496         pNew->bRev = 1;
3497         pIter->flags |= FTS5_SEGITER_REVERSE;
3498         fts5SegIterReverseInitPage(p, pIter);
3499       }else{
3500         fts5SegIterLoadNPos(p, pIter);
3501       }
3502       pData = 0;
3503     }else{
3504       pNew->base.bEof = 1;
3505     }
3506     fts5SegIterSetNext(p, pIter);
3507 
3508     *ppOut = pNew;
3509   }
3510 
3511   fts5DataRelease(pData);
3512 }
3513 
3514 /*
3515 ** Return true if the iterator is at EOF or if an error has occurred.
3516 ** False otherwise.
3517 */
fts5MultiIterEof(Fts5Index * p,Fts5Iter * pIter)3518 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
3519   assert( p->rc
3520       || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
3521   );
3522   return (p->rc || pIter->base.bEof);
3523 }
3524 
3525 /*
3526 ** Return the rowid of the entry that the iterator currently points
3527 ** to. If the iterator points to EOF when this function is called the
3528 ** results are undefined.
3529 */
fts5MultiIterRowid(Fts5Iter * pIter)3530 static i64 fts5MultiIterRowid(Fts5Iter *pIter){
3531   assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
3532   return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
3533 }
3534 
3535 /*
3536 ** Move the iterator to the next entry at or following iMatch.
3537 */
fts5MultiIterNextFrom(Fts5Index * p,Fts5Iter * pIter,i64 iMatch)3538 static void fts5MultiIterNextFrom(
3539   Fts5Index *p,
3540   Fts5Iter *pIter,
3541   i64 iMatch
3542 ){
3543   while( 1 ){
3544     i64 iRowid;
3545     fts5MultiIterNext(p, pIter, 1, iMatch);
3546     if( fts5MultiIterEof(p, pIter) ) break;
3547     iRowid = fts5MultiIterRowid(pIter);
3548     if( pIter->bRev==0 && iRowid>=iMatch ) break;
3549     if( pIter->bRev!=0 && iRowid<=iMatch ) break;
3550   }
3551 }
3552 
3553 /*
3554 ** Return a pointer to a buffer containing the term associated with the
3555 ** entry that the iterator currently points to.
3556 */
fts5MultiIterTerm(Fts5Iter * pIter,int * pn)3557 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
3558   Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
3559   *pn = p->term.n;
3560   return p->term.p;
3561 }
3562 
3563 /*
3564 ** Allocate a new segment-id for the structure pStruct. The new segment
3565 ** id must be between 1 and 65335 inclusive, and must not be used by
3566 ** any currently existing segment. If a free segment id cannot be found,
3567 ** SQLITE_FULL is returned.
3568 **
3569 ** If an error has already occurred, this function is a no-op. 0 is
3570 ** returned in this case.
3571 */
fts5AllocateSegid(Fts5Index * p,Fts5Structure * pStruct)3572 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
3573   int iSegid = 0;
3574 
3575   if( p->rc==SQLITE_OK ){
3576     if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
3577       p->rc = SQLITE_FULL;
3578     }else{
3579       /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
3580       ** array is 63 elements, or 252 bytes, in size.  */
3581       u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
3582       int iLvl, iSeg;
3583       int i;
3584       u32 mask;
3585       memset(aUsed, 0, sizeof(aUsed));
3586       for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3587         for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3588           int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
3589           if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
3590             aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
3591           }
3592         }
3593       }
3594 
3595       for(i=0; aUsed[i]==0xFFFFFFFF; i++);
3596       mask = aUsed[i];
3597       for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
3598       iSegid += 1 + i*32;
3599 
3600 #ifdef SQLITE_DEBUG
3601       for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3602         for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3603           assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
3604         }
3605       }
3606       assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
3607 
3608       {
3609         sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
3610         if( p->rc==SQLITE_OK ){
3611           u8 aBlob[2] = {0xff, 0xff};
3612           sqlite3_bind_int(pIdxSelect, 1, iSegid);
3613           sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
3614           assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
3615           p->rc = sqlite3_reset(pIdxSelect);
3616           sqlite3_bind_null(pIdxSelect, 2);
3617         }
3618       }
3619 #endif
3620     }
3621   }
3622 
3623   return iSegid;
3624 }
3625 
3626 /*
3627 ** Discard all data currently cached in the hash-tables.
3628 */
fts5IndexDiscardData(Fts5Index * p)3629 static void fts5IndexDiscardData(Fts5Index *p){
3630   assert( p->pHash || p->nPendingData==0 );
3631   if( p->pHash ){
3632     sqlite3Fts5HashClear(p->pHash);
3633     p->nPendingData = 0;
3634   }
3635 }
3636 
3637 /*
3638 ** Return the size of the prefix, in bytes, that buffer
3639 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
3640 **
3641 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
3642 ** than buffer (pOld/nOld).
3643 */
fts5PrefixCompress(int nOld,const u8 * pOld,const u8 * pNew)3644 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
3645   int i;
3646   for(i=0; i<nOld; i++){
3647     if( pOld[i]!=pNew[i] ) break;
3648   }
3649   return i;
3650 }
3651 
/*
** Reset the doclist-index accumulated in pWriter->aDlidx[]. Levels are
** processed in order, starting with level 0 and stopping at the first
** level with an empty buffer. For each level processed the buffer is
** zeroed and the bPrevValid flag cleared.
**
** If bFlush is true, the buffer for each level processed is first written
** to the database using fts5DataWrite(). In this case the level 0 buffer
** must not be empty.
*/
static void fts5WriteDlidxClear(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int bFlush                      /* If true, write dlidx to disk */
){
  int iLvl;
  assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
  for(iLvl=0; iLvl<pWriter->nDlidx; iLvl++){
    Fts5DlidxWriter *pLvl = &pWriter->aDlidx[iLvl];
    if( pLvl->buf.n==0 ) return;
    if( bFlush ){
      i64 iDlidxRowid;
      assert( pLvl->pgno!=0 );
      iDlidxRowid = FTS5_DLIDX_ROWID(pWriter->iSegid, iLvl, pLvl->pgno);
      fts5DataWrite(p, iDlidxRowid, pLvl->buf.p, pLvl->buf.n);
    }
    sqlite3Fts5BufferZero(&pLvl->buf);
    pLvl->bPrevValid = 0;
  }
}
3673 
3674 /*
3675 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
3676 ** Any new array elements are zeroed before returning.
3677 */
fts5WriteDlidxGrow(Fts5Index * p,Fts5SegWriter * pWriter,int nLvl)3678 static int fts5WriteDlidxGrow(
3679   Fts5Index *p,
3680   Fts5SegWriter *pWriter,
3681   int nLvl
3682 ){
3683   if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
3684     Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64(
3685         pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
3686     );
3687     if( aDlidx==0 ){
3688       p->rc = SQLITE_NOMEM;
3689     }else{
3690       size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
3691       memset(&aDlidx[pWriter->nDlidx], 0, nByte);
3692       pWriter->aDlidx = aDlidx;
3693       pWriter->nDlidx = nLvl;
3694     }
3695   }
3696   return p->rc;
3697 }
3698 
3699 /*
3700 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
3701 ** enough, flush it to disk and return 1. Otherwise discard it and return
3702 ** zero.
3703 */
fts5WriteFlushDlidx(Fts5Index * p,Fts5SegWriter * pWriter)3704 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
3705   int bFlag = 0;
3706 
3707   /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
3708   ** to the database, also write the doclist-index to disk.  */
3709   if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
3710     bFlag = 1;
3711   }
3712   fts5WriteDlidxClear(p, pWriter, bFlag);
3713   pWriter->nEmpty = 0;
3714   return bFlag;
3715 }
3716 
3717 /*
3718 ** This function is called whenever processing of the doclist for the
3719 ** last term on leaf page (pWriter->iBtPage) is completed.
3720 **
3721 ** The doclist-index for that term is currently stored in-memory within the
3722 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
3723 ** writes it out to disk. Or, if it is too small to bother with, discards
3724 ** it.
3725 **
3726 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
3727 */
fts5WriteFlushBtree(Fts5Index * p,Fts5SegWriter * pWriter)3728 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
3729   int bFlag;
3730 
3731   assert( pWriter->iBtPage || pWriter->nEmpty==0 );
3732   if( pWriter->iBtPage==0 ) return;
3733   bFlag = fts5WriteFlushDlidx(p, pWriter);
3734 
3735   if( p->rc==SQLITE_OK ){
3736     const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
3737     /* The following was already done in fts5WriteInit(): */
3738     /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
3739     sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
3740     sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
3741     sqlite3_step(p->pIdxWriter);
3742     p->rc = sqlite3_reset(p->pIdxWriter);
3743     sqlite3_bind_null(p->pIdxWriter, 2);
3744   }
3745   pWriter->iBtPage = 0;
3746 }
3747 
3748 /*
3749 ** This is called once for each leaf page except the first that contains
3750 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
3751 ** is larger than all terms written to earlier leaves, and equal to or
3752 ** smaller than the first term on the new leaf.
3753 **
3754 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
3755 ** has already occurred when this function is called, it is a no-op.
3756 */
fts5WriteBtreeTerm(Fts5Index * p,Fts5SegWriter * pWriter,int nTerm,const u8 * pTerm)3757 static void fts5WriteBtreeTerm(
3758   Fts5Index *p,                   /* FTS5 backend object */
3759   Fts5SegWriter *pWriter,         /* Writer object */
3760   int nTerm, const u8 *pTerm      /* First term on new page */
3761 ){
3762   fts5WriteFlushBtree(p, pWriter);
3763   if( p->rc==SQLITE_OK ){
3764     fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
3765     pWriter->iBtPage = pWriter->writer.pgno;
3766   }
3767 }
3768 
3769 /*
3770 ** This function is called when flushing a leaf page that contains no
3771 ** terms at all to disk.
3772 */
fts5WriteBtreeNoTerm(Fts5Index * p,Fts5SegWriter * pWriter)3773 static void fts5WriteBtreeNoTerm(
3774   Fts5Index *p,                   /* FTS5 backend object */
3775   Fts5SegWriter *pWriter          /* Writer object */
3776 ){
3777   /* If there were no rowids on the leaf page either and the doclist-index
3778   ** has already been started, append an 0x00 byte to it.  */
3779   if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
3780     Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
3781     assert( pDlidx->bPrevValid );
3782     sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
3783   }
3784 
3785   /* Increment the "number of sequential leaves without a term" counter. */
3786   pWriter->nEmpty++;
3787 }
3788 
fts5DlidxExtractFirstRowid(Fts5Buffer * pBuf)3789 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
3790   i64 iRowid;
3791   int iOff;
3792 
3793   iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
3794   fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
3795   return iRowid;
3796 }
3797 
3798 /*
3799 ** Rowid iRowid has just been appended to the current leaf page. It is the
3800 ** first on the page. This function appends an appropriate entry to the current
3801 ** doclist-index.
3802 */
fts5WriteDlidxAppend(Fts5Index * p,Fts5SegWriter * pWriter,i64 iRowid)3803 static void fts5WriteDlidxAppend(
3804   Fts5Index *p,
3805   Fts5SegWriter *pWriter,
3806   i64 iRowid
3807 ){
3808   int i;
3809   int bDone = 0;
3810 
3811   for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
3812     i64 iVal;
3813     Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
3814 
3815     if( pDlidx->buf.n>=p->pConfig->pgsz ){
3816       /* The current doclist-index page is full. Write it to disk and push
3817       ** a copy of iRowid (which will become the first rowid on the next
3818       ** doclist-index leaf page) up into the next level of the b-tree
3819       ** hierarchy. If the node being flushed is currently the root node,
3820       ** also push its first rowid upwards. */
3821       pDlidx->buf.p[0] = 0x01;    /* Not the root node */
3822       fts5DataWrite(p,
3823           FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
3824           pDlidx->buf.p, pDlidx->buf.n
3825       );
3826       fts5WriteDlidxGrow(p, pWriter, i+2);
3827       pDlidx = &pWriter->aDlidx[i];
3828       if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
3829         i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
3830 
3831         /* This was the root node. Push its first rowid up to the new root. */
3832         pDlidx[1].pgno = pDlidx->pgno;
3833         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
3834         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
3835         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
3836         pDlidx[1].bPrevValid = 1;
3837         pDlidx[1].iPrev = iFirst;
3838       }
3839 
3840       sqlite3Fts5BufferZero(&pDlidx->buf);
3841       pDlidx->bPrevValid = 0;
3842       pDlidx->pgno++;
3843     }else{
3844       bDone = 1;
3845     }
3846 
3847     if( pDlidx->bPrevValid ){
3848       iVal = iRowid - pDlidx->iPrev;
3849     }else{
3850       i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
3851       assert( pDlidx->buf.n==0 );
3852       sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
3853       sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
3854       iVal = iRowid;
3855     }
3856 
3857     sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
3858     pDlidx->bPrevValid = 1;
3859     pDlidx->iPrev = iRowid;
3860   }
3861 }
3862 
/*
** Write the leaf page currently accumulated in pWriter->writer to the
** database and prepare the writer to begin the next leaf page.
**
** Before the page is written, the size of the page body is stored in the
** szLeaf field of the page header (bytes 2 and 3) and, if the page
** contains at least one term, the page-index is appended to the page
** body. If there are no terms on the page, fts5WriteBtreeNoTerm() is
** invoked instead.
*/
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 aEmptyHdr[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pLeaf = &pWriter->writer;

  /* The page-index is empty if and only if the page contains no terms. */
  assert( (pLeaf->pgidx.n==0)==(pWriter->bFirstTermInPage) );

  /* Set the szLeaf header field. */
  assert( 0==fts5GetU16(&pLeaf->buf.p[2]) );
  fts5PutU16(&pLeaf->buf.p[2], (u16)pLeaf->buf.n);

  if( !pWriter->bFirstTermInPage ){
    /* Append the pgidx to the page buffer. */
    fts5BufferAppendBlob(&p->rc, &pLeaf->buf, pLeaf->pgidx.n, pLeaf->pgidx.p);
  }else{
    /* No term was written to this page. */
    assert( pLeaf->pgidx.n==0 );
    fts5WriteBtreeNoTerm(p, pWriter);
  }

  /* Write the page out to disk */
  fts5DataWrite(p,
      FTS5_SEGMENT_ROWID(pWriter->iSegid, pLeaf->pgno),
      pLeaf->buf.p, pLeaf->buf.n
  );

  /* Initialize the next page. It starts out as a 4 byte header with all
  ** bytes set to zero.  */
  fts5BufferZero(&pLeaf->buf);
  fts5BufferZero(&pLeaf->pgidx);
  fts5BufferAppendBlob(&p->rc, &pLeaf->buf, sizeof(aEmptyHdr), aEmptyHdr);
  pLeaf->iPrevPgidx = 0;
  pLeaf->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstRowidInPage = 1;
  pWriter->bFirstTermInPage = 1;
}
3901 
3902 /*
3903 ** Append term pTerm/nTerm to the segment being written by the writer passed
3904 ** as the second argument.
3905 **
3906 ** If an error occurs, set the Fts5Index.rc error code. If an error has
3907 ** already occurred, this function is a no-op.
3908 */
fts5WriteAppendTerm(Fts5Index * p,Fts5SegWriter * pWriter,int nTerm,const u8 * pTerm)3909 static void fts5WriteAppendTerm(
3910   Fts5Index *p,
3911   Fts5SegWriter *pWriter,
3912   int nTerm, const u8 *pTerm
3913 ){
3914   int nPrefix;                    /* Bytes of prefix compression for term */
3915   Fts5PageWriter *pPage = &pWriter->writer;
3916   Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
3917   int nMin = MIN(pPage->term.n, nTerm);
3918 
3919   assert( p->rc==SQLITE_OK );
3920   assert( pPage->buf.n>=4 );
3921   assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
3922 
3923   /* If the current leaf page is full, flush it to disk. */
3924   if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
3925     if( pPage->buf.n>4 ){
3926       fts5WriteFlushLeaf(p, pWriter);
3927       if( p->rc!=SQLITE_OK ) return;
3928     }
3929     fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
3930   }
3931 
3932   /* TODO1: Updating pgidx here. */
3933   pPgidx->n += sqlite3Fts5PutVarint(
3934       &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
3935   );
3936   pPage->iPrevPgidx = pPage->buf.n;
3937 #if 0
3938   fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
3939   pPgidx->n += 2;
3940 #endif
3941 
3942   if( pWriter->bFirstTermInPage ){
3943     nPrefix = 0;
3944     if( pPage->pgno!=1 ){
3945       /* This is the first term on a leaf that is not the leftmost leaf in
3946       ** the segment b-tree. In this case it is necessary to add a term to
3947       ** the b-tree hierarchy that is (a) larger than the largest term
3948       ** already written to the segment and (b) smaller than or equal to
3949       ** this term. In other words, a prefix of (pTerm/nTerm) that is one
3950       ** byte longer than the longest prefix (pTerm/nTerm) shares with the
3951       ** previous term.
3952       **
3953       ** Usually, the previous term is available in pPage->term. The exception
3954       ** is if this is the first term written in an incremental-merge step.
3955       ** In this case the previous term is not available, so just write a
3956       ** copy of (pTerm/nTerm) into the parent node. This is slightly
3957       ** inefficient, but still correct.  */
3958       int n = nTerm;
3959       if( pPage->term.n ){
3960         n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
3961       }
3962       fts5WriteBtreeTerm(p, pWriter, n, pTerm);
3963       if( p->rc!=SQLITE_OK ) return;
3964       pPage = &pWriter->writer;
3965     }
3966   }else{
3967     nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
3968     fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
3969   }
3970 
3971   /* Append the number of bytes of new data, then the term data itself
3972   ** to the page. */
3973   fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
3974   fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
3975 
3976   /* Update the Fts5PageWriter.term field. */
3977   fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
3978   pWriter->bFirstTermInPage = 0;
3979 
3980   pWriter->bFirstRowidInPage = 0;
3981   pWriter->bFirstRowidInDoclist = 1;
3982 
3983   assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
3984   pWriter->aDlidx[0].pgno = pPage->pgno;
3985 }
3986 
3987 /*
3988 ** Append a rowid and position-list size field to the writers output.
3989 */
fts5WriteAppendRowid(Fts5Index * p,Fts5SegWriter * pWriter,i64 iRowid)3990 static void fts5WriteAppendRowid(
3991   Fts5Index *p,
3992   Fts5SegWriter *pWriter,
3993   i64 iRowid
3994 ){
3995   if( p->rc==SQLITE_OK ){
3996     Fts5PageWriter *pPage = &pWriter->writer;
3997 
3998     if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
3999       fts5WriteFlushLeaf(p, pWriter);
4000     }
4001 
4002     /* If this is to be the first rowid written to the page, set the
4003     ** rowid-pointer in the page-header. Also append a value to the dlidx
4004     ** buffer, in case a doclist-index is required.  */
4005     if( pWriter->bFirstRowidInPage ){
4006       fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
4007       fts5WriteDlidxAppend(p, pWriter, iRowid);
4008     }
4009 
4010     /* Write the rowid. */
4011     if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
4012       fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
4013     }else{
4014       assert_nc( p->rc || iRowid>pWriter->iPrevRowid );
4015       fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
4016     }
4017     pWriter->iPrevRowid = iRowid;
4018     pWriter->bFirstRowidInDoclist = 0;
4019     pWriter->bFirstRowidInPage = 0;
4020   }
4021 }
4022 
/*
** Append the nData bytes of data in buffer aData to the writer's output.
** The data is treated as a series of varints.
**
** While the data remaining does not fit on the current leaf page, the
** smallest number of whole varints required to fill the page are copied
** to it and the page flushed to disk. In other words, the data is only
** ever split across pages at a varint boundary. Whatever is left over is
** appended to the new current page.
**
** If an error occurs, set the Fts5Index.rc error code.
*/
static void fts5WriteAppendPoslistData(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  const u8 *aData,
  int nData
){
  Fts5PageWriter *pLeaf = &pWriter->writer;
  const u8 *aRem = aData;         /* Data still to be written */
  int nRem = nData;               /* Size of aRem[] in bytes */

  assert( p->pConfig->pgsz>0 );
  while( p->rc==SQLITE_OK
     && (pLeaf->buf.n + pLeaf->pgidx.n + nRem)>=p->pConfig->pgsz
  ){
    /* Number of bytes required to fill the current page */
    int nSpace = p->pConfig->pgsz - pLeaf->buf.n - pLeaf->pgidx.n;
    int nChunk;
    u64 iUnused;                  /* Varint value. Not used. */

    /* Set nChunk to the size of the data to write to the current page. */
    for(nChunk=0; nChunk<nSpace; ){
      nChunk += fts5GetVarint(&aRem[nChunk], &iUnused);
    }

    fts5BufferAppendBlob(&p->rc, &pLeaf->buf, nChunk, aRem);
    aRem += nChunk;
    nRem -= nChunk;
    fts5WriteFlushLeaf(p, pWriter);
  }
  if( nRem>0 ){
    fts5BufferAppendBlob(&p->rc, &pLeaf->buf, nRem, aRem);
  }
}
4052 
4053 /*
4054 ** Flush any data cached by the writer object to the database. Free any
4055 ** allocations associated with the writer.
4056 */
fts5WriteFinish(Fts5Index * p,Fts5SegWriter * pWriter,int * pnLeaf)4057 static void fts5WriteFinish(
4058   Fts5Index *p,
4059   Fts5SegWriter *pWriter,         /* Writer object */
4060   int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
4061 ){
4062   int i;
4063   Fts5PageWriter *pLeaf = &pWriter->writer;
4064   if( p->rc==SQLITE_OK ){
4065     assert( pLeaf->pgno>=1 );
4066     if( pLeaf->buf.n>4 ){
4067       fts5WriteFlushLeaf(p, pWriter);
4068     }
4069     *pnLeaf = pLeaf->pgno-1;
4070     if( pLeaf->pgno>1 ){
4071       fts5WriteFlushBtree(p, pWriter);
4072     }
4073   }
4074   fts5BufferFree(&pLeaf->term);
4075   fts5BufferFree(&pLeaf->buf);
4076   fts5BufferFree(&pLeaf->pgidx);
4077   fts5BufferFree(&pWriter->btterm);
4078 
4079   for(i=0; i<pWriter->nDlidx; i++){
4080     sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
4081   }
4082   sqlite3_free(pWriter->aDlidx);
4083 }
4084 
/*
** Initialize the writer object pWriter so that it may be used to write
** the segment with segment id iSegid, starting at leaf page 1. Any
** previous contents of the object are overwritten with zeroes.
**
** The Fts5Index.pIdxWriter statement, used to insert rows into the %_idx
** table, is prepared by this function if it has not been already.
**
** If an error occurs, an error code is left in Fts5Index.rc.
*/
static void fts5WriteInit(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int iSegid
){
  Fts5Config *pConfig = p->pConfig;
  const int nAlloc = pConfig->pgsz + FTS5_DATA_PADDING;

  memset(pWriter, 0, sizeof(*pWriter));
  pWriter->iSegid = iSegid;

  /* Allocate the first doclist-index level. */
  fts5WriteDlidxGrow(p, pWriter, 1);
  pWriter->writer.pgno = 1;
  pWriter->bFirstTermInPage = 1;
  pWriter->iBtPage = 1;

  assert( pWriter->writer.buf.n==0 );
  assert( pWriter->writer.pgidx.n==0 );

  /* Grow the two buffers to pgsz + padding bytes in size. */
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nAlloc);
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nAlloc);

  /* Prepare the statement used to write to the %_idx table. */
  if( p->pIdxWriter==0 ){
    fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
          "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
          pConfig->zDb, pConfig->zName
    ));
  }

  if( p->rc!=SQLITE_OK ) return;

  /* Initialize the 4-byte leaf-page header to 0x00. */
  memset(pWriter->writer.buf.p, 0, 4);
  pWriter->writer.buf.n = 4;

  /* Bind the current output segment id to the index-writer. This is an
  ** optimization over binding the same value over and over as rows are
  ** inserted into %_idx by the current writer.  */
  sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
}
4126 
4127 /*
4128 ** Iterator pIter was used to iterate through the input segments of on an
4129 ** incremental merge operation. This function is called if the incremental
4130 ** merge step has finished but the input has not been completely exhausted.
4131 */
fts5TrimSegments(Fts5Index * p,Fts5Iter * pIter)4132 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
4133   int i;
4134   Fts5Buffer buf;
4135   memset(&buf, 0, sizeof(Fts5Buffer));
4136   for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
4137     Fts5SegIter *pSeg = &pIter->aSeg[i];
4138     if( pSeg->pSeg==0 ){
4139       /* no-op */
4140     }else if( pSeg->pLeaf==0 ){
4141       /* All keys from this input segment have been transfered to the output.
4142       ** Set both the first and last page-numbers to 0 to indicate that the
4143       ** segment is now empty. */
4144       pSeg->pSeg->pgnoLast = 0;
4145       pSeg->pSeg->pgnoFirst = 0;
4146     }else{
4147       int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
4148       i64 iLeafRowid;
4149       Fts5Data *pData;
4150       int iId = pSeg->pSeg->iSegid;
4151       u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
4152 
4153       iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
4154       pData = fts5LeafRead(p, iLeafRowid);
4155       if( pData ){
4156         if( iOff>pData->szLeaf ){
4157           /* This can occur if the pages that the segments occupy overlap - if
4158           ** a single page has been assigned to more than one segment. In
4159           ** this case a prior iteration of this loop may have corrupted the
4160           ** segment currently being trimmed.  */
4161           p->rc = FTS5_CORRUPT;
4162         }else{
4163           fts5BufferZero(&buf);
4164           fts5BufferGrow(&p->rc, &buf, pData->nn);
4165           fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
4166           fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
4167           fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
4168           fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff,&pData->p[iOff]);
4169           if( p->rc==SQLITE_OK ){
4170             /* Set the szLeaf field */
4171             fts5PutU16(&buf.p[2], (u16)buf.n);
4172           }
4173 
4174           /* Set up the new page-index array */
4175           fts5BufferAppendVarint(&p->rc, &buf, 4);
4176           if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
4177            && pSeg->iEndofDoclist<pData->szLeaf
4178            && pSeg->iPgidxOff<=pData->nn
4179           ){
4180             int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
4181             fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
4182             fts5BufferAppendBlob(&p->rc, &buf,
4183                 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
4184             );
4185           }
4186 
4187           pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
4188           fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
4189           fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
4190         }
4191         fts5DataRelease(pData);
4192       }
4193     }
4194   }
4195   fts5BufferFree(&buf);
4196 }
4197 
/*
** Callback that appends a chunk of data to a segment writer. Context
** pointer pCtx must point to the Fts5SegWriter object to write to. The
** nChunk bytes of data in buffer pChunk are passed to
** fts5WriteAppendPoslistData().
*/
static void fts5MergeChunkCallback(
  Fts5Index *p,
  void *pCtx,
  const u8 *pChunk, int nChunk
){
  fts5WriteAppendPoslistData(p, (Fts5SegWriter*)pCtx, pChunk, nChunk);
}
4206 
/*
** Merge input segments from level iLvl of structure *ppStruct into a single
** output segment on level iLvl+1.
**
** If level iLvl already records an in-progress merge (nMerge!=0), that
** merge is resumed: the output segment is the last segment on level iLvl+1
** and the inputs are the first nMerge segments on level iLvl. Otherwise, a
** new segment-id is allocated, a new output segment is appended to level
** iLvl+1 (adding that level to the structure first if required, in which
** case the local copy of *ppStruct is refreshed) and every segment on
** level iLvl is used as input.
**
** If pnRem is not NULL, then the merge stops at the first change of term
** that occurs after more than *pnRem leaf pages have been written, and
** *pnRem is decremented by the number of leaf pages written before
** returning. If pnRem is NULL, the merge always runs to completion.
**
** If all input is consumed, the input segments are removed from the %_data
** table and from level iLvl of the structure. Otherwise, the inputs are
** passed to fts5TrimSegments() and pLvl->nMerge is set so that a later call
** resumes the merge.
**
** If fts5StructureExtendLevel() leaves an error in p->rc, this function
** returns early without updating *pnRem.
*/
static void fts5IndexMergeLevel(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Structure of index */
  int iLvl,                       /* Level to read input from */
  int *pnRem                      /* Write up to this many output leaves */
){
  Fts5Structure *pStruct = *ppStruct;
  Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  Fts5StructureLevel *pLvlOut;
  Fts5Iter *pIter = 0;       /* Iterator to read input data */
  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  int nInput;                     /* Number of input segments */
  Fts5SegWriter writer;           /* Writer object */
  Fts5StructureSegment *pSeg;     /* Output segment */
  Fts5Buffer term;                /* Copy of the term currently being merged */
  int bOldest;                    /* True if the output segment is the oldest */
  int eDetail = p->pConfig->eDetail;
  const int flags = FTS5INDEX_QUERY_NOOUTPUT;
  int bTermWritten = 0;           /* True if current term already output */

  assert( iLvl<pStruct->nLevel );
  assert( pLvl->nMerge<=pLvl->nSeg );

  memset(&writer, 0, sizeof(Fts5SegWriter));
  memset(&term, 0, sizeof(Fts5Buffer));
  if( pLvl->nMerge ){
    /* Resume an existing merge. The output segment is the last one on the
    ** next level; writing continues on the page following pgnoLast. */
    pLvlOut = &pStruct->aLevel[iLvl+1];
    assert( pLvlOut->nSeg>0 );
    nInput = pLvl->nMerge;
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];

    fts5WriteInit(p, &writer, pSeg->iSegid);
    writer.writer.pgno = pSeg->pgnoLast+1;
    writer.iBtPage = 0;
  }else{
    int iSegid = fts5AllocateSegid(p, pStruct);

    /* Extend the Fts5Structure object as required to ensure the output
    ** segment exists. */
    if( iLvl==pStruct->nLevel-1 ){
      fts5StructureAddLevel(&p->rc, ppStruct);
      pStruct = *ppStruct;
    }
    fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
    if( p->rc ) return;
    /* *ppStruct may have changed above, so recompute the level pointers
    ** from the current pStruct. */
    pLvl = &pStruct->aLevel[iLvl];
    pLvlOut = &pStruct->aLevel[iLvl+1];

    fts5WriteInit(p, &writer, iSegid);

    /* Add the new segment to the output level */
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
    pLvlOut->nSeg++;
    pSeg->pgnoFirst = 1;
    pSeg->iSegid = iSegid;
    pStruct->nSegment++;

    /* Read input from all segments in the input level */
    nInput = pLvl->nSeg;
  }
  /* The output is "oldest" if it is the only segment on the last level of
  ** the structure. */
  bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);

  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;

    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){
      /* The iterator has moved on to a different term. This is the only
      ** point at which the page budget is checked, so the merge is only
      ** ever suspended between terms. */
      if( pnRem && writer.nLeafWritten>nRem ){
        break;
      }
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
      bTermWritten =0;
    }

    /* Check for key annihilation. An entry with an empty position list is
    ** dropped if either the output is the oldest segment or the entry is
    ** not flagged as a delete (bDel==0). */
    if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;

    if( p->rc==SQLITE_OK && bTermWritten==0 ){
      /* This is a new term. Append a term to the output segment. */
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      bTermWritten = 1;
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

    if( eDetail==FTS5_DETAIL_NONE ){
      /* detail=none: no position list is copied. For an entry with bDel
      ** set, a single 0 varint is appended after the rowid, followed by a
      ** second 0 varint if nPos>0. */
      if( pSegIter->bDel ){
        fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        if( pSegIter->nPos>0 ){
          fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        }
      }
    }else{
      /* Append the position-list data to the output. The size field is
      ** (nPos*2 + bDel), i.e. the delete flag occupies the low bit. */
      nPos = pSegIter->nPos*2 + pSegIter->bDel;
      fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
      fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
    }
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){
    int i;

    /* Remove the redundant segments from the %_data table */
    for(i=0; i<nInput; i++){
      fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
    }

    /* Remove the redundant segments from the input level */
    if( pLvl->nSeg!=nInput ){
      int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
      memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
    }
    pStruct->nSegment -= nInput;
    pLvl->nSeg -= nInput;
    pLvl->nMerge = 0;
    /* If nothing at all was written to the output segment (pgnoLast==0),
    ** remove it from the structure as well. */
    if( pSeg->pgnoLast==0 ){
      pLvlOut->nSeg--;
      pStruct->nSegment--;
    }
  }else{
    /* The merge was suspended part way through. Trim the input segments
    ** and record the number of inputs so the merge can be resumed. */
    assert( pSeg->pgnoLast>0 );
    fts5TrimSegments(p, pIter);
    pLvl->nMerge = nInput;
  }

  fts5MultiIterFree(pIter);
  fts5BufferFree(&term);
  if( pnRem ) *pnRem -= writer.nLeafWritten;
}
4353 
4354 /*
4355 ** Do up to nPg pages of automerge work on the index.
4356 **
4357 ** Return true if any changes were actually made, or false otherwise.
4358 */
fts5IndexMerge(Fts5Index * p,Fts5Structure ** ppStruct,int nPg,int nMin)4359 static int fts5IndexMerge(
4360   Fts5Index *p,                   /* FTS5 backend object */
4361   Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
4362   int nPg,                        /* Pages of work to do */
4363   int nMin                        /* Minimum number of segments to merge */
4364 ){
4365   int nRem = nPg;
4366   int bRet = 0;
4367   Fts5Structure *pStruct = *ppStruct;
4368   while( nRem>0 && p->rc==SQLITE_OK ){
4369     int iLvl;                   /* To iterate through levels */
4370     int iBestLvl = 0;           /* Level offering the most input segments */
4371     int nBest = 0;              /* Number of input segments on best level */
4372 
4373     /* Set iBestLvl to the level to read input segments from. */
4374     assert( pStruct->nLevel>0 );
4375     for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
4376       Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4377       if( pLvl->nMerge ){
4378         if( pLvl->nMerge>nBest ){
4379           iBestLvl = iLvl;
4380           nBest = pLvl->nMerge;
4381         }
4382         break;
4383       }
4384       if( pLvl->nSeg>nBest ){
4385         nBest = pLvl->nSeg;
4386         iBestLvl = iLvl;
4387       }
4388     }
4389 
4390     /* If nBest is still 0, then the index must be empty. */
4391 #ifdef SQLITE_DEBUG
4392     for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
4393       assert( pStruct->aLevel[iLvl].nSeg==0 );
4394     }
4395 #endif
4396 
4397     if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
4398       break;
4399     }
4400     bRet = 1;
4401     fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
4402     if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
4403       fts5StructurePromote(p, iBestLvl+1, pStruct);
4404     }
4405   }
4406   *ppStruct = pStruct;
4407   return bRet;
4408 }
4409 
4410 /*
4411 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
4412 ** segment. This function updates the write-counter accordingly and, if
4413 ** necessary, performs incremental merge work.
4414 **
4415 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4416 ** already occurred, this function is a no-op.
4417 */
fts5IndexAutomerge(Fts5Index * p,Fts5Structure ** ppStruct,int nLeaf)4418 static void fts5IndexAutomerge(
4419   Fts5Index *p,                   /* FTS5 backend object */
4420   Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
4421   int nLeaf                       /* Number of output leaves just written */
4422 ){
4423   if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){
4424     Fts5Structure *pStruct = *ppStruct;
4425     u64 nWrite;                   /* Initial value of write-counter */
4426     int nWork;                    /* Number of work-quanta to perform */
4427     int nRem;                     /* Number of leaf pages left to write */
4428 
4429     /* Update the write-counter. While doing so, set nWork. */
4430     nWrite = pStruct->nWriteCounter;
4431     nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
4432     pStruct->nWriteCounter += nLeaf;
4433     nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
4434 
4435     fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
4436   }
4437 }
4438 
/*
** Starting at level 0 and moving upwards one level at a time, merge each
** level that holds at least p->pConfig->nCrisisMerge segments into the
** level above it. Each merge is run with no page limit. Stop at the first
** level below the threshold, or when an error occurs.
**
** *ppStruct is updated before returning, as fts5IndexMergeLevel() may
** replace the structure object.
*/
static void fts5IndexCrisismerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct        /* IN/OUT: Current structure of index */
){
  Fts5Structure *pS = *ppStruct;  /* Working copy of *ppStruct */
  const int nThreshold = p->pConfig->nCrisisMerge;
  int i;                          /* Level currently under consideration */

  assert( p->rc!=SQLITE_OK || pS->nLevel>0 );
  for(i=0; p->rc==SQLITE_OK && pS->aLevel[i].nSeg>=nThreshold; i++){
    fts5IndexMergeLevel(p, &pS, i, 0);
    assert( p->rc!=SQLITE_OK || pS->nLevel>(i+1) );
    fts5StructurePromote(p, i+1, pS);
  }
  *ppStruct = pS;
}
4456 
/*
** Return the error code currently stored in p->rc, resetting p->rc to
** SQLITE_OK at the same time.
*/
static int fts5IndexReturn(Fts5Index *p){
  const int rcOut = p->rc;
  p->rc = SQLITE_OK;
  return rcOut;
}
4462 
/*
** Context object that pairs an index handle with a segment writer.
**
** NOTE(review): no use of this type is visible in the nearby code;
** presumably it was intended as the context for a hash-table flush
** callback - confirm whether it is still required.
*/
typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;                /* Index handle */
  Fts5SegWriter writer;           /* Segment writer */
};
4468 
4469 /*
4470 ** Buffer aBuf[] contains a list of varints, all small enough to fit
4471 ** in a 32-bit integer. Return the size of the largest prefix of this
4472 ** list nMax bytes or less in size.
4473 */
fts5PoslistPrefix(const u8 * aBuf,int nMax)4474 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
4475   int ret;
4476   u32 dummy;
4477   ret = fts5GetVarint32(aBuf, dummy);
4478   if( ret<nMax ){
4479     while( 1 ){
4480       int i = fts5GetVarint32(&aBuf[ret], dummy);
4481       if( (ret + i) > nMax ) break;
4482       ret += i;
4483     }
4484   }
4485   return ret;
4486 }
4487 
/*
** Flush the contents of the in-memory hash table (p->pHash) to a new
** level-0 segment on disk. Also update the corresponding structure record
** and then perform any automerge and crisis-merge work.
**
** Each term in the hash table is written out followed by its doclist. If
** the doclist fits on the current leaf page it is copied in one go.
** Otherwise it is copied entry by entry, flushing leaf pages as they fill
** and splitting large position lists on varint boundaries.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
  Fts5Hash *pHash = p->pHash;
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p);
  iSegid = fts5AllocateSegid(p, pStruct);
  fts5StructureInvalidate(p);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;
    int eDetail = p->pConfig->eDetail;
    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
    Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iSegid);

    pBuf = &writer.writer.buf;
    pPgidx = &writer.writer.pgidx;

    /* fts5WriteInit() should have initialized the buffers to (most likely)
    ** the maximum space required. */
    assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
    assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }
    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;          /* Buffer containing term */
      const u8 *pDoclist;         /* Pointer to doclist for this term */
      int nDoclist;               /* Size of doclist in bytes */

      /* Write the term for this entry to disk. */
      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
      if( p->rc!=SQLITE_OK ) break;

      assert( writer.bFirstRowidInPage==0 );
      if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;           /* Current rowid (sum of deltas so far) */
        i64 iDelta = 0;           /* Rowid delta most recently read */
        int iOff = 0;             /* Read offset within pDoclist[] */

        /* The entire doclist will not fit on this leaf. The following
        ** loop iterates through the poslists that make up the current
        ** doclist.  */
        while( p->rc==SQLITE_OK && iOff<nDoclist ){
          iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
          iRowid += iDelta;

          /* The first rowid on a page is stored as an absolute value, with
          ** its offset written to the first two bytes of the page. All
          ** other rowids are stored as deltas. */
          if( writer.bFirstRowidInPage ){
            fts5PutU16(&pBuf->p[0], (u16)pBuf->n);   /* first rowid on page */
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
            if( p->rc!=SQLITE_OK ) break;
          }else{
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( eDetail==FTS5_DETAIL_NONE ){
            /* detail=none: there is no position list. Copy the (at most
            ** two) 0x00 bytes that may follow the rowid, then flush the
            ** leaf if it is full. */
            if( iOff<nDoclist && pDoclist[iOff]==0 ){
              pBuf->p[pBuf->n++] = 0;
              iOff++;
              if( iOff<nDoclist && pDoclist[iOff]==0 ){
                pBuf->p[pBuf->n++] = 0;
                iOff++;
              }
            }
            if( (pBuf->n + pPgidx->n)>=pgsz ){
              fts5WriteFlushLeaf(p, &writer);
            }
          }else{
            int bDummy;
            int nPos;
            /* nCopy is the total size of this entry: the size field as
            ** measured by fts5GetPoslistSize() plus nPos bytes of data. */
            int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
            nCopy += nPos;
            if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
              /* The entire poslist will fit on the current leaf. So copy
              ** it in one go. */
              fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
            }else{
              /* The entire poslist will not fit on this leaf. So it needs
              ** to be broken into sections. The only qualification being
              ** that each varint must be stored contiguously.  */
              const u8 *pPoslist = &pDoclist[iOff];
              int iPos = 0;
              while( p->rc==SQLITE_OK ){
                int nSpace = pgsz - pBuf->n - pPgidx->n;
                int n = 0;
                if( (nCopy - iPos)<=nSpace ){
                  /* The remainder of the poslist fits on this leaf */
                  n = nCopy - iPos;
                }else{
                  /* Copy as many whole varints as fit in nSpace bytes */
                  n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
                }
                assert( n>0 );
                fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
                iPos += n;
                if( (pBuf->n + pPgidx->n)>=pgsz ){
                  fts5WriteFlushLeaf(p, &writer);
                }
                if( iPos>=nCopy ) break;
              }
            }
            iOff += nCopy;
          }
        }
      }

      /* TODO2: Doclist terminator written here. */
      /* pBuf->p[pBuf->n++] = '\0'; */
      assert( pBuf->n<=pBuf->nSpace );
      if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureWrite() call below.  */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }

  /* Do any incremental or crisis merge work, then write the updated
  ** structure record and drop the reference obtained above. */
  fts5IndexAutomerge(p, &pStruct, pgnoLast);
  fts5IndexCrisismerge(p, &pStruct);
  fts5StructureWrite(p, pStruct);
  fts5StructureRelease(pStruct);
}
4645 
4646 /*
4647 ** Flush any data stored in the in-memory hash tables to the database.
4648 */
fts5IndexFlush(Fts5Index * p)4649 static void fts5IndexFlush(Fts5Index *p){
4650   /* Unless it is empty, flush the hash table to disk */
4651   if( p->nPendingData ){
4652     assert( p->pHash );
4653     p->nPendingData = 0;
4654     fts5FlushOneHash(p);
4655   }
4656 }
4657 
/*
** Prepare a structure object suitable for merging all segments of pStruct
** into one.
**
** A structure does not require reorganization if either:
**
**  (1) it consists of fewer than two segments, or
**  (2) all segments are on the same level, or
**  (3) all segments except one are on the same level and are currently
**      inputs to a merge operation.
**
** In case (1), return NULL. In cases (2) and (3), take a new reference to
** pStruct and return it.
**
** Otherwise, allocate and return a new structure with one more level than
** pStruct, in which all segments of pStruct appear on the new final level,
** ordered from oldest to newest. If an allocation fails, NULL is returned
** (the allocator is passed &p->rc).
*/
static Fts5Structure *fts5IndexOptimizeStruct(
  Fts5Index *p,
  Fts5Structure *pStruct
){
  const int nSeg = pStruct->nSegment;       /* Total number of segments */
  sqlite3_int64 nByte = sizeof(Fts5Structure);
  Fts5Structure *pNew;                      /* New structure object */
  Fts5StructureLevel *pOut;                 /* Final level of pNew */
  int iLvl;                                 /* Level of pStruct */
  int iSeg;                                 /* Segment within level iLvl */
  int iOut = 0;                             /* Next free slot in pOut->aSeg */

  /* Case (1) */
  if( nSeg<2 ) return 0;

  /* Cases (2) and (3) */
  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
    const Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
    const int bAllHere = (pLvl->nSeg==nSeg);
    const int bAllButOneMerging = (
        pLvl->nSeg==nSeg-1 && pLvl->nMerge==pLvl->nSeg
    );
    if( bAllHere || bAllButOneMerging ){
      fts5StructureRef(pStruct);
      return pStruct;
    }
    assert( pLvl->nMerge<=pLvl->nSeg );
  }

  /* Allocate the new structure, with room for one extra level. */
  nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
  pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
  if( pNew==0 ) return 0;

  pNew->nLevel = pStruct->nLevel+1;
  pNew->nRef = 1;
  pNew->nWriteCounter = pStruct->nWriteCounter;

  /* Allocate the segment array for the new final level. */
  pOut = &pNew->aLevel[pStruct->nLevel];
  nByte = nSeg * sizeof(Fts5StructureSegment);
  pOut->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
  if( pOut->aSeg==0 ){
    sqlite3_free(pNew);
    return 0;
  }

  /* Iterate through all segments, from oldest to newest. Add them to
  ** the new level so that pOut->aSeg[0] is the oldest segment in the
  ** data structure.  */
  for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
    const Fts5StructureLevel *pIn = &pStruct->aLevel[iLvl];
    for(iSeg=0; iSeg<pIn->nSeg; iSeg++){
      pOut->aSeg[iOut++] = pIn->aSeg[iSeg];
    }
  }
  pNew->nSegment = pOut->nSeg = nSeg;

  return pNew;
}
4719 
/*
** Flush any pending data, then merge all segments in the index into one.
**
** The structure record is read and passed to fts5IndexOptimizeStruct().
** If that returns a structure, the first non-empty level is merged
** repeatedly, FTS5_OPT_WORK_UNIT leaf pages at a time, until it is empty
** or an error occurs. The resulting structure is then written back.
**
** Return the error code accumulated in p->rc (resetting p->rc).
*/
int sqlite3Fts5IndexOptimize(Fts5Index *p){
  Fts5Structure *pOrig;           /* Structure as read from the database */
  Fts5Structure *pOpt;            /* Structure to run the merge on */

  assert( p->rc==SQLITE_OK );
  fts5IndexFlush(p);
  pOrig = fts5StructureRead(p);
  fts5StructureInvalidate(p);

  pOpt = pOrig ? fts5IndexOptimizeStruct(p, pOrig) : 0;
  fts5StructureRelease(pOrig);

  assert( pOpt==0 || pOpt->nSegment>0 );
  if( pOpt ){
    int iSrc = 0;                 /* First level that contains segments */
    while( pOpt->aLevel[iSrc].nSeg==0 ) iSrc++;

    while( p->rc==SQLITE_OK && pOpt->aLevel[iSrc].nSeg>0 ){
      int nBudget = FTS5_OPT_WORK_UNIT;
      fts5IndexMergeLevel(p, &pOpt, iSrc, &nBudget);
    }

    fts5StructureWrite(p, pOpt);
    fts5StructureRelease(pOpt);
  }

  return fts5IndexReturn(p);
}
4749 
4750 /*
4751 ** This is called to implement the special "VALUES('merge', $nMerge)"
4752 ** INSERT command.
4753 */
sqlite3Fts5IndexMerge(Fts5Index * p,int nMerge)4754 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
4755   Fts5Structure *pStruct = fts5StructureRead(p);
4756   if( pStruct ){
4757     int nMin = p->pConfig->nUsermerge;
4758     fts5StructureInvalidate(p);
4759     if( nMerge<0 ){
4760       Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
4761       fts5StructureRelease(pStruct);
4762       pStruct = pNew;
4763       nMin = 2;
4764       nMerge = nMerge*-1;
4765     }
4766     if( pStruct && pStruct->nLevel ){
4767       if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
4768         fts5StructureWrite(p, pStruct);
4769       }
4770     }
4771     fts5StructureRelease(pStruct);
4772   }
4773   return fts5IndexReturn(p);
4774 }
4775 
/*
** Append the rowid delta iDelta to buffer pBuf as a varint. Any error is
** left in p->rc. The iterator argument is not used.
**
** This is the counterpart of fts5AppendPoslist() that
** fts5SetupPrefixIter() selects for detail=none tables.
*/
static void fts5AppendRowid(
  Fts5Index *p,                   /* FTS5 backend object */
  i64 iDelta,                     /* Rowid delta to append */
  Fts5Iter *pUnused,              /* Not used */
  Fts5Buffer *pBuf                /* Buffer to append to */
){
  int *pRc = &p->rc;
  UNUSED_PARAM(pUnused);
  fts5BufferAppendVarint(pRc, pBuf, iDelta);
}
4785 
/*
** Append an entry to the doclist in buffer pBuf. The entry consists of:
**
**   * the rowid delta iDelta, as a varint,
**   * the size field (nData*2), as a varint, and
**   * the nData bytes of data at pMulti->base.pData.
**
** FTS5_DATA_ZERO_PADDING zero bytes are written following the new entry,
** but are not counted in pBuf->n. This function is a no-op if p->rc is
** already set or if the buffer cannot be grown.
*/
static void fts5AppendPoslist(
  Fts5Index *p,                   /* FTS5 backend object */
  i64 iDelta,                     /* Rowid delta to append */
  Fts5Iter *pMulti,               /* Iterator supplying the data */
  Fts5Buffer *pBuf                /* Buffer to append to */
){
  const int nPoslist = pMulti->base.nData;
  /* Space for the data, two varints of up to 9 bytes each, and padding */
  const int nReq = nPoslist + 9 + 9 + FTS5_DATA_ZERO_PADDING;

  assert( nPoslist>0 );
  if( p->rc!=SQLITE_OK ) return;
  if( 0!=fts5BufferGrow(&p->rc, pBuf, nReq) ) return;

  fts5BufferSafeAppendVarint(pBuf, iDelta);
  fts5BufferSafeAppendVarint(pBuf, nPoslist*2);
  fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nPoslist);
  memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING);
}
4802 
4803 
/*
** Advance doclist iterator pIter to the next entry.
**
** The next entry begins immediately after the current one, at offset
** (nSize + nPoslist) from aPoslist. If that is at or past aEof, aPoslist
** is set to NULL to indicate EOF. Otherwise, the rowid delta is read and
** added to iRowid, and aPoslist, nSize and nPoslist are set to describe
** the new entry: aPoslist points at the size field, nSize is the number
** of bytes the size field occupies, and nPoslist is the size field value
** shifted right by one bit.
*/
static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  u8 *pNext = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
  i64 iDelta;                     /* Rowid delta for the new entry */

  assert( pIter->aPoslist );
  if( pNext>=pIter->aEof ){
    pIter->aPoslist = 0;
    return;
  }

  pNext += fts5GetVarint(pNext, (u64*)&iDelta);
  pIter->iRowid += iDelta;
  pIter->aPoslist = pNext;

  /* Read position list size */
  if( (pNext[0] & 0x80)==0 ){
    /* Single byte size field */
    pIter->nSize = 1;
    pIter->nPoslist = ((int)(pNext[0])) >> 1;
  }else{
    int nPos;
    pIter->nSize = fts5GetVarint32(pNext, nPos);
    pIter->nPoslist = (nPos>>1);
  }
}
4829 
/*
** Initialize iterator pIter to iterate through the doclist stored in
** buffer pBuf, then advance it to the first entry by calling
** fts5DoclistIterNext(). If the buffer is empty, the iterator is left at
** EOF (pIter->aPoslist==0).
*/
static void fts5DoclistIterInit(
  Fts5Buffer *pBuf,               /* Buffer containing doclist */
  Fts5DoclistIter *pIter          /* Iterator to initialize */
){
  u8 *aData = pBuf->p;

  memset(pIter, 0, sizeof(Fts5DoclistIter));
  pIter->aEof = &aData[pBuf->n];
  pIter->aPoslist = aData;
  fts5DoclistIterNext(pIter);
}
4839 
#if 0
/*
** Append rowid iRowid to buffer pBuf, encoded as a varint delta against
** *piLastRowid. Then set *piLastRowid to iRowid.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif

/*
** Macro version of the disabled function above. Append (iRowid-iLastRowid)
** to buffer pBuf as a varint and then set iLastRowid to iRowid. Space
** within the buffer must already have been allocated.
**
** iLastRowid must be an lvalue. Both iLastRowid and iRowid are evaluated
** more than once, so neither argument may have side effects. The
** expansion is a brace-enclosed block, not a single statement.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) {       \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                   \
  fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \
  (iLastRowid) = (iRowid);                                     \
}
4863 
4864 /*
4865 ** Swap the contents of buffer *p1 with that of *p2.
4866 */
fts5BufferSwap(Fts5Buffer * p1,Fts5Buffer * p2)4867 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
4868   Fts5Buffer tmp = *p1;
4869   *p1 = *p2;
4870   *p2 = tmp;
4871 }
4872 
/*
** Buffer pBuf contains a list of varint-encoded rowid deltas and *piOff
** is the current read offset within it. If *piOff is at or past the end
** of the buffer, set *piOff to -1 to indicate EOF. Otherwise, read one
** varint from offset *piOff, add its value to *piRowid and advance *piOff
** past it.
*/
static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
  const int iStart = *piOff;
  u64 iDelta;                     /* Value of the varint at iStart */

  if( iStart>=pBuf->n ){
    *piOff = -1;
    return;
  }
  *piOff = iStart + sqlite3Fts5GetVarint(&pBuf->p[iStart], &iDelta);
  *piRowid += iDelta;
}
4883 
4884 /*
4885 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
4886 ** In this case the buffers consist of a delta-encoded list of rowids only.
4887 */
fts5MergeRowidLists(Fts5Index * p,Fts5Buffer * p1,Fts5Buffer * p2)4888 static void fts5MergeRowidLists(
4889   Fts5Index *p,                   /* FTS5 backend object */
4890   Fts5Buffer *p1,                 /* First list to merge */
4891   Fts5Buffer *p2                  /* Second list to merge */
4892 ){
4893   int i1 = 0;
4894   int i2 = 0;
4895   i64 iRowid1 = 0;
4896   i64 iRowid2 = 0;
4897   i64 iOut = 0;
4898 
4899   Fts5Buffer out;
4900   memset(&out, 0, sizeof(out));
4901   sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
4902   if( p->rc ) return;
4903 
4904   fts5NextRowid(p1, &i1, &iRowid1);
4905   fts5NextRowid(p2, &i2, &iRowid2);
4906   while( i1>=0 || i2>=0 ){
4907     if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
4908       assert( iOut==0 || iRowid1>iOut );
4909       fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
4910       iOut = iRowid1;
4911       fts5NextRowid(p1, &i1, &iRowid1);
4912     }else{
4913       assert( iOut==0 || iRowid2>iOut );
4914       fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
4915       iOut = iRowid2;
4916       if( i1>=0 && iRowid1==iRowid2 ){
4917         fts5NextRowid(p1, &i1, &iRowid1);
4918       }
4919       fts5NextRowid(p2, &i2, &iRowid2);
4920     }
4921   }
4922 
4923   fts5BufferSwap(&out, p1);
4924   fts5BufferFree(&out);
4925 }
4926 
/*
** Buffers p1 and p2 contain doclists. This function merges the content
** of the two doclists together and sets buffer p1 to the result before
** returning.
**
** Entries are written in ascending rowid order. If the same rowid is
** present in both inputs, the two position lists are merged into a single
** list in which a position present in both inputs appears only once.
**
** If buffer p2 is empty, this function is a no-op.
**
** If an error occurs, an error code is left in p->rc. If an error has
** already occurred, this function is a no-op.
*/
static void fts5MergePrefixLists(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Buffer *p1,                 /* First list to merge */
  Fts5Buffer *p2                  /* Second list to merge */
){
  if( p2->n ){
    i64 iLastRowid = 0;           /* Last rowid written to out */
    Fts5DoclistIter i1;           /* Iterator over p1 */
    Fts5DoclistIter i2;           /* Iterator over p2 */
    Fts5Buffer out = {0, 0, 0};   /* Merged doclist */
    Fts5Buffer tmp = {0, 0, 0};   /* Scratch space for merged position list */

    /* The maximum size of the output is equal to the sum of the two
    ** input sizes + 1 varint (9 bytes). The extra varint is because if the
    ** first rowid in one input is a large negative number, and the first in
    ** the other a non-negative number, the delta for the non-negative
    ** number will be larger on disk than the literal integer value
    ** was.
    **
    ** Or, if the input position-lists are corrupt, then the output might
    ** include up to 2 extra 10-byte positions created by interpreting -1
    ** (the value PoslistNext64() uses for EOF) as a position and appending
    ** it to the output. This can happen at most once for each input
    ** position-list, hence two 10 byte paddings.  */
    if( sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n + 9+10+10) ) return;
    fts5DoclistIterInit(p1, &i1);
    fts5DoclistIterInit(p2, &i2);

    /* Loop until one of the two iterators reaches EOF (or an error occurs).
    ** Whatever remains of the other input is copied after the loop. */
    while( 1 ){
      if( i1.iRowid<i2.iRowid ){
        /* Copy entry from i1 */
        fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
        fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize);
        fts5DoclistIterNext(&i1);
        if( i1.aPoslist==0 ) break;
        assert( out.n<=((i1.aPoslist-p1->p) + (i2.aPoslist-p2->p)+9+10+10) );
      }
      else if( i2.iRowid!=i1.iRowid ){
        /* Copy entry from i2 */
        fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
        fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize);
        fts5DoclistIterNext(&i2);
        if( i2.aPoslist==0 ) break;
        assert( out.n<=((i1.aPoslist-p1->p) + (i2.aPoslist-p2->p)+9+10+10) );
      }
      else{
        /* Merge the two position lists. */
        i64 iPos1 = 0;            /* Current position from a1, or -1 at EOF */
        i64 iPos2 = 0;            /* Current position from a2, or -1 at EOF */
        int iOff1 = 0;            /* Read offset within a1 */
        int iOff2 = 0;            /* Read offset within a2 */
        u8 *a1 = &i1.aPoslist[i1.nSize];    /* Position data for i1 */
        u8 *a2 = &i2.aPoslist[i2.nSize];    /* Position data for i2 */
        int nCopy;                /* Bytes of unmerged tail to copy */
        u8 *aCopy;                /* Start of unmerged tail */

        i64 iPrev = 0;            /* Last position written to tmp */
        Fts5PoslistWriter writer;
        memset(&writer, 0, sizeof(writer));

        /* See the earlier comment in this function for an explanation of why
        ** corrupt input position lists might cause the output to consume
        ** at most 20 bytes of unexpected space. */
        fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
        fts5BufferZero(&tmp);
        sqlite3Fts5BufferSize(&p->rc, &tmp, i1.nPoslist + i2.nPoslist + 10 + 10);
        if( p->rc ) break;

        sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
        sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
        assert_nc( iPos1>=0 && iPos2>=0 );

        /* Write the smaller of the two first positions unconditionally.
        ** Thereafter, a position from a1 is written only if it differs
        ** from the previous position written (iPrev). */
        if( iPos1<iPos2 ){
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
          sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
        }else{
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
          sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
        }
        if( iPos1>=0 && iPos2>=0 ){
          while( 1 ){
            if( iPos1<iPos2 ){
              if( iPos1!=iPrev ){
                sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
              }
              sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
              if( iPos1<0 ) break;
            }else{
              assert_nc( iPos2!=iPrev );
              sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
              sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
              if( iPos2<0 ) break;
            }
          }
        }

        /* One of the lists is exhausted. Write the current position from
        ** the other, then copy the rest of that list as a blob. */
        if( iPos1>=0 ){
          if( iPos1!=iPrev ){
            sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
          }
          aCopy = &a1[iOff1];
          nCopy = i1.nPoslist - iOff1;
        }else{
          assert_nc( iPos2>=0 && iPos2!=iPrev );
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
          aCopy = &a2[iOff2];
          nCopy = i2.nPoslist - iOff2;
        }
        if( nCopy>0 ){
          fts5BufferSafeAppendBlob(&tmp, aCopy, nCopy);
        }

        /* WRITEPOSLISTSIZE */
        assert_nc( tmp.n<=i1.nPoslist+i2.nPoslist );
        assert( tmp.n<=i1.nPoslist+i2.nPoslist+10+10 );
        /* A merged list larger than the two inputs combined is treated as
        ** corruption. */
        if( tmp.n>i1.nPoslist+i2.nPoslist ){
          if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
          break;
        }
        fts5BufferSafeAppendVarint(&out, tmp.n * 2);
        fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
        fts5DoclistIterNext(&i1);
        fts5DoclistIterNext(&i2);
        assert_nc( out.n<=(p1->n+p2->n+9) );
        if( i1.aPoslist==0 || i2.aPoslist==0 ) break;
        assert( out.n<=((i1.aPoslist-p1->p) + (i2.aPoslist-p2->p)+9+10+10) );
      }
    }

    /* Copy whatever remains of the input that is not yet at EOF. Only the
    ** first remaining rowid needs to be re-encoded as a delta; everything
    ** from its size field up to aEof is copied verbatim. */
    if( i1.aPoslist ){
      fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
      fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.aEof - i1.aPoslist);
    }
    else if( i2.aPoslist ){
      fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
      fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.aEof - i2.aPoslist);
    }
    assert_nc( out.n<=(p1->n+p2->n+9) );

    /* Copy the merged doclist into p1 and free the temporary buffers. */
    fts5BufferSet(&p->rc, p1, out.n, out.p);
    fts5BufferFree(&tmp);
    fts5BufferFree(&out);
  }
}
5079 
/*
** Build an iterator for a prefix query by scanning terms of the index.
**
** A multi-iterator is opened at pToken/nToken and advanced over successive
** entries until a term that is shorter than nToken bytes, or that does not
** begin with the nToken bytes of pToken, is reached. The entries visited
** are accumulated, using the xAppend and xMerge callbacks selected below,
** into a single buffer (doclist). A copy of that buffer is then wrapped in
** an Fts5Data object and passed to fts5MultiIterNew2() along with bDesc
** and ppIter.
**
** This function returns no value and only writes *ppIter by way of
** fts5MultiIterNew2(). If aBuf cannot be allocated, the structure cannot
** be read, or the Fts5Data allocation fails, that call is never made.
** NOTE(review): the helpers called here are presumed to report failures
** through p->rc - confirm against their definitions.
*/
static void fts5SetupPrefixIter(
  Fts5Index *p,                   /* Index to read from */
  int bDesc,                      /* True for "ORDER BY rowid DESC" */
  const u8 *pToken,               /* Buffer containing prefix to match */
  int nToken,                     /* Size of buffer pToken in bytes */
  Fts5Colset *pColset,            /* Restrict matches to these columns */
  Fts5Iter **ppIter          /* OUT: New iterator */
){
  Fts5Structure *pStruct;
  Fts5Buffer *aBuf;               /* Array of nBuf intermediate buffers */
  const int nBuf = 32;            /* Number of entries in aBuf[] */

  /* Select the merge and append routines according to the detail mode:
  ** one pair for detail=none, another pair for all other modes. */
  void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*);
  void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*);
  if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
    xMerge = fts5MergeRowidLists;
    xAppend = fts5AppendRowid;
  }else{
    xMerge = fts5MergePrefixLists;
    xAppend = fts5AppendPoslist;
  }

  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p);

  if( aBuf && pStruct ){
    const int flags = FTS5INDEX_QUERY_SCAN
                    | FTS5INDEX_QUERY_SKIPEMPTY
                    | FTS5INDEX_QUERY_NOOUTPUT;
    int i;
    i64 iLastRowid = 0;   /* Rowid most recently appended to doclist */
    Fts5Iter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Data *pData;
    Fts5Buffer doclist;   /* Entries accumulated since the last spill */
    int bNewTerm = 1;     /* Passed by address to fts5MultiIterNext2() */

    memset(&doclist, 0, sizeof(doclist));
    fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
    fts5IterSetOutputCb(&p->rc, p1);
    for( /* no-op */ ;
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext2(p, p1, &bNewTerm)
    ){
      Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
      int nTerm = pSeg->term.n;
      const u8 *pTerm = pSeg->term.p;
      p1->xSetOutputs(p1, pSeg);

      assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );

      /* When bNewTerm is set, stop the scan if the current term is shorter
      ** than the prefix or does not start with it. */
      if( bNewTerm ){
        if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
      }

      /* Entries for which no output data was produced are skipped. */
      if( p1->base.nData==0 ) continue;

      /* The current rowid is not larger than the last one appended, so it
      ** cannot simply be appended to doclist. Spill doclist into aBuf[]:
      ** walking up from slot 0, an empty slot is swapped with doclist and
      ** doclist is zeroed, which ends the loop via the doclist.n test. An
      ** occupied slot is combined with doclist by xMerge() and then
      ** zeroed, and the walk continues with the next slot. */
      if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
        for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
          assert( i<nBuf );
          if( aBuf[i].n==0 ){
            fts5BufferSwap(&doclist, &aBuf[i]);
            fts5BufferZero(&doclist);
          }else{
            xMerge(p, &doclist, &aBuf[i]);
            fts5BufferZero(&aBuf[i]);
          }
        }
        /* doclist was restarted, so the next value passed to xAppend() is
        ** the full rowid (rowid minus zero). */
        iLastRowid = 0;
      }

      /* Append the current entry. The second argument is the difference
      ** between this rowid and the previous one appended to doclist. */
      xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist);
      iLastRowid = p1->base.iRowid;
    }

    /* Fold every slot of aBuf[] into doclist (skipped once p->rc is set)
    ** and release the slots. */
    for(i=0; i<nBuf; i++){
      if( p->rc==SQLITE_OK ){
        xMerge(p, &doclist, &aBuf[i]);
      }
      fts5BufferFree(&aBuf[i]);
    }
    fts5MultiIterFree(p1);

    /* Copy the final doclist into a new Fts5Data object. The payload is
    ** stored directly after the Fts5Data header in the same allocation,
    ** which also reserves FTS5_DATA_ZERO_PADDING extra bytes. */
    pData = fts5IdxMalloc(p, sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING);
    if( pData ){
      pData->p = (u8*)&pData[1];
      pData->nn = pData->szLeaf = doclist.n;
      if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
      fts5MultiIterNew2(p, pData, bDesc, ppIter);
    }
    fts5BufferFree(&doclist);
  }

  fts5StructureRelease(pStruct);
  sqlite3_free(aBuf);
}
5174 
5175 
5176 /*
5177 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
5178 ** to the document with rowid iRowid.
5179 */
sqlite3Fts5IndexBeginWrite(Fts5Index * p,int bDelete,i64 iRowid)5180 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
5181   assert( p->rc==SQLITE_OK );
5182 
5183   /* Allocate the hash table if it has not already been allocated */
5184   if( p->pHash==0 ){
5185     p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
5186   }
5187 
5188   /* Flush the hash table to disk if required */
5189   if( iRowid<p->iWriteRowid
5190    || (iRowid==p->iWriteRowid && p->bDelete==0)
5191    || (p->nPendingData > p->pConfig->nHashSize)
5192   ){
5193     fts5IndexFlush(p);
5194   }
5195 
5196   p->iWriteRowid = iRowid;
5197   p->bDelete = bDelete;
5198   return fts5IndexReturn(p);
5199 }
5200 
5201 /*
5202 ** Commit data to disk.
5203 */
sqlite3Fts5IndexSync(Fts5Index * p)5204 int sqlite3Fts5IndexSync(Fts5Index *p){
5205   assert( p->rc==SQLITE_OK );
5206   fts5IndexFlush(p);
5207   sqlite3Fts5IndexCloseReader(p);
5208   return fts5IndexReturn(p);
5209 }
5210 
5211 /*
5212 ** Discard any data stored in the in-memory hash tables. Do not write it
5213 ** to the database. Additionally, assume that the contents of the %_data
5214 ** table may have changed on disk. So any in-memory caches of %_data
5215 ** records must be invalidated.
5216 */
sqlite3Fts5IndexRollback(Fts5Index * p)5217 int sqlite3Fts5IndexRollback(Fts5Index *p){
5218   sqlite3Fts5IndexCloseReader(p);
5219   fts5IndexDiscardData(p);
5220   fts5StructureInvalidate(p);
5221   /* assert( p->rc==SQLITE_OK ); */
5222   return SQLITE_OK;
5223 }
5224 
5225 /*
5226 ** The %_data table is completely empty when this function is called. This
5227 ** function populates it with the initial structure objects for each index,
5228 ** and the initial version of the "averages" record (a zero-byte blob).
5229 */
sqlite3Fts5IndexReinit(Fts5Index * p)5230 int sqlite3Fts5IndexReinit(Fts5Index *p){
5231   Fts5Structure s;
5232   fts5StructureInvalidate(p);
5233   fts5IndexDiscardData(p);
5234   memset(&s, 0, sizeof(Fts5Structure));
5235   fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
5236   fts5StructureWrite(p, &s);
5237   return fts5IndexReturn(p);
5238 }
5239 
5240 /*
5241 ** Open a new Fts5Index handle. If the bCreate argument is true, create
5242 ** and initialize the underlying %_data table.
5243 **
5244 ** If successful, set *pp to point to the new object and return SQLITE_OK.
5245 ** Otherwise, set *pp to NULL and return an SQLite error code.
5246 */
sqlite3Fts5IndexOpen(Fts5Config * pConfig,int bCreate,Fts5Index ** pp,char ** pzErr)5247 int sqlite3Fts5IndexOpen(
5248   Fts5Config *pConfig,
5249   int bCreate,
5250   Fts5Index **pp,
5251   char **pzErr
5252 ){
5253   int rc = SQLITE_OK;
5254   Fts5Index *p;                   /* New object */
5255 
5256   *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
5257   if( rc==SQLITE_OK ){
5258     p->pConfig = pConfig;
5259     p->nWorkUnit = FTS5_WORK_UNIT;
5260     p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
5261     if( p->zDataTbl && bCreate ){
5262       rc = sqlite3Fts5CreateTable(
5263           pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
5264       );
5265       if( rc==SQLITE_OK ){
5266         rc = sqlite3Fts5CreateTable(pConfig, "idx",
5267             "segid, term, pgno, PRIMARY KEY(segid, term)",
5268             1, pzErr
5269         );
5270       }
5271       if( rc==SQLITE_OK ){
5272         rc = sqlite3Fts5IndexReinit(p);
5273       }
5274     }
5275   }
5276 
5277   assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
5278   if( rc ){
5279     sqlite3Fts5IndexClose(p);
5280     *pp = 0;
5281   }
5282   return rc;
5283 }
5284 
5285 /*
5286 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
5287 */
sqlite3Fts5IndexClose(Fts5Index * p)5288 int sqlite3Fts5IndexClose(Fts5Index *p){
5289   int rc = SQLITE_OK;
5290   if( p ){
5291     assert( p->pReader==0 );
5292     fts5StructureInvalidate(p);
5293     sqlite3_finalize(p->pWriter);
5294     sqlite3_finalize(p->pDeleter);
5295     sqlite3_finalize(p->pIdxWriter);
5296     sqlite3_finalize(p->pIdxDeleter);
5297     sqlite3_finalize(p->pIdxSelect);
5298     sqlite3_finalize(p->pDataVersion);
5299     sqlite3Fts5HashFree(p->pHash);
5300     sqlite3_free(p->zDataTbl);
5301     sqlite3_free(p);
5302   }
5303   return rc;
5304 }
5305 
/*
** Buffer p contains nByte bytes of utf-8 text. Return the size in bytes
** of the prefix of the buffer that consists of its first nChar characters.
**
** Zero is returned if the buffer holds fewer than nChar characters. Zero
** is also returned if a byte with value 0xC0 or greater is the final byte
** of the buffer, and if nChar is zero or negative.
*/
int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  int iByte = 0;                  /* Current offset into p[] */
  int iChar = 0;                  /* Number of characters consumed so far */

  while( iChar<nChar ){
    unsigned char c;

    if( iByte>=nByte ) return 0;  /* Input contains fewer than nChar chars */
    c = (unsigned char)p[iByte];
    iByte++;

    if( c>=0xc0 ){
      /* Lead byte of a multi-byte character. Skip the continuation bytes
      ** (0x80..0xBF) that follow it. */
      if( iByte>=nByte ) return 0;
      while( (p[iByte] & 0xc0)==0x80 ){
        iByte++;
        if( iByte>=nByte ){
          /* End of buffer reached. This is fine only if the character
          ** just consumed is the last one requested. */
          if( iChar+1!=nChar ) return 0;
          break;
        }
      }
    }

    iChar++;
  }

  return iByte;
}
5333 
/*
** Buffer pIn contains nIn bytes of utf-8 text. Return the number of
** characters it contains.
**
** A byte with value 0xC0 or greater, together with any continuation bytes
** (0x80..0xBF) that directly follow it, counts as one character. Any other
** byte counts as one character by itself.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nCount = 0;                 /* Characters counted so far */
  int iOff;                       /* Current offset into pIn[] */

  for(iOff=0; iOff<nIn; nCount++){
    unsigned char c = (unsigned char)pIn[iOff];
    iOff++;
    if( c<0xc0 ) continue;

    /* Skip the continuation bytes that belong to this character */
    for(; iOff<nIn; iOff++){
      if( (pIn[iOff] & 0xc0)!=0x80 ) break;
    }
  }

  return nCount;
}
5349 
5350 /*
5351 ** Insert or remove data to or from the index. Each time a document is
5352 ** added to or removed from the index, this function is called one or more
5353 ** times.
5354 **
5355 ** For an insert, it must be called once for each token in the new document.
5356 ** If the operation is a delete, it must be called (at least) once for each
5357 ** unique token in the document with an iCol value less than zero. The iPos
5358 ** argument is ignored for a delete.
5359 */
sqlite3Fts5IndexWrite(Fts5Index * p,int iCol,int iPos,const char * pToken,int nToken)5360 int sqlite3Fts5IndexWrite(
5361   Fts5Index *p,                   /* Index to write to */
5362   int iCol,                       /* Column token appears in (-ve -> delete) */
5363   int iPos,                       /* Position of token within column */
5364   const char *pToken, int nToken  /* Token to add or remove to or from index */
5365 ){
5366   int i;                          /* Used to iterate through indexes */
5367   int rc = SQLITE_OK;             /* Return code */
5368   Fts5Config *pConfig = p->pConfig;
5369 
5370   assert( p->rc==SQLITE_OK );
5371   assert( (iCol<0)==p->bDelete );
5372 
5373   /* Add the entry to the main terms index. */
5374   rc = sqlite3Fts5HashWrite(
5375       p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
5376   );
5377 
5378   for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
5379     const int nChar = pConfig->aPrefix[i];
5380     int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
5381     if( nByte ){
5382       rc = sqlite3Fts5HashWrite(p->pHash,
5383           p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
5384           nByte
5385       );
5386     }
5387   }
5388 
5389   return rc;
5390 }
5391 
/*
** Open a new iterator to iterate through all rowids that match the
** specified token or token prefix.
**
** The query key is assembled in a temporary buffer as one index-prefix
** byte followed by the nToken bytes of pToken. Depending on the flags and
** the configured prefix indexes, the query is answered either by a direct
** lookup of that key or by scanning multiple terms of the main index (see
** fts5SetupPrefixIter()).
**
** The new iterator is returned via *ppIter. If p->rc is set by the time
** the iterator has been set up, the iterator is closed and *ppIter is set
** to NULL. If the temporary buffer cannot be allocated, *ppIter is not
** written at all. Returns the value of fts5IndexReturn(p).
*/
int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
  Fts5Colset *pColset,            /* Match these columns only */
  Fts5IndexIter **ppIter          /* OUT: New iterator object */
){
  Fts5Config *pConfig = p->pConfig;
  Fts5Iter *pRet = 0;
  Fts5Buffer buf = {0, 0, 0};

  /* If the QUERY_SCAN flag is set, all other flags must be clear. */
  assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );

  if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
    int iIdx = 0;                 /* Index to search */

    /* Byte 0 of the buffer is reserved for the index-prefix byte, which
    ** is filled in below once the index to use is known. */
    if( nToken ) memcpy(&buf.p[1], pToken, nToken);

    /* Figure out which index to search and set iIdx accordingly. If this
    ** is a prefix query for which there is no prefix index, set iIdx to
    ** greater than pConfig->nPrefix to indicate that the query will be
    ** satisfied by scanning multiple terms in the main index.
    **
    ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
    ** prefix-query. Instead of using a prefix-index (if one exists),
    ** evaluate the prefix query using the main FTS index. This is used
    ** for internal sanity checking by the integrity-check in debug
    ** mode only.  */
#ifdef SQLITE_DEBUG
    if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
      assert( flags & FTS5INDEX_QUERY_PREFIX );
      iIdx = 1+pConfig->nPrefix;
    }else
#endif
    if( flags & FTS5INDEX_QUERY_PREFIX ){
      /* Search for a prefix index configured for exactly nChar characters.
      ** If there is none, the loop leaves iIdx set to nPrefix+1. */
      int nChar = fts5IndexCharlen(pToken, nToken);
      for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
        if( pConfig->aPrefix[iIdx-1]==nChar ) break;
      }
    }

    if( iIdx<=pConfig->nPrefix ){
      /* Straight index lookup */
      Fts5Structure *pStruct = fts5StructureRead(p);
      buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
      if( pStruct ){
        fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
            pColset, buf.p, nToken+1, -1, 0, &pRet
        );
        fts5StructureRelease(pStruct);
      }
    }else{
      /* Scan multiple terms in the main index */
      int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
      buf.p[0] = FTS5_MAIN_PREFIX;
      fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
      assert( p->rc!=SQLITE_OK || pRet->pColset==0 );
      fts5IterSetOutputCb(&p->rc, pRet);
      if( p->rc==SQLITE_OK ){
        /* If the new iterator has a current leaf, set up its outputs for
        ** the entry it currently points at. */
        Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
        if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
      }
    }

    /* On error, discard whatever was built so that the caller never
    ** receives an iterator together with an error code. */
    if( p->rc ){
      sqlite3Fts5IterClose((Fts5IndexIter*)pRet);
      pRet = 0;
      sqlite3Fts5IndexCloseReader(p);
    }

    *ppIter = (Fts5IndexIter*)pRet;
    sqlite3Fts5BufferFree(&buf);
  }
  return fts5IndexReturn(p);
}
5471 
5472 /*
5473 ** Return true if the iterator passed as the only argument is at EOF.
5474 */
5475 /*
5476 ** Move to the next matching rowid.
5477 */
sqlite3Fts5IterNext(Fts5IndexIter * pIndexIter)5478 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
5479   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5480   assert( pIter->pIndex->rc==SQLITE_OK );
5481   fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
5482   return fts5IndexReturn(pIter->pIndex);
5483 }
5484 
5485 /*
5486 ** Move to the next matching term/rowid. Used by the fts5vocab module.
5487 */
sqlite3Fts5IterNextScan(Fts5IndexIter * pIndexIter)5488 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
5489   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5490   Fts5Index *p = pIter->pIndex;
5491 
5492   assert( pIter->pIndex->rc==SQLITE_OK );
5493 
5494   fts5MultiIterNext(p, pIter, 0, 0);
5495   if( p->rc==SQLITE_OK ){
5496     Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
5497     if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
5498       fts5DataRelease(pSeg->pLeaf);
5499       pSeg->pLeaf = 0;
5500       pIter->base.bEof = 1;
5501     }
5502   }
5503 
5504   return fts5IndexReturn(pIter->pIndex);
5505 }
5506 
5507 /*
5508 ** Move to the next matching rowid that occurs at or after iMatch. The
5509 ** definition of "at or after" depends on whether this iterator iterates
5510 ** in ascending or descending rowid order.
5511 */
sqlite3Fts5IterNextFrom(Fts5IndexIter * pIndexIter,i64 iMatch)5512 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
5513   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5514   fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
5515   return fts5IndexReturn(pIter->pIndex);
5516 }
5517 
5518 /*
5519 ** Return the current term.
5520 */
sqlite3Fts5IterTerm(Fts5IndexIter * pIndexIter,int * pn)5521 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
5522   int n;
5523   const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
5524   *pn = n-1;
5525   return &z[1];
5526 }
5527 
5528 /*
5529 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
5530 */
sqlite3Fts5IterClose(Fts5IndexIter * pIndexIter)5531 void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
5532   if( pIndexIter ){
5533     Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5534     Fts5Index *pIndex = pIter->pIndex;
5535     fts5MultiIterFree(pIter);
5536     sqlite3Fts5IndexCloseReader(pIndex);
5537   }
5538 }
5539 
5540 /*
5541 ** Read and decode the "averages" record from the database.
5542 **
5543 ** Parameter anSize must point to an array of size nCol, where nCol is
5544 ** the number of user defined columns in the FTS table.
5545 */
sqlite3Fts5IndexGetAverages(Fts5Index * p,i64 * pnRow,i64 * anSize)5546 int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
5547   int nCol = p->pConfig->nCol;
5548   Fts5Data *pData;
5549 
5550   *pnRow = 0;
5551   memset(anSize, 0, sizeof(i64) * nCol);
5552   pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
5553   if( p->rc==SQLITE_OK && pData->nn ){
5554     int i = 0;
5555     int iCol;
5556     i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
5557     for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
5558       i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
5559     }
5560   }
5561 
5562   fts5DataRelease(pData);
5563   return fts5IndexReturn(p);
5564 }
5565 
5566 /*
5567 ** Replace the current "averages" record with the contents of the buffer
5568 ** supplied as the second argument.
5569 */
sqlite3Fts5IndexSetAverages(Fts5Index * p,const u8 * pData,int nData)5570 int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
5571   assert( p->rc==SQLITE_OK );
5572   fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
5573   return fts5IndexReturn(p);
5574 }
5575 
5576 /*
5577 ** Return the total number of blocks this module has read from the %_data
5578 ** table since it was created.
5579 */
sqlite3Fts5IndexReads(Fts5Index * p)5580 int sqlite3Fts5IndexReads(Fts5Index *p){
5581   return p->nRead;
5582 }
5583 
5584 /*
5585 ** Set the 32-bit cookie value stored at the start of all structure
5586 ** records to the value passed as the second argument.
5587 **
5588 ** Return SQLITE_OK if successful, or an SQLite error code if an error
5589 ** occurs.
5590 */
sqlite3Fts5IndexSetCookie(Fts5Index * p,int iNew)5591 int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
5592   int rc;                              /* Return code */
5593   Fts5Config *pConfig = p->pConfig;    /* Configuration object */
5594   u8 aCookie[4];                       /* Binary representation of iNew */
5595   sqlite3_blob *pBlob = 0;
5596 
5597   assert( p->rc==SQLITE_OK );
5598   sqlite3Fts5Put32(aCookie, iNew);
5599 
5600   rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
5601       "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
5602   );
5603   if( rc==SQLITE_OK ){
5604     sqlite3_blob_write(pBlob, aCookie, 4, 0);
5605     rc = sqlite3_blob_close(pBlob);
5606   }
5607 
5608   return rc;
5609 }
5610 
/*
** Read the structure record and immediately release it again. Returns the
** value of fts5IndexReturn(p).
**
** NOTE(review): presumably called for the side effects of reading the
** structure record (e.g. refreshing configuration) - confirm against
** fts5StructureRead().
*/
int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
  Fts5Structure *pStruct = fts5StructureRead(p);
  fts5StructureRelease(pStruct);
  return fts5IndexReturn(p);
}
5617 
5618 
5619 /*************************************************************************
5620 **************************************************************************
5621 ** Below this point is the implementation of the integrity-check
5622 ** functionality.
5623 */
5624 
5625 /*
5626 ** Return a simple checksum value based on the arguments.
5627 */
sqlite3Fts5IndexEntryCksum(i64 iRowid,int iCol,int iPos,int iIdx,const char * pTerm,int nTerm)5628 u64 sqlite3Fts5IndexEntryCksum(
5629   i64 iRowid,
5630   int iCol,
5631   int iPos,
5632   int iIdx,
5633   const char *pTerm,
5634   int nTerm
5635 ){
5636   int i;
5637   u64 ret = iRowid;
5638   ret += (ret<<3) + iCol;
5639   ret += (ret<<3) + iPos;
5640   if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
5641   for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
5642   return ret;
5643 }
5644 
5645 #ifdef SQLITE_DEBUG
5646 /*
5647 ** This function is purely an internal test. It does not contribute to
5648 ** FTS functionality, or even the integrity-check, in any way.
5649 **
5650 ** Instead, it tests that the same set of pgno/rowid combinations are
5651 ** visited regardless of whether the doclist-index identified by parameters
5652 ** iSegid/iLeaf is iterated in forwards or reverse order.
5653 */
fts5TestDlidxReverse(Fts5Index * p,int iSegid,int iLeaf)5654 static void fts5TestDlidxReverse(
5655   Fts5Index *p,
5656   int iSegid,                     /* Segment id to load from */
5657   int iLeaf                       /* Load doclist-index for this leaf */
5658 ){
5659   Fts5DlidxIter *pDlidx = 0;
5660   u64 cksum1 = 13;
5661   u64 cksum2 = 13;
5662 
5663   for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
5664       fts5DlidxIterEof(p, pDlidx)==0;
5665       fts5DlidxIterNext(p, pDlidx)
5666   ){
5667     i64 iRowid = fts5DlidxIterRowid(pDlidx);
5668     int pgno = fts5DlidxIterPgno(pDlidx);
5669     assert( pgno>iLeaf );
5670     cksum1 += iRowid + ((i64)pgno<<32);
5671   }
5672   fts5DlidxIterFree(pDlidx);
5673   pDlidx = 0;
5674 
5675   for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
5676       fts5DlidxIterEof(p, pDlidx)==0;
5677       fts5DlidxIterPrev(p, pDlidx)
5678   ){
5679     i64 iRowid = fts5DlidxIterRowid(pDlidx);
5680     int pgno = fts5DlidxIterPgno(pDlidx);
5681     assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
5682     cksum2 += iRowid + ((i64)pgno<<32);
5683   }
5684   fts5DlidxIterFree(pDlidx);
5685   pDlidx = 0;
5686 
5687   if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
5688 }
5689 
/*
** Run a query for key z/n against the index using the supplied flags and
** XOR a checksum of every entry visited into *pCksum.
**
** In detail=none mode one checksum is computed per rowid, using column 0
** and offset 0. Otherwise one checksum is computed for each position in
** each position list, using the column and offset decoded from it.
**
** Returns SQLITE_OK, or the error code returned by the query or by
** advancing the iterator. *pCksum is updated in either case.
*/
static int fts5QueryCksum(
  Fts5Index *p,                   /* Fts5 index object */
  int iIdx,
  const char *z,                  /* Index key to query for */
  int n,                          /* Size of index key in bytes */
  int flags,                      /* Flags for Fts5IndexQuery */
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  const int bNoDetail = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
  u64 cksum = *pCksum;
  Fts5IndexIter *pIter = 0;
  int rc;

  rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
  for(;;){
    i64 rowid;

    if( rc!=SQLITE_OK ) break;
    if( sqlite3Fts5IterEof(pIter) ) break;
    rowid = pIter->iRowid;

    if( bNoDetail ){
      cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
    }else{
      Fts5PoslistReader sReader;
      sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
      while( sReader.bEof==0 ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
        sqlite3Fts5PoslistReaderNext(&sReader);
      }
    }

    rc = sqlite3Fts5IterNext(pIter);
  }
  sqlite3Fts5IterClose(pIter);

  *pCksum = cksum;
  return rc;
}
5728 
/*
** Check if buffer z[], size n bytes, contains a series of well-formed
** utf-8 byte sequences. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
**
** Only the lead-byte/continuation-byte structure is verified: each lead
** byte must be followed, within the buffer, by the number of continuation
** bytes (0x80..0xBF) that it announces. Overlong encodings, surrogates and
** out-of-range code points are not rejected.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  assert_nc( n>0 );
  while( i<n ){
    if( (z[i] & 0x80)==0x00 ){
      /* 0xxxxxxx: single byte character */
      i++;
    }else
    if( (z[i] & 0xE0)==0xC0 ){
      /* 110xxxxx: lead byte of a 2 byte character */
      if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1;
      i += 2;
    }else
    if( (z[i] & 0xF0)==0xE0 ){
      /* 1110xxxx: lead byte of a 3 byte character */
      if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      i += 3;
    }else
    if( (z[i] & 0xF8)==0xF0 ){
      /* 11110xxx: lead byte of a 4 byte character. All three continuation
      ** bytes must be checked and all four bytes consumed. Otherwise the
      ** final continuation byte would be misread as the start of the next
      ** character and the sequence rejected. */
      if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      if( (z[i+3] & 0xC0)!=0x80 ) return 1;
      i += 4;
    }else{
      /* Stray continuation byte, or a byte that is never valid in utf-8 */
      return 1;
    }
  }

  return 0;
}
5760 
/*
** This function is also purely an internal test. It does not contribute to
** FTS functionality, or even the integrity-check, in any way.
**
** Buffer pPrev holds the previous term passed to this function, including
** its leading index-prefix byte. If pPrev is empty, the term z/n is just
** stored in it. Otherwise, if no error has occurred and z/n differs from
** pPrev, the term stored in pPrev is queried and checksummed:
**
**   * the checksums of the ASC and DESC queries must match, and
**   * for prefix-index terms, subject to the conditions described below,
**     the checksums obtained with the prefix index disabled must match
**     them too.
**
** The ASC checksum is XORed into *pCksum and the result must be equal to
** parameter expected. Any mismatch is reported as FTS5_CORRUPT. Finally,
** z/n is stored in pPrev. The resulting error code is written to p->rc.
*/
static void fts5TestTerm(
  Fts5Index *p,
  Fts5Buffer *pPrev,              /* Previous term */
  const char *z, int n,           /* Possibly new term to test */
  u64 expected,
  u64 *pCksum
){
  int rc = p->rc;
  if( pPrev->n==0 ){
    /* First call: there is no previous term to test yet. */
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);
  }else
  if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
    u64 cksum3 = *pCksum;
    const char *zTerm = (const char*)&pPrev->p[1];  /* term sans prefix-byte */
    int nTerm = pPrev->n-1;            /* Size of zTerm in bytes */
    int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
    int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
    u64 ck1 = 0;                       /* Checksum of the ASC query */
    u64 ck2 = 0;                       /* Checksum of each other query */

    /* Check that the results returned for ASC and DESC queries are
    ** the same. If not, call this corruption.  */
    rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
    if( rc==SQLITE_OK ){
      int f = flags|FTS5INDEX_QUERY_DESC;
      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
    }
    if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;

    /* If this is a prefix query, check that the results returned if the
    ** index is disabled are the same. In both ASC and DESC order.
    **
    ** This check may only be performed if the hash table is empty. This
    ** is because the hash table only supports a single scan query at
    ** a time, and the multi-iter loop from which this function is called
    ** is already performing such a scan.
    **
    ** Also only do this if buffer zTerm contains nTerm bytes of valid
    ** utf-8. Otherwise, the last part of the buffer contents might contain
    ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
    ** character stored in the main fts index, which will cause the
    ** test to fail.  */
    if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
    }

    /* Fold the checksum of the tested term into the running checksum and
    ** remember the new term. fts5BufferSet() is passed &rc, so rc may be
    ** changed by this call. */
    cksum3 ^= ck1;
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);

    if( rc==SQLITE_OK && cksum3!=expected ){
      rc = FTS5_CORRUPT;
    }
    *pCksum = cksum3;
  }
  p->rc = rc;
}
5832 
5833 #else
5834 # define fts5TestDlidxReverse(x,y,z)
5835 # define fts5TestTerm(u,v,w,x,y,z)
5836 #endif
5837 
5838 /*
5839 ** Check that:
5840 **
5841 **   1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
5842 **      contain zero terms.
5843 **   2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
5844 **      contain zero rowids.
5845 */
fts5IndexIntegrityCheckEmpty(Fts5Index * p,Fts5StructureSegment * pSeg,int iFirst,int iNoRowid,int iLast)5846 static void fts5IndexIntegrityCheckEmpty(
5847   Fts5Index *p,
5848   Fts5StructureSegment *pSeg,     /* Segment to check internal consistency */
5849   int iFirst,
5850   int iNoRowid,
5851   int iLast
5852 ){
5853   int i;
5854 
5855   /* Now check that the iter.nEmpty leaves following the current leaf
5856   ** (a) exist and (b) contain no terms. */
5857   for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
5858     Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
5859     if( pLeaf ){
5860       if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
5861       if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
5862     }
5863     fts5DataRelease(pLeaf);
5864   }
5865 }
5866 
/*
** Check the list of term offsets stored in the part of leaf pLeaf between
** byte offsets pLeaf->szLeaf and pLeaf->nn. The list is a series of
** varints, each of which is added to the previous offset to give the
** offset of the next term on the page.
**
** For each entry, check that:
**
**   * the offset is smaller than pLeaf->szLeaf,
**   * the term found at the offset fits within the first szLeaf bytes of
**     the page and, for all but the first term, does not keep more bytes
**     of the previous term than that term contains, and
**   * each term other than the first is strictly larger than the term
**     before it.
**
** If any check fails, p->rc is set to FTS5_CORRUPT and the scan stops.
*/
static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  int iTermOff = 0;               /* Running total of the offset varints */
  int ii;                         /* Current offset within the offset list */

  Fts5Buffer buf1 = {0,0,0};      /* Current term */
  Fts5Buffer buf2 = {0,0,0};      /* Previous term */

  ii = pLeaf->szLeaf;
  while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
    int res;
    int iOff;
    int nIncr;

    ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
    iTermOff += nIncr;
    iOff = iTermOff;

    if( iOff>=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else if( iTermOff==nIncr ){
      /* iTermOff was zero before nIncr was added, as it is for the first
      ** entry in the list. The term is stored as a size varint followed
      ** by the term bytes. */
      int nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }
    }else{
      /* Otherwise the term is stored as two varints, nKeep and nByte,
      ** followed by nByte bytes: the term consists of the first nKeep
      ** bytes of the previous term followed by those nByte bytes. */
      int nKeep, nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        buf1.n = nKeep;
        fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }

      /* The term just decoded must be larger than the previous one */
      if( p->rc==SQLITE_OK ){
        res = fts5BufferCompare(&buf1, &buf2);
        if( res<=0 ) p->rc = FTS5_CORRUPT;
      }
    }

    /* Remember the current term for the comparison on the next pass */
    fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
  }

  fts5BufferFree(&buf1);
  fts5BufferFree(&buf2);
}
5916 
/*
** Check the internal consistency of the single segment pSeg.
**
** The function walks the rows of the %_idx table that belong to the
** segment (ordered by segid, term). For each row that refers to a leaf
** at or after pSeg->pgnoFirst it:
**
**   * loads the leaf and checks that it has a page-index, that its first
**     term is not smaller than the term stored in the %_idx row, and that
**     any first-rowid pointer precedes the first term,
**
**   * runs fts5IntegrityCheckPgidx() on the leaf,
**
**   * calls fts5IndexIntegrityCheckEmpty() for the pages between the
**     previous %_idx entry and this one, and
**
**   * if the row is flagged as having a doclist index, walks that index
**     and cross-checks it against the leaves it refers to.
**
** Problems are reported by setting p->rc (to FTS5_CORRUPT for the checks
** made directly in this function). The function returns immediately,
** without touching p->rc, if pSeg->pgnoFirst is zero.
*/
static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5Config *pConfig = p->pConfig;
  sqlite3_stmt *pStmt = 0;
  int rc2;
  /* Leaf page number taken from the previously processed %_idx row.
  ** Starts one before the first page of the segment. */
  int iIdxPrevLeaf = pSeg->pgnoFirst-1;
  /* Page number left behind by the doclist-index walk of the previous
  ** %_idx row, or pSeg->pgnoLast if that row had no doclist index. It is
  ** passed (plus one) to fts5IndexIntegrityCheckEmpty() below. */
  int iDlidxPrevLeaf = pSeg->pgnoLast;

  if( pSeg->pgnoFirst==0 ) return;

  /* Column 2 of the result is the leaf page number (pgno>>1) and column 3
  ** is the low bit of pgno, used below as the "has doclist index" flag. */
  fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
      "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
      "ORDER BY 1, 2",
      pConfig->zDb, pConfig->zName, pSeg->iSegid
  ));

  /* Iterate through the b-tree hierarchy.  */
  while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */

    const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1);
    int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
    int iIdxLeaf = sqlite3_column_int(pStmt, 2);
    int bIdxDlidx = sqlite3_column_int(pStmt, 3);

    /* If the leaf in question has already been trimmed from the segment,
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iIdxLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
    pLeaf = fts5LeafRead(p, iRow);
    if( pLeaf==0 ) break;

    /* Check that the leaf contains at least one term, and that it is equal
    ** to or larger than the split-key in zIdxTerm.  Also check that if there
    ** is also a rowid pointer within the leaf page header, it points to a
    ** location before the term.  */
    if( pLeaf->nn<=pLeaf->szLeaf ){
      /* Nothing follows the szLeaf bytes of page data, so there is no
      ** page-index and therefore no term on this leaf. */
      p->rc = FTS5_CORRUPT;
    }else{
      int iOff;                   /* Offset of first term on leaf */
      int iRowidOff;              /* Offset of first rowid on leaf */
      int nTerm;                  /* Size of term on leaf in bytes */
      int res;                    /* Comparison of term and split-key */

      iOff = fts5LeafFirstTermOff(pLeaf);
      iRowidOff = fts5LeafFirstRowidOff(pLeaf);
      if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        /* Compare the common prefix first; if that is equal the shorter
        ** of the two terms is the smaller one. */
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
        res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
        if( res==0 ) res = nTerm - nIdxTerm;
        if( res<0 ) p->rc = FTS5_CORRUPT;
      }

      fts5IntegrityCheckPgidx(p, pLeaf);
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;

    /* Now check that the iter.nEmpty leaves following the current leaf
    ** (a) exist and (b) contain no terms. */
    fts5IndexIntegrityCheckEmpty(
        p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
    );
    if( p->rc ) break;

    /* If there is a doclist-index, check that it looks right. */
    if( bIdxDlidx ){
      Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
      int iPrevLeaf = iIdxLeaf;
      int iSegid = pSeg->iSegid;
      int iPg = 0;                /* Read again after the loop - see below */
      i64 iKey;

      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(p, pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
          iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = fts5DlidxIterPgno(pDlidx);

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
          ASSERT_SZLEAF_OK(pLeaf);
          if( iRowidOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
          }else{
            fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
          }
          fts5DataRelease(pLeaf);
        }
      }

      /* iPg holds whatever value the last run of the inner loop above
      ** left in it (0 if the doclist index had no entries at all). */
      iDlidxPrevLeaf = iPg;
      fts5DlidxIterFree(pDlidx);
      fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
    }else{
      iDlidxPrevLeaf = pSeg->pgnoLast;
      /* TODO: Check there is no doclist index */
    }

    iIdxPrevLeaf = iIdxLeaf;
  }

  /* Always finalize the statement, but do not let its return code
  ** overwrite an error that has already been recorded in p->rc. */
  rc2 = sqlite3_finalize(pStmt);
  if( p->rc==SQLITE_OK ) p->rc = rc2;

  /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
#if 0
  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }
#endif
}
6050 
6051 
6052 /*
6053 ** Run internal checks to ensure that the FTS index (a) is internally
6054 ** consistent and (b) contains entries for which the XOR of the checksums
6055 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
6056 **
6057 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
6058 ** checksum does not match. Return SQLITE_OK if all checks pass without
6059 ** error, or some other SQLite error code if another error (e.g. OOM)
6060 ** occurs.
6061 */
sqlite3Fts5IndexIntegrityCheck(Fts5Index * p,u64 cksum)6062 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
6063   int eDetail = p->pConfig->eDetail;
6064   u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
6065   Fts5Buffer poslist = {0,0,0};   /* Buffer used to hold a poslist */
6066   Fts5Iter *pIter;                /* Used to iterate through entire index */
6067   Fts5Structure *pStruct;         /* Index structure */
6068 
6069 #ifdef SQLITE_DEBUG
6070   /* Used by extra internal tests only run if NDEBUG is not defined */
6071   u64 cksum3 = 0;                 /* Checksum based on contents of indexes */
6072   Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */
6073 #endif
6074   const int flags = FTS5INDEX_QUERY_NOOUTPUT;
6075 
6076   /* Load the FTS index structure */
6077   pStruct = fts5StructureRead(p);
6078 
6079   /* Check that the internal nodes of each segment match the leaves */
6080   if( pStruct ){
6081     int iLvl, iSeg;
6082     for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
6083       for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
6084         Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
6085         fts5IndexIntegrityCheckSegment(p, pSeg);
6086       }
6087     }
6088   }
6089 
6090   /* The cksum argument passed to this function is a checksum calculated
6091   ** based on all expected entries in the FTS index (including prefix index
6092   ** entries). This block checks that a checksum calculated based on the
6093   ** actual contents of FTS index is identical.
6094   **
6095   ** Two versions of the same checksum are calculated. The first (stack
6096   ** variable cksum2) based on entries extracted from the full-text index
6097   ** while doing a linear scan of each individual index in turn.
6098   **
6099   ** As each term visited by the linear scans, a separate query for the
6100   ** same term is performed. cksum3 is calculated based on the entries
6101   ** extracted by these queries.
6102   */
6103   for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
6104       fts5MultiIterEof(p, pIter)==0;
6105       fts5MultiIterNext(p, pIter, 0, 0)
6106   ){
6107     int n;                      /* Size of term in bytes */
6108     i64 iPos = 0;               /* Position read from poslist */
6109     int iOff = 0;               /* Offset within poslist */
6110     i64 iRowid = fts5MultiIterRowid(pIter);
6111     char *z = (char*)fts5MultiIterTerm(pIter, &n);
6112 
6113     /* If this is a new term, query for it. Update cksum3 with the results. */
6114     fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
6115 
6116     if( eDetail==FTS5_DETAIL_NONE ){
6117       if( 0==fts5MultiIterIsEmpty(p, pIter) ){
6118         cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
6119       }
6120     }else{
6121       poslist.n = 0;
6122       fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
6123       while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
6124         int iCol = FTS5_POS2COLUMN(iPos);
6125         int iTokOff = FTS5_POS2OFFSET(iPos);
6126         cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
6127       }
6128     }
6129   }
6130   fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
6131 
6132   fts5MultiIterFree(pIter);
6133   if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
6134 
6135   fts5StructureRelease(pStruct);
6136 #ifdef SQLITE_DEBUG
6137   fts5BufferFree(&term);
6138 #endif
6139   fts5BufferFree(&poslist);
6140   return fts5IndexReturn(p);
6141 }
6142 
6143 /*************************************************************************
6144 **************************************************************************
6145 ** Below this point is the implementation of the fts5_decode() scalar
6146 ** function only.
6147 */
6148 
6149 /*
6150 ** Decode a segment-data rowid from the %_data table. This function is
6151 ** the opposite of macro FTS5_SEGMENT_ROWID().
6152 */
fts5DecodeRowid(i64 iRowid,int * piSegid,int * pbDlidx,int * piHeight,int * piPgno)6153 static void fts5DecodeRowid(
6154   i64 iRowid,                     /* Rowid from %_data table */
6155   int *piSegid,                   /* OUT: Segment id */
6156   int *pbDlidx,                   /* OUT: Dlidx flag */
6157   int *piHeight,                  /* OUT: Height */
6158   int *piPgno                     /* OUT: Page number */
6159 ){
6160   *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
6161   iRowid >>= FTS5_DATA_PAGE_B;
6162 
6163   *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
6164   iRowid >>= FTS5_DATA_HEIGHT_B;
6165 
6166   *pbDlidx = (int)(iRowid & 0x0001);
6167   iRowid >>= FTS5_DATA_DLI_B;
6168 
6169   *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
6170 }
6171 
/*
** Append a human-readable description of %_data rowid iKey to buffer
** pBuf. Rowids with a segment-id of zero are rendered as "{averages} "
** (if iKey is FTS5_AVERAGES_ROWID) or "{structure}". All others are
** rendered with their decoded segid, height and page number, prefixed
** by "dlidx " if the dlidx flag is set.
*/
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int iSegid, iHeight, iPgno, bDlidx;       /* Rowid components */
  fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);

  if( iSegid!=0 ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
        bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
    );
    return;
  }

  if( iKey==FTS5_AVERAGES_ROWID ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
  }
}
6189 
/*
** Append a text rendering of structure object p to buffer pBuf. One
** brace-enclosed group is written per level, containing the level
** number, nMerge, nSeg and then one nested group per segment giving its
** id and first/last leaf page numbers.
*/
static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,               /* Buffer to append text to */
  Fts5Structure *p                /* Structure to render */
){
  int iLvl = 0;                   /* Current level */

  while( iLvl<p->nLevel ){
    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
    int iSeg;                     /* Current segment within pLvl */

    sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
        " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
    );
    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
          pLvl->aSeg[iSeg].iSegid,
          pLvl->aSeg[iSeg].pgnoFirst,
          pLvl->aSeg[iSeg].pgnoLast
      );
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");

    iLvl++;
  }
}
6211 
6212 /*
6213 ** This is part of the fts5_decode() debugging aid.
6214 **
6215 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
6216 ** function appends a human-readable representation of the same object
6217 ** to the buffer passed as the second argument.
6218 */
fts5DecodeStructure(int * pRc,Fts5Buffer * pBuf,const u8 * pBlob,int nBlob)6219 static void fts5DecodeStructure(
6220   int *pRc,                       /* IN/OUT: error code */
6221   Fts5Buffer *pBuf,
6222   const u8 *pBlob, int nBlob
6223 ){
6224   int rc;                         /* Return code */
6225   Fts5Structure *p = 0;           /* Decoded structure object */
6226 
6227   rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
6228   if( rc!=SQLITE_OK ){
6229     *pRc = rc;
6230     return;
6231   }
6232 
6233   fts5DebugStructure(pRc, pBuf, p);
6234   fts5StructureRelease(p);
6235 }
6236 
6237 /*
6238 ** This is part of the fts5_decode() debugging aid.
6239 **
6240 ** Arguments pBlob/nBlob contain an "averages" record. This function
6241 ** appends a human-readable representation of record to the buffer passed
6242 ** as the second argument.
6243 */
fts5DecodeAverages(int * pRc,Fts5Buffer * pBuf,const u8 * pBlob,int nBlob)6244 static void fts5DecodeAverages(
6245   int *pRc,                       /* IN/OUT: error code */
6246   Fts5Buffer *pBuf,
6247   const u8 *pBlob, int nBlob
6248 ){
6249   int i = 0;
6250   const char *zSpace = "";
6251 
6252   while( i<nBlob ){
6253     u64 iVal;
6254     i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
6255     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
6256     zSpace = " ";
6257   }
6258 }
6259 
6260 /*
6261 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read
6262 ** each varint and append its string representation to buffer pBuf. Return
6263 ** after either the input buffer is exhausted or a 0 value is read.
6264 **
6265 ** The return value is the number of bytes read from the input buffer.
6266 */
fts5DecodePoslist(int * pRc,Fts5Buffer * pBuf,const u8 * a,int n)6267 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6268   int iOff = 0;
6269   while( iOff<n ){
6270     int iVal;
6271     iOff += fts5GetVarint32(&a[iOff], iVal);
6272     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
6273   }
6274   return iOff;
6275 }
6276 
6277 /*
6278 ** The start of buffer (a/n) contains the start of a doclist. The doclist
6279 ** may or may not finish within the buffer. This function appends a text
6280 ** representation of the part of the doclist that is present to buffer
6281 ** pBuf.
6282 **
6283 ** The return value is the number of bytes read from the input buffer.
6284 */
fts5DecodeDoclist(int * pRc,Fts5Buffer * pBuf,const u8 * a,int n)6285 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6286   i64 iDocid = 0;
6287   int iOff = 0;
6288 
6289   if( n>0 ){
6290     iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
6291     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6292   }
6293   while( iOff<n ){
6294     int nPos;
6295     int bDel;
6296     iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
6297     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
6298     iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
6299     if( iOff<n ){
6300       i64 iDelta;
6301       iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
6302       iDocid += iDelta;
6303       sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6304     }
6305   }
6306 
6307   return iOff;
6308 }
6309 
6310 /*
6311 ** This function is part of the fts5_decode() debugging function. It is
6312 ** only ever used with detail=none tables.
6313 **
6314 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
6315 ** tables. This function appends a human-readable version of that list to
6316 ** buffer pBuf.
6317 **
6318 ** If *pRc is other than SQLITE_OK when this function is called, it is a
6319 ** no-op. If an OOM or other error occurs within this function, *pRc is
6320 ** set to an SQLite error code before returning. The final state of buffer
6321 ** pBuf is undefined in this case.
6322 */
fts5DecodeRowidList(int * pRc,Fts5Buffer * pBuf,const u8 * pData,int nData)6323 static void fts5DecodeRowidList(
6324   int *pRc,                       /* IN/OUT: Error code */
6325   Fts5Buffer *pBuf,               /* Buffer to append text to */
6326   const u8 *pData, int nData      /* Data to decode list-of-rowids from */
6327 ){
6328   int i = 0;
6329   i64 iRowid = 0;
6330 
6331   while( i<nData ){
6332     const char *zApp = "";
6333     u64 iVal;
6334     i += sqlite3Fts5GetVarint(&pData[i], &iVal);
6335     iRowid += iVal;
6336 
6337     if( i<nData && pData[i]==0x00 ){
6338       i++;
6339       if( i<nData && pData[i]==0x00 ){
6340         i++;
6341         zApp = "+";
6342       }else{
6343         zApp = "*";
6344       }
6345     }
6346 
6347     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
6348   }
6349 }
6350 
6351 /*
6352 ** The implementation of user-defined scalar function fts5_decode().
6353 */
fts5DecodeFunction(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)6354 static void fts5DecodeFunction(
6355   sqlite3_context *pCtx,          /* Function call context */
6356   int nArg,                       /* Number of args (always 2) */
6357   sqlite3_value **apVal           /* Function arguments */
6358 ){
6359   i64 iRowid;                     /* Rowid for record being decoded */
6360   int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
6361   const u8 *aBlob; int n;         /* Record to decode */
6362   u8 *a = 0;
6363   Fts5Buffer s;                   /* Build up text to return here */
6364   int rc = SQLITE_OK;             /* Return code */
6365   sqlite3_int64 nSpace = 0;
6366   int eDetailNone = (sqlite3_user_data(pCtx)!=0);
6367 
6368   assert( nArg==2 );
6369   UNUSED_PARAM(nArg);
6370   memset(&s, 0, sizeof(Fts5Buffer));
6371   iRowid = sqlite3_value_int64(apVal[0]);
6372 
6373   /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
6374   ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
6375   ** buffer overreads even if the record is corrupt.  */
6376   n = sqlite3_value_bytes(apVal[1]);
6377   aBlob = sqlite3_value_blob(apVal[1]);
6378   nSpace = n + FTS5_DATA_ZERO_PADDING;
6379   a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
6380   if( a==0 ) goto decode_out;
6381   if( n>0 ) memcpy(a, aBlob, n);
6382 
6383   fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
6384 
6385   fts5DebugRowid(&rc, &s, iRowid);
6386   if( bDlidx ){
6387     Fts5Data dlidx;
6388     Fts5DlidxLvl lvl;
6389 
6390     dlidx.p = a;
6391     dlidx.nn = n;
6392 
6393     memset(&lvl, 0, sizeof(Fts5DlidxLvl));
6394     lvl.pData = &dlidx;
6395     lvl.iLeafPgno = iPgno;
6396 
6397     for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
6398       sqlite3Fts5BufferAppendPrintf(&rc, &s,
6399           " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
6400       );
6401     }
6402   }else if( iSegid==0 ){
6403     if( iRowid==FTS5_AVERAGES_ROWID ){
6404       fts5DecodeAverages(&rc, &s, a, n);
6405     }else{
6406       fts5DecodeStructure(&rc, &s, a, n);
6407     }
6408   }else if( eDetailNone ){
6409     Fts5Buffer term;              /* Current term read from page */
6410     int szLeaf;
6411     int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
6412     int iTermOff;
6413     int nKeep = 0;
6414     int iOff;
6415 
6416     memset(&term, 0, sizeof(Fts5Buffer));
6417 
6418     /* Decode any entries that occur before the first term. */
6419     if( szLeaf<n ){
6420       iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
6421     }else{
6422       iTermOff = szLeaf;
6423     }
6424     fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
6425 
6426     iOff = iTermOff;
6427     while( iOff<szLeaf ){
6428       int nAppend;
6429 
6430       /* Read the term data for the next term*/
6431       iOff += fts5GetVarint32(&a[iOff], nAppend);
6432       term.n = nKeep;
6433       fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
6434       sqlite3Fts5BufferAppendPrintf(
6435           &rc, &s, " term=%.*s", term.n, (const char*)term.p
6436       );
6437       iOff += nAppend;
6438 
6439       /* Figure out where the doclist for this term ends */
6440       if( iPgidxOff<n ){
6441         int nIncr;
6442         iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
6443         iTermOff += nIncr;
6444       }else{
6445         iTermOff = szLeaf;
6446       }
6447 
6448       fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
6449       iOff = iTermOff;
6450       if( iOff<szLeaf ){
6451         iOff += fts5GetVarint32(&a[iOff], nKeep);
6452       }
6453     }
6454 
6455     fts5BufferFree(&term);
6456   }else{
6457     Fts5Buffer term;              /* Current term read from page */
6458     int szLeaf;                   /* Offset of pgidx in a[] */
6459     int iPgidxOff;
6460     int iPgidxPrev = 0;           /* Previous value read from pgidx */
6461     int iTermOff = 0;
6462     int iRowidOff = 0;
6463     int iOff;
6464     int nDoclist;
6465 
6466     memset(&term, 0, sizeof(Fts5Buffer));
6467 
6468     if( n<4 ){
6469       sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
6470       goto decode_out;
6471     }else{
6472       iRowidOff = fts5GetU16(&a[0]);
6473       iPgidxOff = szLeaf = fts5GetU16(&a[2]);
6474       if( iPgidxOff<n ){
6475         fts5GetVarint32(&a[iPgidxOff], iTermOff);
6476       }else if( iPgidxOff>n ){
6477         rc = FTS5_CORRUPT;
6478         goto decode_out;
6479       }
6480     }
6481 
6482     /* Decode the position list tail at the start of the page */
6483     if( iRowidOff!=0 ){
6484       iOff = iRowidOff;
6485     }else if( iTermOff!=0 ){
6486       iOff = iTermOff;
6487     }else{
6488       iOff = szLeaf;
6489     }
6490     if( iOff>n ){
6491       rc = FTS5_CORRUPT;
6492       goto decode_out;
6493     }
6494     fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
6495 
6496     /* Decode any more doclist data that appears on the page before the
6497     ** first term. */
6498     nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
6499     if( nDoclist+iOff>n ){
6500       rc = FTS5_CORRUPT;
6501       goto decode_out;
6502     }
6503     fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
6504 
6505     while( iPgidxOff<n && rc==SQLITE_OK ){
6506       int bFirst = (iPgidxOff==szLeaf);     /* True for first term on page */
6507       int nByte;                            /* Bytes of data */
6508       int iEnd;
6509 
6510       iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
6511       iPgidxPrev += nByte;
6512       iOff = iPgidxPrev;
6513 
6514       if( iPgidxOff<n ){
6515         fts5GetVarint32(&a[iPgidxOff], nByte);
6516         iEnd = iPgidxPrev + nByte;
6517       }else{
6518         iEnd = szLeaf;
6519       }
6520       if( iEnd>szLeaf ){
6521         rc = FTS5_CORRUPT;
6522         break;
6523       }
6524 
6525       if( bFirst==0 ){
6526         iOff += fts5GetVarint32(&a[iOff], nByte);
6527         if( nByte>term.n ){
6528           rc = FTS5_CORRUPT;
6529           break;
6530         }
6531         term.n = nByte;
6532       }
6533       iOff += fts5GetVarint32(&a[iOff], nByte);
6534       if( iOff+nByte>n ){
6535         rc = FTS5_CORRUPT;
6536         break;
6537       }
6538       fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
6539       iOff += nByte;
6540 
6541       sqlite3Fts5BufferAppendPrintf(
6542           &rc, &s, " term=%.*s", term.n, (const char*)term.p
6543       );
6544       iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
6545     }
6546 
6547     fts5BufferFree(&term);
6548   }
6549 
6550  decode_out:
6551   sqlite3_free(a);
6552   if( rc==SQLITE_OK ){
6553     sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
6554   }else{
6555     sqlite3_result_error_code(pCtx, rc);
6556   }
6557   fts5BufferFree(&s);
6558 }
6559 
6560 /*
6561 ** The implementation of user-defined scalar function fts5_rowid().
6562 */
fts5RowidFunction(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)6563 static void fts5RowidFunction(
6564   sqlite3_context *pCtx,          /* Function call context */
6565   int nArg,                       /* Number of args (always 2) */
6566   sqlite3_value **apVal           /* Function arguments */
6567 ){
6568   const char *zArg;
6569   if( nArg==0 ){
6570     sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
6571   }else{
6572     zArg = (const char*)sqlite3_value_text(apVal[0]);
6573     if( 0==sqlite3_stricmp(zArg, "segment") ){
6574       i64 iRowid;
6575       int segid, pgno;
6576       if( nArg!=3 ){
6577         sqlite3_result_error(pCtx,
6578             "should be: fts5_rowid('segment', segid, pgno))", -1
6579         );
6580       }else{
6581         segid = sqlite3_value_int(apVal[1]);
6582         pgno = sqlite3_value_int(apVal[2]);
6583         iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
6584         sqlite3_result_int64(pCtx, iRowid);
6585       }
6586     }else{
6587       sqlite3_result_error(pCtx,
6588         "first arg to fts5_rowid() must be 'segment'" , -1
6589       );
6590     }
6591   }
6592 }
6593 
6594 /*
6595 ** This is called as part of registering the FTS5 module with database
6596 ** connection db. It registers several user-defined scalar functions useful
6597 ** with FTS5.
6598 **
6599 ** If successful, SQLITE_OK is returned. If an error occurs, some other
6600 ** SQLite error code is returned instead.
6601 */
sqlite3Fts5IndexInit(sqlite3 * db)6602 int sqlite3Fts5IndexInit(sqlite3 *db){
6603   int rc = sqlite3_create_function(
6604       db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
6605   );
6606 
6607   if( rc==SQLITE_OK ){
6608     rc = sqlite3_create_function(
6609         db, "fts5_decode_none", 2,
6610         SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
6611     );
6612   }
6613 
6614   if( rc==SQLITE_OK ){
6615     rc = sqlite3_create_function(
6616         db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
6617     );
6618   }
6619   return rc;
6620 }
6621 
6622 
/*
** If the value returned by fts5IndexDataVersion() differs from the
** version stored in p->iStructVersion, call fts5StructureInvalidate()
** on the index. The return value is that of fts5IndexReturn(p).
*/
int sqlite3Fts5IndexReset(Fts5Index *p){
  int bStale;                     /* True if versions do not match */

  assert( p->pStruct==0 || p->iStructVersion!=0 );
  bStale = (fts5IndexDataVersion(p)!=p->iStructVersion);
  if( bStale ){
    fts5StructureInvalidate(p);
  }
  return fts5IndexReturn(p);
}
6630