1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** Low level access to the FTS index stored in the database file. The
** routines in this file implement all read and write access to the
15 ** %_data table. Other parts of the system access this functionality via
16 ** the interface defined in fts5Int.h.
17 */
18
19
20 #include "fts5Int.h"
21
22 /*
23 ** Overview:
24 **
25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
26 ** As well as the main term index, there may be up to 31 prefix indexes.
27 ** The format is similar to FTS3/4, except that:
28 **
29 ** * all segment b-tree leaf data is stored in fixed size page records
30 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
31 ** taken to ensure it is possible to iterate in either direction through
32 ** the entries in a doclist, or to seek to a specific entry within a
33 ** doclist, without loading it into memory.
34 **
35 ** * large doclists that span many pages have associated "doclist index"
36 ** records that contain a copy of the first rowid on each page spanned by
37 ** the doclist. This is used to speed up seek operations, and merges of
38 ** large doclists with very small doclists.
39 **
40 ** * extra fields in the "structure record" record the state of ongoing
41 ** incremental merge operations.
42 **
43 */
44
45
/* Work-unit sizes, expressed in leaf pages. */
#define FTS5_OPT_WORK_UNIT  1000     /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64       /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4        /* Add dlidx if this many empty pages */

/* NOTE(review): presumably the byte that prefixes terms belonging to the
** main (non-prefix) index - confirm against the code that builds terms. */
#define FTS5_MAIN_PREFIX '0'

/* Compile-time guard: refuse to build if more than 31 prefix indexes are
** configured (the overview above states "up to 31 prefix indexes"). */
#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif
56
57 /*
58 ** Details:
59 **
60 ** The %_data table managed by this module,
61 **
62 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
63 **
64 ** , contains the following 5 types of records. See the comments surrounding
65 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
** assigned to each of them.
67 **
68 ** 1. Structure Records:
69 **
70 ** The set of segments that make up an index - the index structure - are
71 ** recorded in a single record within the %_data table. The record consists
72 ** of a single 32-bit configuration cookie value followed by a list of
73 ** SQLite varints. If the FTS table features more than one index (because
74 ** there are one or more prefix indexes), it is guaranteed that all share
75 ** the same cookie value.
76 **
77 ** Immediately following the configuration cookie, the record begins with
78 ** three varints:
79 **
80 ** + number of levels,
81 ** + total number of segments on all levels,
82 ** + value of write counter.
83 **
84 ** Then, for each level from 0 to nMax:
85 **
86 ** + number of input segments in ongoing merge.
87 ** + total number of segments in level.
88 ** + for each segment from oldest to newest:
89 ** + segment id (always > 0)
90 ** + first leaf page number (often 1, always greater than 0)
91 ** + final leaf page number
92 **
93 ** 2. The Averages Record:
94 **
95 ** A single record within the %_data table. The data is a list of varints.
96 ** The first value is the number of rows in the index. Then, for each column
97 ** from left to right, the total number of tokens in the column for all
98 ** rows of the table.
99 **
100 ** 3. Segment leaves:
101 **
102 ** TERM/DOCLIST FORMAT:
103 **
104 ** Most of each segment leaf is taken up by term/doclist data. The
105 ** general format of term/doclist, starting with the first term
106 ** on the leaf page, is:
107 **
108 ** varint : size of first term
109 ** blob: first term data
110 ** doclist: first doclist
111 ** zero-or-more {
112 ** varint: number of bytes in common with previous term
113 ** varint: number of bytes of new term data (nNew)
114 ** blob: nNew bytes of new term data
115 ** doclist: next doclist
116 ** }
117 **
118 ** doclist format:
119 **
120 ** varint: first rowid
121 ** poslist: first poslist
122 ** zero-or-more {
123 ** varint: rowid delta (always > 0)
124 ** poslist: next poslist
125 ** }
126 **
127 ** poslist format:
128 **
129 ** varint: size of poslist in bytes multiplied by 2, not including
130 ** this field. Plus 1 if this entry carries the "delete" flag.
131 ** collist: collist for column 0
132 ** zero-or-more {
133 ** 0x01 byte
134 ** varint: column number (I)
135 ** collist: collist for column I
136 ** }
137 **
138 ** collist format:
139 **
140 ** varint: first offset + 2
141 ** zero-or-more {
142 ** varint: offset delta + 2
143 ** }
144 **
145 ** PAGE FORMAT
146 **
147 ** Each leaf page begins with a 4-byte header containing 2 16-bit
148 ** unsigned integer fields in big-endian format. They are:
149 **
150 ** * The byte offset of the first rowid on the page, if it exists
151 ** and occurs before the first term (otherwise 0).
152 **
153 ** * The byte offset of the start of the page footer. If the page
154 ** footer is 0 bytes in size, then this field is the same as the
155 ** size of the leaf page in bytes.
156 **
157 ** The page footer consists of a single varint for each term located
158 ** on the page. Each varint is the byte offset of the current term
159 ** within the page, delta-compressed against the previous value. In
160 ** other words, the first varint in the footer is the byte offset of
161 ** the first term, the second is the byte offset of the second less that
162 ** of the first, and so on.
163 **
164 ** The term/doclist format described above is accurate if the entire
165 ** term/doclist data fits on a single leaf page. If this is not the case,
166 ** the format is changed in two ways:
167 **
168 ** + if the first rowid on a page occurs before the first term, it
169 ** is stored as a literal value:
170 **
171 ** varint: first rowid
172 **
173 ** + the first term on each page is stored in the same way as the
174 ** very first term of the segment:
175 **
176 ** varint : size of first term
177 ** blob: first term data
178 **
179 ** 5. Segment doclist indexes:
180 **
181 ** Doclist indexes are themselves b-trees, however they usually consist of
182 ** a single leaf record only. The format of each doclist index leaf page
183 ** is:
184 **
185 ** * Flags byte. Bits are:
186 ** 0x01: Clear if leaf is also the root page, otherwise set.
187 **
188 ** * Page number of fts index leaf page. As a varint.
189 **
190 ** * First rowid on page indicated by previous field. As a varint.
191 **
192 ** * A list of varints, one for each subsequent termless page. A
193 ** positive delta if the termless page contains at least one rowid,
194 ** or an 0x00 byte otherwise.
195 **
196 ** Internal doclist index nodes are:
197 **
198 ** * Flags byte. Bits are:
199 ** 0x01: Clear for root page, otherwise set.
200 **
201 ** * Page number of first child page. As a varint.
202 **
203 ** * Copy of first rowid on page indicated by previous field. As a varint.
204 **
205 ** * A list of delta-encoded varints - the first rowid on each subsequent
206 ** child page.
207 **
208 */
209
210 /*
211 ** Rowids for the averages and structure records in the %_data table.
212 */
#define FTS5_AVERAGES_ROWID 1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID 10  /* Rowid used for the structure record */
215
216 /*
217 ** Macros determining the rowids used by segment leaves and dlidx leaves
218 ** and nodes. All nodes and leaves are stored in the %_data table with large
219 ** positive rowids.
220 **
221 ** Each segment has a unique non-zero 16-bit id.
222 **
223 ** The rowid for each segment leaf is found by passing the segment id and
224 ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
225 ** sequentially starting from 1.
226 */
#define FTS5_DATA_ID_B     16     /* Segment id field: 16 bits (max 65535) */
#define FTS5_DATA_DLI_B    1      /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B 5      /* Dlidx height field: 5 bits (0..31) */
#define FTS5_DATA_PAGE_B   31     /* Page number field: 31 bits (max 2147483647) */

/*
** Pack (segid, dlidx, height, pgno) into a single i64 rowid. From most to
** least significant the fields are: segment id, doclist-index flag, dlidx
** tree height, page number. Each argument is cast to i64 before shifting.
** The fields are combined with '+', so an argument that does not fit in
** its field carries into the next more significant field.
*/
#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

/* Rowid of segment leaf pgno: dlidx flag and height are both zero. */
#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)
/* Rowid of a doclist-index page: dlidx flag set, with the given height. */
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
241
242 /*
243 ** Maximum segments permitted in a single index
244 */
#define FTS5_MAX_SEGMENT 2000    /* Upper bound on segments in one index */
246
#ifdef SQLITE_DEBUG
/*
** Return SQLITE_CORRUPT_VTAB. This function is only compiled when
** SQLITE_DEBUG is defined.
**
** The parameter list is spelled (void) so that the definition is a real
** prototype; before C23 an empty "()" declares a function with unspecified
** parameters, which disables argument checking at call sites.
*/
int sqlite3Fts5Corrupt(void) { return SQLITE_CORRUPT_VTAB; }
#endif
250
251
252 /*
253 ** Each time a blob is read from the %_data table, it is padded with this
254 ** many zero bytes. This makes it easier to decode the various record formats
255 ** without overreading if the records are corrupt.
256 */
#define FTS5_DATA_ZERO_PADDING 8   /* NOTE(review): not referenced in the part
                                   ** of the file reviewed - confirm usage */
#define FTS5_DATA_PADDING 20       /* Extra bytes fts5DataRead() allocates
                                   ** after the end of each record */
259
/* Forward declarations of the structure types used within this file. */
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
272
/*
** A record loaded from the %_data table. fts5DataRead() allocates this
** header and the record bytes as one block, with p pointing at the first
** byte after the header, so the whole object is released by a single
** sqlite3_free() (see fts5DataRelease()).
*/
struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int nn;                         /* Size of record in bytes */
  int szLeaf;                     /* Size of leaf without page-index. Set by
                                  ** fts5DataRead() from the big-endian u16
                                  ** at byte offset 2 of the record */
};
278
279 /*
280 ** One object per %_data table.
281 */
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
  char *zDataTbl;                 /* Name of %_data table */
  int nWorkUnit;                  /* Leaf pages in a "unit" of work */

  /*
  ** Variables related to the accumulation of tokens and doclists within the
  ** in-memory hash tables before they are flushed to disk.
  */
  Fts5Hash *pHash;                /* Hash table for in-memory data */
  int nPendingData;               /* Current bytes of pending data */
  i64 iWriteRowid;                /* Rowid for current doc being written */
  int bDelete;                    /* Current write is a delete */

  /* Error state. Most functions in this file do nothing if rc is already
  ** set to something other than SQLITE_OK when they are called. */
  int rc;                         /* Current error code */

  /* State used by the fts5DataXXX() functions. The statements are prepared
  ** lazily, the first time each is required. */
  sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter;          /* "REPLACE INTO %_data(id, block) VALUES(?,?)" */
  sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
  sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
  sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=?" */
  sqlite3_stmt *pIdxSelect;       /* NOTE(review): presumably a SELECT on the
                                  ** %_idx table - prepared outside this view */
  int nRead;                      /* Total number of blocks read */

  sqlite3_stmt *pDataVersion;     /* NOTE(review): presumably used to obtain
                                  ** the value stored in iStructVersion */
  i64 iStructVersion;             /* data_version when pStruct read */
  Fts5Structure *pStruct;         /* Current db structure (or NULL) */
};
312
/*
** Iterator over the entries of a doclist held in a memory buffer.
** NOTE(review): the functions that drive this object are outside the part
** of the file reviewed; field descriptions below follow the field names.
*/
struct Fts5DoclistIter {
  u8 *aEof;                       /* Pointer to 1 byte past end of doclist */

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;                     /* Rowid of current entry */
  u8 *aPoslist;                   /* Position list of current entry */
  int nPoslist;                   /* Size of aPoslist[] in bytes (presumably) */
  int nSize;                      /* NOTE(review): presumably the size in bytes
                                  ** of the poslist size field - confirm */
};
322
323 /*
324 ** The contents of the "structure" record for each index are represented
325 ** using an Fts5Structure record in memory. Which uses instances of the
326 ** other Fts5StructureXXX types as components.
327 */
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */
  int pgnoFirst;                  /* First leaf page number in segment */
  int pgnoLast;                   /* Last leaf page number in segment */
};                                /* One per segment; the three fields are
                                  ** stored as consecutive varints in the
                                  ** structure record */
/* One level of the index structure: an array of nSeg segments. */
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest.
                                  ** Separately allocated; freed by
                                  ** fts5StructureRelease() */
};
/*
** In-memory form of the structure record. Reference counted: see
** fts5StructureRef() and fts5StructureRelease(). The object is allocated
** with room for nLevel entries in the trailing aLevel[] array.
*/
struct Fts5Structure {
  int nRef;                       /* Object reference count */
  u64 nWriteCounter;              /* Total leaves written to level 0 */
  int nSegment;                   /* Total segments in this structure */
  int nLevel;                     /* Number of levels in this index */
  Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
};
345
/*
** An object of type Fts5SegWriter is used to write to segments. It embeds
** one Fts5PageWriter (Fts5SegWriter.writer), which holds the buffers for
** the leaf page being written.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
/* Write state for one doclist-index page. Fts5SegWriter.aDlidx[] is an
** array of these objects. */
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous rowid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
/* State for writing a single segment (leaf pages, doclist-index pages and
** the corresponding %_idx table entries). */
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  Fts5PageWriter writer;          /* PageWriter object */
  i64 iPrevRowid;                 /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */

  /* Values to insert into the %_idx table */
  Fts5Buffer btterm;              /* Next term to insert into %_idx table */
  int iBtPage;                    /* Page number corresponding to btterm */
};
380
/* Result of comparing two segment iterators. Fts5Iter.aFirst[] is an array
** of these objects. */
typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
  u16 iFirst;                     /* aSeg[] index of firstest iterator */
  u8 bTermEq;                     /* True if the terms are equal */
};
386
387 /*
388 ** Object for iterating through a single segment, visiting each term/rowid
389 ** pair in the segment.
390 **
391 ** pSeg:
392 ** The segment to iterate through.
393 **
394 ** iLeafPgno:
395 ** Current leaf page number within segment.
396 **
397 ** iLeafOffset:
398 ** Byte offset within the current leaf that is the first byte of the
**   position list data (one byte past the position-list size field).
400 ** rowid field of the current entry. Usually this is the size field of the
401 ** position list data. The exception is if the rowid for the current entry
402 ** is the last thing on the leaf page.
403 **
404 ** pLeaf:
405 ** Buffer containing current leaf page data. Set to NULL at EOF.
406 **
407 ** iTermLeafPgno, iTermLeafOffset:
408 ** Leaf page number containing the last term read from the segment. And
409 ** the offset immediately following the term data.
410 **
411 ** flags:
412 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
413 **
414 ** FTS5_SEGITER_ONETERM:
415 ** If set, set the iterator to point to EOF after the current doclist
416 ** has been exhausted. Do not proceed to the next term in the segment.
417 **
418 ** FTS5_SEGITER_REVERSE:
419 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
420 ** it is set, iterate through rowid in descending order instead of the
421 ** default ascending order.
422 **
423 ** iRowidOffset/nRowidOffset/aRowidOffset:
424 ** These are used if the FTS5_SEGITER_REVERSE flag is set.
425 **
426 ** For each rowid on the page corresponding to the current term, the
427 ** corresponding aRowidOffset[] entry is set to the byte offset of the
428 ** start of the "position-list-size" field within the page.
429 **
430 ** iTermIdx:
431 ** Index of current term on iTermLeafPgno.
432 */
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  int iLeafOffset;                /* Byte offset within current leaf */

  /* Next method. NOTE(review): the meaning of the int* argument is defined
  ** by the implementations, which are outside the part of the file
  ** reviewed. */
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*);

  /* The page and offset from which the current term was read. The offset
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff;                  /* Next offset in pgidx */
  int iEndofDoclist;              /* NOTE(review): presumably the offset of
                                  ** the end of the current doclist within
                                  ** the leaf - confirm */

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  u8 bDel;                        /* True if the delete flag is set */
};
465
/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page. Assert that its szLeaf field either equals the size of the
** record (nn) or matches the big-endian 16-bit value stored at byte
** offset 2 of the page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

/* Bits that may be set in Fts5SegIter.flags. */
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02

/*
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
** false if it contains at least one term. A leaf has no terms when its
** page footer is empty, i.e. when szLeaf is not less than the record size.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/* Evaluates to the big-endian 16-bit value at offset (szLeaf + i*2) of the
** page. NOTE(review): the page format comment above describes the footer
** as a list of varints, not 16-bit fields - confirm whether this macro is
** still used. */
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/* Evaluates to the first 16-bit header field of the page: the byte offset
** of the first rowid on the page. */
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
487
488 /*
489 ** Object for iterating through the merged results of one or more segments,
490 ** visiting each term/rowid pair in the merged data.
491 **
492 ** nSeg is always a power of two greater than or equal to the number of
493 ** segments that this object is merging data from. Both the aSeg[] and
494 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
495 ** with zeroed objects - these are handled as if they were iterators opened
496 ** on empty segments.
497 **
498 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
499 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
500 ** comparison in this context is the index of the iterator that currently
501 ** points to the smaller term/rowid combination. Iterators at EOF are
502 ** considered to be greater than all other iterators.
503 **
504 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
505 ** the smallest key overall. aFirst[0] is unused.
506 **
507 ** poslist:
508 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
509 ** There is no way to tell if this is populated or not.
510 */
struct Fts5Iter {
  Fts5IndexIter base;             /* Base class containing output vars */

  Fts5Index *pIndex;              /* Index that owns this iterator */
  Fts5Structure *pStruct;         /* Database structure for this iterator */
  Fts5Buffer poslist;             /* Buffer containing current poslist */
  Fts5Colset *pColset;            /* Restrict matches to these columns */

  /* Invoked to set output variables. */
  void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);

  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  u8 bSkipEmpty;                  /* True to skip deleted entries */

  i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
  Fts5SegIter aSeg[1];            /* Array of segment iterators. Declared with
                                  ** one element; nSeg entries are in use */
};
530
531
532 /*
533 ** An instance of the following type is used to iterate through the contents
534 ** of a doclist-index record.
535 **
536 ** pData:
537 ** Record containing the doclist-index data.
538 **
539 ** bEof:
540 ** Set to true once iterator has reached EOF.
541 **
542 ** iOff:
543 ** Set to the current offset within record pData.
544 */
struct Fts5DlidxLvl {
  Fts5Data *pData;              /* Data for current page of this level */
  int iOff;                     /* Current offset into pData */
  int bEof;                     /* At EOF already */
  int iFirstOff;                /* Used by reverse iterators */

  /* Output variables. These describe the entry the iterator currently
  ** points to. */
  int iLeafPgno;                /* Page number of current leaf page */
  i64 iRowid;                   /* First rowid on leaf iLeafPgno */
};
/* Iterator over a doclist index, holding one Fts5DlidxLvl object per level
** of the doclist-index b-tree. */
struct Fts5DlidxIter {
  int nLvl;                     /* Number of entries in aLvl[] (presumably) */
  int iSegid;                   /* Id of the segment the dlidx belongs to
                                ** (presumably - confirm against the code
                                ** that initializes this object) */
  Fts5DlidxLvl aLvl[1];         /* Array of per-level iterator state */
};
560
/*
** Store the 16-bit value iVal in the two bytes at aOut[] in big-endian
** order (most significant byte first).
*/
static void fts5PutU16(u8 *aOut, u16 iVal){
  u8 hi = (u8)(iVal >> 8);
  u8 lo = (u8)(iVal & 0xFF);
  aOut[0] = hi;
  aOut[1] = lo;
}
565
/*
** Read and return the big-endian 16-bit unsigned integer stored in the
** two bytes at aIn[].
*/
static u16 fts5GetU16(const u8 *aIn){
  u16 hi = (u16)aIn[0];
  u16 lo = (u16)aIn[1];
  return (u16)((hi << 8) + lo);
}
569
570 /*
571 ** Allocate and return a buffer at least nByte bytes in size.
572 **
573 ** If an OOM error is encountered, return NULL and set the error code in
574 ** the Fts5Index handle passed as the first argument.
575 */
static void *fts5IdxMalloc(Fts5Index *p, int nByte){
  /* The allocator is handed the address of p->rc, so any error it reports
  ** is recorded directly in the index handle. */
  void *pNew = sqlite3Fts5MallocZero(&p->rc, nByte);
  return pNew;
}
579
580 /*
581 ** Compare the contents of the pLeft buffer with the pRight/nRight blob.
582 **
583 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
584 ** +ve if pRight is smaller than pLeft. In other words:
585 **
586 ** res = *pLeft - *pRight
587 */
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = 0;

  /* Only call memcmp() when there is at least one byte to compare. Passing
  ** a NULL pointer to memcmp() is undefined behaviour even when the length
  ** is zero, and a zero-filled Fts5Buffer has a NULL p. The guard also
  ** keeps a negative length from being converted to a huge size_t. */
  if( nCmp>0 ){
    res = memcmp(pLeft->p, pRight, nCmp);
  }

  /* If the common prefix is identical, the shorter value is the lesser. */
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
598
599 /*
600 ** Compare the contents of the two buffers using memcmp(). If one buffer
601 ** is a prefix of the other, it is considered the lesser.
602 **
603 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
604 ** +ve if pRight is smaller than pLeft. In other words:
605 **
606 ** res = *pLeft - *pRight
607 */
static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
  int nCmp = MIN(pLeft->n, pRight->n);
  int res = 0;

  /* Only call memcmp() when there is at least one byte to compare. Passing
  ** a NULL pointer to memcmp() is undefined behaviour even when the length
  ** is zero, and a zero-filled Fts5Buffer has a NULL p. */
  if( nCmp>0 ){
    res = memcmp(pLeft->p, pRight->p, nCmp);
  }

  /* If the common prefix is identical, the shorter buffer is the lesser. */
  return (res==0 ? (pLeft->n - pRight->n) : res);
}
613
/*
** Return the value of the first varint of the page footer of leaf pLeaf.
** The footer begins at byte offset pLeaf->szLeaf; according to the page
** format its first varint is the byte offset of the first term on the page.
*/
static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
  const u8 *aFooter = &pLeaf->p[pLeaf->szLeaf];
  int iTermOff;
  fts5GetVarint32(aFooter, iTermOff);
  return iTermOff;
}
619
620 /*
621 ** Close the read-only blob handle, if it is open.
622 */
static void fts5CloseReader(Fts5Index *p){
  sqlite3_blob *pBlob = p->pReader;
  if( pBlob==0 ) return;

  /* Clear the handle stored in the index before closing the blob, so that
  ** p->pReader is already NULL while sqlite3_blob_close() runs. */
  p->pReader = 0;
  sqlite3_blob_close(pBlob);
}
630
631 /*
632 ** Retrieve a record from the %_data table.
633 **
634 ** If an error occurs, NULL is returned and an error left in the
635 ** Fts5Index object.
636 */
fts5DataRead(Fts5Index * p,i64 iRowid)637 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
638 Fts5Data *pRet = 0;
639 if( p->rc==SQLITE_OK ){
640 int rc = SQLITE_OK;
641
642 if( p->pReader ){
643 /* This call may return SQLITE_ABORT if there has been a savepoint
644 ** rollback since it was last used. In this case a new blob handle
645 ** is required. */
646 sqlite3_blob *pBlob = p->pReader;
647 p->pReader = 0;
648 rc = sqlite3_blob_reopen(pBlob, iRowid);
649 assert( p->pReader==0 );
650 p->pReader = pBlob;
651 if( rc!=SQLITE_OK ){
652 fts5CloseReader(p);
653 }
654 if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
655 }
656
657 /* If the blob handle is not open at this point, open it and seek
658 ** to the requested entry. */
659 if( p->pReader==0 && rc==SQLITE_OK ){
660 Fts5Config *pConfig = p->pConfig;
661 rc = sqlite3_blob_open(pConfig->db,
662 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
663 );
664 }
665
666 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
667 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
668 ** All the reasons those functions might return SQLITE_ERROR - missing
669 ** table, missing row, non-blob/text in block column - indicate
670 ** backing store corruption. */
671 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
672
673 if( rc==SQLITE_OK ){
674 u8 *aOut = 0; /* Read blob data into this buffer */
675 int nByte = sqlite3_blob_bytes(p->pReader);
676 int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
677 pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
678 if( pRet ){
679 pRet->nn = nByte;
680 aOut = pRet->p = (u8*)&pRet[1];
681 }else{
682 rc = SQLITE_NOMEM;
683 }
684
685 if( rc==SQLITE_OK ){
686 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
687 }
688 if( rc!=SQLITE_OK ){
689 sqlite3_free(pRet);
690 pRet = 0;
691 }else{
692 /* TODO1: Fix this */
693 pRet->szLeaf = fts5GetU16(&pRet->p[2]);
694 }
695 }
696 p->rc = rc;
697 p->nRead++;
698 }
699
700 assert( (pRet==0)==(p->rc!=SQLITE_OK) );
701 return pRet;
702 }
703
704 /*
705 ** Release a reference to data record returned by an earlier call to
706 ** fts5DataRead().
707 */
static void fts5DataRelease(Fts5Data *pData){
  /* fts5DataRead() allocates the Fts5Data header and the record bytes as
  ** a single block, so one call frees both. */
  sqlite3_free(pData);
}
711
fts5LeafRead(Fts5Index * p,i64 iRowid)712 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
713 Fts5Data *pRet = fts5DataRead(p, iRowid);
714 if( pRet ){
715 if( pRet->szLeaf>pRet->nn ){
716 p->rc = FTS5_CORRUPT;
717 fts5DataRelease(pRet);
718 pRet = 0;
719 }
720 }
721 return pRet;
722 }
723
/*
** Prepare SQL statement zSql as a persistent statement and store the new
** handle in *ppStmt. Nothing is prepared if p->rc is already set to an
** error code. If zSql is NULL (e.g. because sqlite3_mprintf() failed in
** the caller), p->rc is set to SQLITE_NOMEM.
**
** This function takes ownership of zSql: it is always freed with
** sqlite3_free() before returning. The return value is the final value
** of p->rc.
*/
static int fts5IndexPrepareStmt(
  Fts5Index *p,
  sqlite3_stmt **ppStmt,
  char *zSql
){
  if( p->rc==SQLITE_OK ){
    if( zSql==0 ){
      p->rc = SQLITE_NOMEM;
    }else{
      p->rc = sqlite3_prepare_v3(
          p->pConfig->db, zSql, -1, SQLITE_PREPARE_PERSISTENT, ppStmt, 0
      );
    }
  }
  sqlite3_free(zSql);
  return p->rc;
}
740
741
742 /*
743 ** INSERT OR REPLACE a record into the %_data table.
744 */
static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
  if( p->rc!=SQLITE_OK ) return;

  /* Prepare the REPLACE statement the first time it is needed. It is kept
  ** in p->pWriter for reuse by later calls. */
  if( p->pWriter==0 ){
    Fts5Config *pConfig = p->pConfig;
    char *zSql = sqlite3_mprintf(
        "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
        pConfig->zDb, pConfig->zName
    );
    fts5IndexPrepareStmt(p, &p->pWriter, zSql);
    if( p->rc ) return;
  }

  /* The blob is bound with SQLITE_STATIC, so pData must remain valid until
  ** the statement has been stepped. The result of running the statement is
  ** obtained from sqlite3_reset(). */
  sqlite3_bind_int64(p->pWriter, 1, iRowid);
  sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
  sqlite3_step(p->pWriter);
  p->rc = sqlite3_reset(p->pWriter);
}
762
763 /*
764 ** Execute the following SQL:
765 **
766 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
767 */
static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
  if( p->rc!=SQLITE_OK ) return;

  /* Prepare the DELETE statement the first time it is needed. */
  if( p->pDeleter==0 ){
    Fts5Config *pConfig = p->pConfig;
    int rc = SQLITE_NOMEM;        /* Result if the SQL cannot be allocated */
    char *zSql = sqlite3_mprintf(
        "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
          pConfig->zDb, pConfig->zName
    );
    if( zSql ){
      rc = sqlite3_prepare_v3(pConfig->db, zSql, -1,
                              SQLITE_PREPARE_PERSISTENT, &p->pDeleter, 0);
      sqlite3_free(zSql);
    }
    if( rc!=SQLITE_OK ){
      p->rc = rc;
      return;
    }
  }

  /* Both ends of the rowid range are inclusive. The result of running the
  ** statement is obtained from sqlite3_reset(). */
  sqlite3_bind_int64(p->pDeleter, 1, iFirst);
  sqlite3_bind_int64(p->pDeleter, 2, iLast);
  sqlite3_step(p->pDeleter);
  p->rc = sqlite3_reset(p->pDeleter);
}
796
797 /*
798 ** Remove all records associated with segment iSegid.
799 */
static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
  /* The segment id occupies the most significant bits of each rowid, so
  ** every %_data record of the segment has a rowid between the rowid
  ** generated for page 0 of this segment and the one immediately before
  ** page 0 of segment (iSegid+1). */
  i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
  i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
  fts5DataDelete(p, iFirst, iLast);

  /* Prepare the %_idx DELETE statement the first time it is needed. */
  if( p->pIdxDeleter==0 ){
    Fts5Config *pConfig = p->pConfig;
    char *zSql = sqlite3_mprintf(
        "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
        pConfig->zDb, pConfig->zName
    );
    fts5IndexPrepareStmt(p, &p->pIdxDeleter, zSql);
  }
  if( p->rc!=SQLITE_OK ) return;

  sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
  sqlite3_step(p->pIdxDeleter);
  p->rc = sqlite3_reset(p->pIdxDeleter);
}
817
818 /*
819 ** Release a reference to an Fts5Structure object returned by an earlier
820 ** call to fts5StructureRead() or fts5StructureDecode().
821 */
static void fts5StructureRelease(Fts5Structure *pStruct){
  int iLvl;
  if( pStruct==0 ) return;

  /* Drop one reference. The object is only destroyed when the last
  ** reference is released. */
  pStruct->nRef--;
  if( pStruct->nRef>0 ) return;
  assert( pStruct->nRef==0 );

  /* Each level owns a separately allocated aSeg[] array. */
  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
    sqlite3_free(pStruct->aLevel[iLvl].aSeg);
  }
  sqlite3_free(pStruct);
}
832
/*
** Take an additional reference to pStruct. Each reference must eventually
** be dropped with a call to fts5StructureRelease().
*/
static void fts5StructureRef(Fts5Structure *pStruct){
  pStruct->nRef += 1;
}
836
837 /*
838 ** Deserialize and return the structure record currently stored in serialized
839 ** form within buffer pData/nData.
840 **
841 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
842 ** are over-allocated by one slot. This allows the structure contents
843 ** to be more easily edited.
844 **
845 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
846 ** returned. Otherwise, *ppOut is set to point to the new object and
847 ** SQLITE_OK returned.
848 */
fts5StructureDecode(const u8 * pData,int nData,int * piCookie,Fts5Structure ** ppOut)849 static int fts5StructureDecode(
850 const u8 *pData, /* Buffer containing serialized structure */
851 int nData, /* Size of buffer pData in bytes */
852 int *piCookie, /* Configuration cookie value */
853 Fts5Structure **ppOut /* OUT: Deserialized object */
854 ){
855 int rc = SQLITE_OK;
856 int i = 0;
857 int iLvl;
858 int nLevel = 0;
859 int nSegment = 0;
860 int nByte; /* Bytes of space to allocate at pRet */
861 Fts5Structure *pRet = 0; /* Structure object to return */
862
863 /* Grab the cookie value */
864 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
865 i = 4;
866
867 /* Read the total number of levels and segments from the start of the
868 ** structure record. */
869 i += fts5GetVarint32(&pData[i], nLevel);
870 i += fts5GetVarint32(&pData[i], nSegment);
871 nByte = (
872 sizeof(Fts5Structure) + /* Main structure */
873 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
874 );
875 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
876
877 if( pRet ){
878 pRet->nRef = 1;
879 pRet->nLevel = nLevel;
880 pRet->nSegment = nSegment;
881 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
882
883 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
884 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
885 int nTotal = 0;
886 int iSeg;
887
888 if( i>=nData ){
889 rc = FTS5_CORRUPT;
890 }else{
891 i += fts5GetVarint32(&pData[i], pLvl->nMerge);
892 i += fts5GetVarint32(&pData[i], nTotal);
893 assert( nTotal>=pLvl->nMerge );
894 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
895 nTotal * sizeof(Fts5StructureSegment)
896 );
897 }
898
899 if( rc==SQLITE_OK ){
900 pLvl->nSeg = nTotal;
901 for(iSeg=0; iSeg<nTotal; iSeg++){
902 if( i>=nData ){
903 rc = FTS5_CORRUPT;
904 break;
905 }
906 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
907 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
908 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
909 }
910 }
911 }
912 if( rc!=SQLITE_OK ){
913 fts5StructureRelease(pRet);
914 pRet = 0;
915 }
916 }
917
918 *ppOut = pRet;
919 return rc;
920 }
921
922 /*
923 **
924 */
fts5StructureAddLevel(int * pRc,Fts5Structure ** ppStruct)925 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
926 if( *pRc==SQLITE_OK ){
927 Fts5Structure *pStruct = *ppStruct;
928 int nLevel = pStruct->nLevel;
929 int nByte = (
930 sizeof(Fts5Structure) + /* Main structure */
931 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */
932 );
933
934 pStruct = sqlite3_realloc(pStruct, nByte);
935 if( pStruct ){
936 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
937 pStruct->nLevel++;
938 *ppStruct = pStruct;
939 }else{
940 *pRc = SQLITE_NOMEM;
941 }
942 }
943 }
944
945 /*
946 ** Extend level iLvl so that there is room for at least nExtra more
947 ** segments.
948 */
fts5StructureExtendLevel(int * pRc,Fts5Structure * pStruct,int iLvl,int nExtra,int bInsert)949 static void fts5StructureExtendLevel(
950 int *pRc,
951 Fts5Structure *pStruct,
952 int iLvl,
953 int nExtra,
954 int bInsert
955 ){
956 if( *pRc==SQLITE_OK ){
957 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
958 Fts5StructureSegment *aNew;
959 int nByte;
960
961 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
962 aNew = sqlite3_realloc(pLvl->aSeg, nByte);
963 if( aNew ){
964 if( bInsert==0 ){
965 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
966 }else{
967 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
968 memmove(&aNew[nExtra], aNew, nMove);
969 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
970 }
971 pLvl->aSeg = aNew;
972 }else{
973 *pRc = SQLITE_NOMEM;
974 }
975 }
976 }
977
fts5StructureReadUncached(Fts5Index * p)978 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
979 Fts5Structure *pRet = 0;
980 Fts5Config *pConfig = p->pConfig;
981 int iCookie; /* Configuration cookie */
982 Fts5Data *pData;
983
984 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
985 if( p->rc==SQLITE_OK ){
986 /* TODO: Do we need this if the leaf-index is appended? Probably... */
987 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
988 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
989 if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
990 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
991 }
992 fts5DataRelease(pData);
993 if( p->rc!=SQLITE_OK ){
994 fts5StructureRelease(pRet);
995 pRet = 0;
996 }
997 }
998
999 return pRet;
1000 }
1001
fts5IndexDataVersion(Fts5Index * p)1002 static i64 fts5IndexDataVersion(Fts5Index *p){
1003 i64 iVersion = 0;
1004
1005 if( p->rc==SQLITE_OK ){
1006 if( p->pDataVersion==0 ){
1007 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
1008 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
1009 );
1010 if( p->rc ) return 0;
1011 }
1012
1013 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
1014 iVersion = sqlite3_column_int64(p->pDataVersion, 0);
1015 }
1016 p->rc = sqlite3_reset(p->pDataVersion);
1017 }
1018
1019 return iVersion;
1020 }
1021
1022 /*
1023 ** Read, deserialize and return the structure record.
1024 **
1025 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1026 ** are over-allocated as described for function fts5StructureDecode()
1027 ** above.
1028 **
1029 ** If an error occurs, NULL is returned and an error code left in the
1030 ** Fts5Index handle. If an error has already occurred when this function
1031 ** is called, it is a no-op.
1032 */
fts5StructureRead(Fts5Index * p)1033 static Fts5Structure *fts5StructureRead(Fts5Index *p){
1034
1035 if( p->pStruct==0 ){
1036 p->iStructVersion = fts5IndexDataVersion(p);
1037 if( p->rc==SQLITE_OK ){
1038 p->pStruct = fts5StructureReadUncached(p);
1039 }
1040 }
1041
1042 #if 0
1043 else{
1044 Fts5Structure *pTest = fts5StructureReadUncached(p);
1045 if( pTest ){
1046 int i, j;
1047 assert_nc( p->pStruct->nSegment==pTest->nSegment );
1048 assert_nc( p->pStruct->nLevel==pTest->nLevel );
1049 for(i=0; i<pTest->nLevel; i++){
1050 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
1051 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
1052 for(j=0; j<pTest->aLevel[i].nSeg; j++){
1053 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
1054 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
1055 assert_nc( p1->iSegid==p2->iSegid );
1056 assert_nc( p1->pgnoFirst==p2->pgnoFirst );
1057 assert_nc( p1->pgnoLast==p2->pgnoLast );
1058 }
1059 }
1060 fts5StructureRelease(pTest);
1061 }
1062 }
1063 #endif
1064
1065 if( p->rc!=SQLITE_OK ) return 0;
1066 assert( p->iStructVersion!=0 );
1067 assert( p->pStruct!=0 );
1068 fts5StructureRef(p->pStruct);
1069 return p->pStruct;
1070 }
1071
/*
** Discard the cached structure object, if any. The reference held by
** Fts5Index.pStruct is released and the pointer cleared.
*/
static void fts5StructureInvalidate(Fts5Index *p){
  if( p->pStruct==0 ) return;
  fts5StructureRelease(p->pStruct);
  p->pStruct = 0;
}
1078
/*
** Return the sum of Fts5StructureLevel.nSeg over all levels of index
** structure pStruct, or 0 if pStruct is NULL. This function is only ever
** used as part of assert() conditions, and so is only compiled if
** SQLITE_DEBUG is defined.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Total number of segments */
  int ii = 0;                     /* Current level */

  if( pStruct==0 ) return 0;
  while( ii<pStruct->nLevel ){
    nTotal += pStruct->aLevel[ii].nSeg;
    ii++;
  }
  return nTotal;
}
#endif
1096
/*
** Append nBlob bytes from pBlob to buffer pBuf without growing it. The
** caller must already have ensured that the buffer has enough space
** (see the assert()). All macro arguments are parenthesized in the
** expansion so that compound expressions may safely be passed.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {       \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );             \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));           \
  (pBuf)->n += (nBlob);                                      \
}

/*
** Append value iVal, encoded as a varint, to buffer pBuf without growing
** it. The caller must already have ensured that the buffer has enough
** space for the encoded value.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));  \
  assert( (pBuf)->nSpace>=(pBuf)->n );                          \
}
1107
1108
1109 /*
1110 ** Serialize and store the "structure" record.
1111 **
1112 ** If an error occurs, leave an error code in the Fts5Index object. If an
1113 ** error has already occurred, this function is a no-op.
1114 */
fts5StructureWrite(Fts5Index * p,Fts5Structure * pStruct)1115 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
1116 if( p->rc==SQLITE_OK ){
1117 Fts5Buffer buf; /* Buffer to serialize record into */
1118 int iLvl; /* Used to iterate through levels */
1119 int iCookie; /* Cookie value to store */
1120
1121 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
1122 memset(&buf, 0, sizeof(Fts5Buffer));
1123
1124 /* Append the current configuration cookie */
1125 iCookie = p->pConfig->iCookie;
1126 if( iCookie<0 ) iCookie = 0;
1127
1128 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){
1129 sqlite3Fts5Put32(buf.p, iCookie);
1130 buf.n = 4;
1131 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
1132 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
1133 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
1134 }
1135
1136 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
1137 int iSeg; /* Used to iterate through segments */
1138 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1139 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
1140 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
1141 assert( pLvl->nMerge<=pLvl->nSeg );
1142
1143 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
1144 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
1145 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
1146 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
1147 }
1148 }
1149
1150 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
1151 fts5BufferFree(&buf);
1152 }
1153 }
1154
/*
** Debugging aid, normally compiled out. When enabled, fts5PrintStructure()
** writes a text rendering of pStruct (as produced by fts5DebugStructure())
** to stdout, prefixed by zCaption. When disabled, fts5PrintStructure()
** is a macro that expands to nothing.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  Fts5Buffer out;
  int rc = SQLITE_OK;

  memset(&out, 0, sizeof(out));
  fts5DebugStructure(&rc, &out, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, out.p);
  fflush(stdout);
  fts5BufferFree(&out);
}
#else
# define fts5PrintStructure(x,y)
#endif
1169
/*
** Return the number of pages spanned by segment pSeg, i.e. the size of
** the inclusive range pgnoFirst..pgnoLast.
*/
static int fts5SegmentSize(Fts5StructureSegment *pSeg){
  const int iFirst = pSeg->pgnoFirst;
  const int iLast = pSeg->pgnoLast;
  return 1 + iLast - iFirst;
}
1173
1174 /*
1175 ** Return a copy of index structure pStruct. Except, promote as many
1176 ** segments as possible to level iPromote. If an OOM occurs, NULL is
1177 ** returned.
1178 */
fts5StructurePromoteTo(Fts5Index * p,int iPromote,int szPromote,Fts5Structure * pStruct)1179 static void fts5StructurePromoteTo(
1180 Fts5Index *p,
1181 int iPromote,
1182 int szPromote,
1183 Fts5Structure *pStruct
1184 ){
1185 int il, is;
1186 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
1187
1188 if( pOut->nMerge==0 ){
1189 for(il=iPromote+1; il<pStruct->nLevel; il++){
1190 Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
1191 if( pLvl->nMerge ) return;
1192 for(is=pLvl->nSeg-1; is>=0; is--){
1193 int sz = fts5SegmentSize(&pLvl->aSeg[is]);
1194 if( sz>szPromote ) return;
1195 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
1196 if( p->rc ) return;
1197 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
1198 pOut->nSeg++;
1199 pLvl->nSeg--;
1200 }
1201 }
1202 }
1203 }
1204
1205 /*
1206 ** A new segment has just been written to level iLvl of index structure
1207 ** pStruct. This function determines if any segments should be promoted
1208 ** as a result. Segments are promoted in two scenarios:
1209 **
1210 ** a) If the segment just written is smaller than one or more segments
1211 ** within the previous populated level, it is promoted to the previous
1212 ** populated level.
1213 **
1214 ** b) If the segment just written is larger than the newest segment on
1215 ** the next populated level, then that segment, and any other adjacent
1216 ** segments that are also smaller than the one just written, are
1217 ** promoted.
1218 **
1219 ** If one or more segments are promoted, the structure object is updated
1220 ** to reflect this.
1221 */
fts5StructurePromote(Fts5Index * p,int iLvl,Fts5Structure * pStruct)1222 static void fts5StructurePromote(
1223 Fts5Index *p, /* FTS5 backend object */
1224 int iLvl, /* Index level just updated */
1225 Fts5Structure *pStruct /* Index structure */
1226 ){
1227 if( p->rc==SQLITE_OK ){
1228 int iTst;
1229 int iPromote = -1;
1230 int szPromote = 0; /* Promote anything this size or smaller */
1231 Fts5StructureSegment *pSeg; /* Segment just written */
1232 int szSeg; /* Size of segment just written */
1233 int nSeg = pStruct->aLevel[iLvl].nSeg;
1234
1235 if( nSeg==0 ) return;
1236 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
1237 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
1238
1239 /* Check for condition (a) */
1240 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
1241 if( iTst>=0 ){
1242 int i;
1243 int szMax = 0;
1244 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
1245 assert( pTst->nMerge==0 );
1246 for(i=0; i<pTst->nSeg; i++){
1247 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
1248 if( sz>szMax ) szMax = sz;
1249 }
1250 if( szMax>=szSeg ){
1251 /* Condition (a) is true. Promote the newest segment on level
1252 ** iLvl to level iTst. */
1253 iPromote = iTst;
1254 szPromote = szMax;
1255 }
1256 }
1257
1258 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
1259 ** is a no-op if it is not. */
1260 if( iPromote<0 ){
1261 iPromote = iLvl;
1262 szPromote = szSeg;
1263 }
1264 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
1265 }
1266 }
1267
1268
1269 /*
1270 ** Advance the iterator passed as the only argument. If the end of the
1271 ** doclist-index page is reached, return non-zero.
1272 */
fts5DlidxLvlNext(Fts5DlidxLvl * pLvl)1273 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
1274 Fts5Data *pData = pLvl->pData;
1275
1276 if( pLvl->iOff==0 ){
1277 assert( pLvl->bEof==0 );
1278 pLvl->iOff = 1;
1279 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
1280 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
1281 pLvl->iFirstOff = pLvl->iOff;
1282 }else{
1283 int iOff;
1284 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
1285 if( pData->p[iOff] ) break;
1286 }
1287
1288 if( iOff<pData->nn ){
1289 i64 iVal;
1290 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
1291 iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
1292 pLvl->iRowid += iVal;
1293 pLvl->iOff = iOff;
1294 }else{
1295 pLvl->bEof = 1;
1296 }
1297 }
1298
1299 return pLvl->bEof;
1300 }
1301
1302 /*
1303 ** Advance the iterator passed as the only argument.
1304 */
fts5DlidxIterNextR(Fts5Index * p,Fts5DlidxIter * pIter,int iLvl)1305 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1306 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1307
1308 assert( iLvl<pIter->nLvl );
1309 if( fts5DlidxLvlNext(pLvl) ){
1310 if( (iLvl+1) < pIter->nLvl ){
1311 fts5DlidxIterNextR(p, pIter, iLvl+1);
1312 if( pLvl[1].bEof==0 ){
1313 fts5DataRelease(pLvl->pData);
1314 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1315 pLvl->pData = fts5DataRead(p,
1316 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1317 );
1318 if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
1319 }
1320 }
1321 }
1322
1323 return pIter->aLvl[0].bEof;
1324 }
/*
** Advance doclist-index iterator pIter to the next entry, starting at
** level 0. Returns the value returned by fts5DlidxIterNextR().
*/
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
  const int bEof = fts5DlidxIterNextR(p, pIter, 0);
  return bEof;
}
1328
1329 /*
1330 ** The iterator passed as the first argument has the following fields set
1331 ** as follows. This function sets up the rest of the iterator so that it
1332 ** points to the first rowid in the doclist-index.
1333 **
1334 ** pData:
1335 ** pointer to doclist-index record,
1336 **
1337 ** When this function is called pIter->iLeafPgno is the page number the
1338 ** doclist is associated with (the one featuring the term).
1339 */
fts5DlidxIterFirst(Fts5DlidxIter * pIter)1340 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
1341 int i;
1342 for(i=0; i<pIter->nLvl; i++){
1343 fts5DlidxLvlNext(&pIter->aLvl[i]);
1344 }
1345 return pIter->aLvl[0].bEof;
1346 }
1347
1348
/*
** Return 1 if an error has occurred (p->rc!=SQLITE_OK) or if level 0 of
** iterator pIter is at EOF. Otherwise return 0.
*/
static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  if( p->rc!=SQLITE_OK ) return 1;
  return pIter->aLvl[0].bEof!=0;
}
1352
/*
** Move doclist-index iterator pIter so that it points to its last entry.
**
** Levels are processed from the highest down to level 0. Each level is
** advanced to the last entry on its current page and its bEof flag is
** cleared. Then, for every level except level 0, the page that entry
** refers to is loaded as the current page of the level below.
**
** Processing stops early if an error occurs (p->rc!=SQLITE_OK).
*/
static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
  int iLvl = pIter->nLvl-1;

  while( p->rc==SQLITE_OK && iLvl>=0 ){
    Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

    /* Advance this level to the last entry on its page */
    do{}while( fts5DlidxLvlNext(pLvl)==0 );
    pLvl->bEof = 0;

    if( iLvl>0 ){
      Fts5DlidxLvl *pChild = &pLvl[-1];
      fts5DataRelease(pChild->pData);
      memset(pChild, 0, sizeof(Fts5DlidxLvl));
      pChild->pData = fts5DataRead(p,
          FTS5_DLIDX_ROWID(pIter->iSegid, iLvl-1, pLvl->iLeafPgno)
      );
    }
    iLvl--;
  }
}
1372
1373 /*
1374 ** Move the iterator passed as the only argument to the previous entry.
1375 */
fts5DlidxLvlPrev(Fts5DlidxLvl * pLvl)1376 static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
1377 int iOff = pLvl->iOff;
1378
1379 assert( pLvl->bEof==0 );
1380 if( iOff<=pLvl->iFirstOff ){
1381 pLvl->bEof = 1;
1382 }else{
1383 u8 *a = pLvl->pData->p;
1384 i64 iVal;
1385 int iLimit;
1386 int ii;
1387 int nZero = 0;
1388
1389 /* Currently iOff points to the first byte of a varint. This block
1390 ** decrements iOff until it points to the first byte of the previous
1391 ** varint. Taking care not to read any memory locations that occur
1392 ** before the buffer in memory. */
1393 iLimit = (iOff>9 ? iOff-9 : 0);
1394 for(iOff--; iOff>iLimit; iOff--){
1395 if( (a[iOff-1] & 0x80)==0 ) break;
1396 }
1397
1398 fts5GetVarint(&a[iOff], (u64*)&iVal);
1399 pLvl->iRowid -= iVal;
1400 pLvl->iLeafPgno--;
1401
1402 /* Skip backwards past any 0x00 varints. */
1403 for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
1404 nZero++;
1405 }
1406 if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
1407 /* The byte immediately before the last 0x00 byte has the 0x80 bit
1408 ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
1409 ** bytes before a[ii]. */
1410 int bZero = 0; /* True if last 0x00 counts */
1411 if( (ii-8)>=pLvl->iFirstOff ){
1412 int j;
1413 for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
1414 bZero = (j>8);
1415 }
1416 if( bZero==0 ) nZero--;
1417 }
1418 pLvl->iLeafPgno -= nZero;
1419 pLvl->iOff = iOff - nZero;
1420 }
1421
1422 return pLvl->bEof;
1423 }
1424
/*
** Move level iLvl of doclist-index iterator pIter to its previous entry.
**
** If level iLvl is already at the first entry of its current page and
** there is a level iLvl+1, that level is moved back recursively. Unless
** it is then at EOF, the page it now refers to is loaded into level iLvl,
** which is left positioned on the page's last entry.
**
** Returns the bEof flag of level 0 of the iterator.
*/
static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlPrev(pLvl) && (iLvl+1)<pIter->nLvl ){
    Fts5DlidxLvl *pParent = &pLvl[1];

    fts5DlidxIterPrevR(p, pIter, iLvl+1);
    if( pParent->bEof==0 ){
      i64 iRowid = FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pParent->iLeafPgno);

      fts5DataRelease(pLvl->pData);
      memset(pLvl, 0, sizeof(Fts5DlidxLvl));
      pLvl->pData = fts5DataRead(p, iRowid);
      if( pLvl->pData ){
        /* Advance to the last entry on the newly loaded page */
        do{}while( fts5DlidxLvlNext(pLvl)==0 );
        pLvl->bEof = 0;
      }
    }
  }

  return pIter->aLvl[0].bEof;
}
/*
** Move doclist-index iterator pIter to the previous entry, starting at
** level 0. Returns the value returned by fts5DlidxIterPrevR().
*/
static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
  const int bEof = fts5DlidxIterPrevR(p, pIter, 0);
  return bEof;
}
1451
1452 /*
1453 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1454 */
fts5DlidxIterFree(Fts5DlidxIter * pIter)1455 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
1456 if( pIter ){
1457 int i;
1458 for(i=0; i<pIter->nLvl; i++){
1459 fts5DataRelease(pIter->aLvl[i].pData);
1460 }
1461 sqlite3_free(pIter);
1462 }
1463 }
1464
fts5DlidxIterInit(Fts5Index * p,int bRev,int iSegid,int iLeafPg)1465 static Fts5DlidxIter *fts5DlidxIterInit(
1466 Fts5Index *p, /* Fts5 Backend to iterate within */
1467 int bRev, /* True for ORDER BY ASC */
1468 int iSegid, /* Segment id */
1469 int iLeafPg /* Leaf page number to load dlidx for */
1470 ){
1471 Fts5DlidxIter *pIter = 0;
1472 int i;
1473 int bDone = 0;
1474
1475 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
1476 int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
1477 Fts5DlidxIter *pNew;
1478
1479 pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte);
1480 if( pNew==0 ){
1481 p->rc = SQLITE_NOMEM;
1482 }else{
1483 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
1484 Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
1485 pIter = pNew;
1486 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1487 pLvl->pData = fts5DataRead(p, iRowid);
1488 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
1489 bDone = 1;
1490 }
1491 pIter->nLvl = i+1;
1492 }
1493 }
1494
1495 if( p->rc==SQLITE_OK ){
1496 pIter->iSegid = iSegid;
1497 if( bRev==0 ){
1498 fts5DlidxIterFirst(pIter);
1499 }else{
1500 fts5DlidxIterLast(p, pIter);
1501 }
1502 }
1503
1504 if( p->rc!=SQLITE_OK ){
1505 fts5DlidxIterFree(pIter);
1506 pIter = 0;
1507 }
1508
1509 return pIter;
1510 }
1511
fts5DlidxIterRowid(Fts5DlidxIter * pIter)1512 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
1513 return pIter->aLvl[0].iRowid;
1514 }
/*
** Return the leaf page number of the entry that level 0 of doclist-index
** iterator pIter currently points to.
*/
static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
  Fts5DlidxLvl *pLeafLvl = &pIter->aLvl[0];
  return pLeafLvl->iLeafPgno;
}
1518
1519 /*
1520 ** Load the next leaf page into the segment iterator.
1521 */
fts5SegIterNextPage(Fts5Index * p,Fts5SegIter * pIter)1522 static void fts5SegIterNextPage(
1523 Fts5Index *p, /* FTS5 backend object */
1524 Fts5SegIter *pIter /* Iterator to advance to next page */
1525 ){
1526 Fts5Data *pLeaf;
1527 Fts5StructureSegment *pSeg = pIter->pSeg;
1528 fts5DataRelease(pIter->pLeaf);
1529 pIter->iLeafPgno++;
1530 if( pIter->pNextLeaf ){
1531 pIter->pLeaf = pIter->pNextLeaf;
1532 pIter->pNextLeaf = 0;
1533 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
1534 pIter->pLeaf = fts5LeafRead(p,
1535 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
1536 );
1537 }else{
1538 pIter->pLeaf = 0;
1539 }
1540 pLeaf = pIter->pLeaf;
1541
1542 if( pLeaf ){
1543 pIter->iPgidxOff = pLeaf->szLeaf;
1544 if( fts5LeafIsTermless(pLeaf) ){
1545 pIter->iEndofDoclist = pLeaf->nn+1;
1546 }else{
1547 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
1548 pIter->iEndofDoclist
1549 );
1550 }
1551 }
1552 }
1553
1554 /*
1555 ** Argument p points to a buffer containing a varint to be interpreted as a
1556 ** position list size field. Read the varint and return the number of bytes
1557 ** read. Before returning, set *pnSz to the number of bytes in the position
1558 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
1559 */
fts5GetPoslistSize(const u8 * p,int * pnSz,int * pbDel)1560 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
1561 int nSz;
1562 int n = 0;
1563 fts5FastGetVarint32(p, n, nSz);
1564 assert_nc( nSz>=0 );
1565 *pnSz = nSz/2;
1566 *pbDel = nSz & 0x0001;
1567 return n;
1568 }
1569
1570 /*
1571 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
1572 ** position-list size field. Read the value of the field and store it
1573 ** in the following variables:
1574 **
1575 ** Fts5SegIter.nPos
1576 ** Fts5SegIter.bDel
1577 **
1578 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1579 ** position list content (if any).
1580 */
fts5SegIterLoadNPos(Fts5Index * p,Fts5SegIter * pIter)1581 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
1582 if( p->rc==SQLITE_OK ){
1583 int iOff = pIter->iLeafOffset; /* Offset to read at */
1584 ASSERT_SZLEAF_OK(pIter->pLeaf);
1585 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1586 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
1587 pIter->bDel = 0;
1588 pIter->nPos = 1;
1589 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1590 pIter->bDel = 1;
1591 iOff++;
1592 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1593 pIter->nPos = 1;
1594 iOff++;
1595 }else{
1596 pIter->nPos = 0;
1597 }
1598 }
1599 }else{
1600 int nSz;
1601 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
1602 pIter->bDel = (nSz & 0x0001);
1603 pIter->nPos = nSz>>1;
1604 assert_nc( pIter->nPos>=0 );
1605 }
1606 pIter->iLeafOffset = iOff;
1607 }
1608 }
1609
/*
** Read the varint at Fts5SegIter.iLeafOffset into Fts5SegIter.iRowid and
** leave iLeafOffset pointing to the byte following it.
**
** If iLeafOffset is at or past the end of the current leaf (szLeaf), the
** next leaf page is loaded first and the rowid read from byte offset 4
** of that page. If there is no next page, p->rc is set to FTS5_CORRUPT
** (unless it already contains an error code).
*/
static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  Fts5Data *pLeaf = pIter->pLeaf; /* Page to read data from */
  int iRead = pIter->iLeafOffset; /* Offset to read at */

  ASSERT_SZLEAF_OK(pLeaf);
  if( iRead>=pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);
    pLeaf = pIter->pLeaf;
    if( pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    iRead = 4;
  }

  iRead += sqlite3Fts5GetVarint(&pLeaf->p[iRead], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iRead;
}
1627
1628 /*
1629 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
1630 ** "nSuffix" field of a term. Function parameter nKeep contains the value
1631 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
1632 ** the first term in the segment).
1633 **
1634 ** This function populates:
1635 **
1636 ** Fts5SegIter.term
1637 ** Fts5SegIter.rowid
1638 **
1639 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1640 ** the first position list. The position list belonging to document
1641 ** (Fts5SegIter.iRowid).
1642 */
fts5SegIterLoadTerm(Fts5Index * p,Fts5SegIter * pIter,int nKeep)1643 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
1644 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1645 int iOff = pIter->iLeafOffset; /* Offset to read at */
1646 int nNew; /* Bytes of new data */
1647
1648 iOff += fts5GetVarint32(&a[iOff], nNew);
1649 if( iOff+nNew>pIter->pLeaf->nn ){
1650 p->rc = FTS5_CORRUPT;
1651 return;
1652 }
1653 pIter->term.n = nKeep;
1654 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
1655 iOff += nNew;
1656 pIter->iTermLeafOffset = iOff;
1657 pIter->iTermLeafPgno = pIter->iLeafPgno;
1658 pIter->iLeafOffset = iOff;
1659
1660 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
1661 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1662 }else{
1663 int nExtra;
1664 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
1665 pIter->iEndofDoclist += nExtra;
1666 }
1667
1668 fts5SegIterLoadRowid(p, pIter);
1669 }
1670
1671 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
1672 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
1673 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
1674
/*
** Set Fts5SegIter.xNext to the function that should be used to advance
** iterator pIter:
**
**   * fts5SegIterNext_Reverse() if the FTS5_SEGITER_REVERSE flag is set,
**   * otherwise fts5SegIterNext_None() for detail=none tables,
**   * otherwise fts5SegIterNext().
*/
static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*) = fts5SegIterNext;

  if( pIter->flags & FTS5_SEGITER_REVERSE ){
    xNext = fts5SegIterNext_Reverse;
  }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
    xNext = fts5SegIterNext_None;
  }
  pIter->xNext = xNext;
}
1684
1685 /*
1686 ** Initialize the iterator object pIter to iterate through the entries in
1687 ** segment pSeg. The iterator is left pointing to the first entry when
1688 ** this function returns.
1689 **
1690 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1691 ** an error has already occurred when this function is called, it is a no-op.
1692 */
fts5SegIterInit(Fts5Index * p,Fts5StructureSegment * pSeg,Fts5SegIter * pIter)1693 static void fts5SegIterInit(
1694 Fts5Index *p, /* FTS index object */
1695 Fts5StructureSegment *pSeg, /* Description of segment */
1696 Fts5SegIter *pIter /* Object to populate */
1697 ){
1698 if( pSeg->pgnoFirst==0 ){
1699 /* This happens if the segment is being used as an input to an incremental
1700 ** merge and all data has already been "trimmed". See function
1701 ** fts5TrimSegments() for details. In this case leave the iterator empty.
1702 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
1703 ** at EOF already. */
1704 assert( pIter->pLeaf==0 );
1705 return;
1706 }
1707
1708 if( p->rc==SQLITE_OK ){
1709 memset(pIter, 0, sizeof(*pIter));
1710 fts5SegIterSetNext(p, pIter);
1711 pIter->pSeg = pSeg;
1712 pIter->iLeafPgno = pSeg->pgnoFirst-1;
1713 fts5SegIterNextPage(p, pIter);
1714 }
1715
1716 if( p->rc==SQLITE_OK ){
1717 pIter->iLeafOffset = 4;
1718 assert_nc( pIter->pLeaf->nn>4 );
1719 assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
1720 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
1721 fts5SegIterLoadTerm(p, pIter, 0);
1722 fts5SegIterLoadNPos(p, pIter);
1723 }
1724 }
1725
1726 /*
1727 ** This function is only ever called on iterators created by calls to
1728 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
1729 **
1730 ** The iterator is in an unusual state when this function is called: the
1731 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
1732 ** the position-list size field for the first relevant rowid on the page.
1733 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
1734 **
1735 ** This function advances the iterator so that it points to the last
1736 ** relevant rowid on the page and, if necessary, initializes the
1737 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
1738 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
1739 ** byte of the position list content associated with said rowid.
1740 */
fts5SegIterReverseInitPage(Fts5Index * p,Fts5SegIter * pIter)1741 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
1742 int eDetail = p->pConfig->eDetail;
1743 int n = pIter->pLeaf->szLeaf;
1744 int i = pIter->iLeafOffset;
1745 u8 *a = pIter->pLeaf->p;
1746 int iRowidOffset = 0;
1747
1748 if( n>pIter->iEndofDoclist ){
1749 n = pIter->iEndofDoclist;
1750 }
1751
1752 ASSERT_SZLEAF_OK(pIter->pLeaf);
1753 while( 1 ){
1754 i64 iDelta = 0;
1755
1756 if( eDetail==FTS5_DETAIL_NONE ){
1757 /* todo */
1758 if( i<n && a[i]==0 ){
1759 i++;
1760 if( i<n && a[i]==0 ) i++;
1761 }
1762 }else{
1763 int nPos;
1764 int bDummy;
1765 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
1766 i += nPos;
1767 }
1768 if( i>=n ) break;
1769 i += fts5GetVarint(&a[i], (u64*)&iDelta);
1770 pIter->iRowid += iDelta;
1771
1772 /* If necessary, grow the pIter->aRowidOffset[] array. */
1773 if( iRowidOffset>=pIter->nRowidOffset ){
1774 int nNew = pIter->nRowidOffset + 8;
1775 int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
1776 if( aNew==0 ){
1777 p->rc = SQLITE_NOMEM;
1778 break;
1779 }
1780 pIter->aRowidOffset = aNew;
1781 pIter->nRowidOffset = nNew;
1782 }
1783
1784 pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
1785 pIter->iLeafOffset = i;
1786 }
1787 pIter->iRowidOffset = iRowidOffset;
1788 fts5SegIterLoadNPos(p, pIter);
1789 }
1790
1791 /*
1792 **
1793 */
fts5SegIterReverseNewPage(Fts5Index * p,Fts5SegIter * pIter)1794 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
1795 assert( pIter->flags & FTS5_SEGITER_REVERSE );
1796 assert( pIter->flags & FTS5_SEGITER_ONETERM );
1797
1798 fts5DataRelease(pIter->pLeaf);
1799 pIter->pLeaf = 0;
1800 while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
1801 Fts5Data *pNew;
1802 pIter->iLeafPgno--;
1803 pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
1804 pIter->pSeg->iSegid, pIter->iLeafPgno
1805 ));
1806 if( pNew ){
1807 /* iTermLeafOffset may be equal to szLeaf if the term is the last
1808 ** thing on the page - i.e. the first rowid is on the following page.
1809 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
1810 if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
1811 assert( pIter->pLeaf==0 );
1812 if( pIter->iTermLeafOffset<pNew->szLeaf ){
1813 pIter->pLeaf = pNew;
1814 pIter->iLeafOffset = pIter->iTermLeafOffset;
1815 }
1816 }else{
1817 int iRowidOff;
1818 iRowidOff = fts5LeafFirstRowidOff(pNew);
1819 if( iRowidOff ){
1820 pIter->pLeaf = pNew;
1821 pIter->iLeafOffset = iRowidOff;
1822 }
1823 }
1824
1825 if( pIter->pLeaf ){
1826 u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
1827 pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
1828 break;
1829 }else{
1830 fts5DataRelease(pNew);
1831 }
1832 }
1833 }
1834
1835 if( pIter->pLeaf ){
1836 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1837 fts5SegIterReverseInitPage(p, pIter);
1838 }
1839 }
1840
1841 /*
1842 ** Return true if the iterator passed as the second argument currently
1843 ** points to a delete marker. A delete marker is an entry with a 0 byte
1844 ** position-list.
1845 */
fts5MultiIterIsEmpty(Fts5Index * p,Fts5Iter * pIter)1846 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
1847 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
1848 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
1849 }
1850
1851 /*
1852 ** Advance iterator pIter to the next entry.
1853 **
1854 ** This version of fts5SegIterNext() is only used by reverse iterators.
1855 */
fts5SegIterNext_Reverse(Fts5Index * p,Fts5SegIter * pIter,int * pbUnused)1856 static void fts5SegIterNext_Reverse(
1857 Fts5Index *p, /* FTS5 backend object */
1858 Fts5SegIter *pIter, /* Iterator to advance */
1859 int *pbUnused /* Unused */
1860 ){
1861 assert( pIter->flags & FTS5_SEGITER_REVERSE );
1862 assert( pIter->pNextLeaf==0 );
1863 UNUSED_PARAM(pbUnused);
1864
1865 if( pIter->iRowidOffset>0 ){
1866 u8 *a = pIter->pLeaf->p;
1867 int iOff;
1868 i64 iDelta;
1869
1870 pIter->iRowidOffset--;
1871 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
1872 fts5SegIterLoadNPos(p, pIter);
1873 iOff = pIter->iLeafOffset;
1874 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
1875 iOff += pIter->nPos;
1876 }
1877 fts5GetVarint(&a[iOff], (u64*)&iDelta);
1878 pIter->iRowid -= iDelta;
1879 }else{
1880 fts5SegIterReverseNewPage(p, pIter);
1881 }
1882 }
1883
1884 /*
1885 ** Advance iterator pIter to the next entry.
1886 **
1887 ** This version of fts5SegIterNext() is only used if detail=none and the
1888 ** iterator is not a reverse direction iterator.
1889 */
fts5SegIterNext_None(Fts5Index * p,Fts5SegIter * pIter,int * pbNewTerm)1890 static void fts5SegIterNext_None(
1891 Fts5Index *p, /* FTS5 backend object */
1892 Fts5SegIter *pIter, /* Iterator to advance */
1893 int *pbNewTerm /* OUT: Set for new term */
1894 ){
1895 int iOff;
1896
1897 assert( p->rc==SQLITE_OK );
1898 assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
1899 assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
1900
1901 ASSERT_SZLEAF_OK(pIter->pLeaf);
1902 iOff = pIter->iLeafOffset;
1903
1904 /* Next entry is on the next page */
1905 if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
1906 fts5SegIterNextPage(p, pIter);
1907 if( p->rc || pIter->pLeaf==0 ) return;
1908 pIter->iRowid = 0;
1909 iOff = 4;
1910 }
1911
1912 if( iOff<pIter->iEndofDoclist ){
1913 /* Next entry is on the current page */
1914 i64 iDelta;
1915 iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
1916 pIter->iLeafOffset = iOff;
1917 pIter->iRowid += iDelta;
1918 }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
1919 if( pIter->pSeg ){
1920 int nKeep = 0;
1921 if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
1922 iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
1923 }
1924 pIter->iLeafOffset = iOff;
1925 fts5SegIterLoadTerm(p, pIter, nKeep);
1926 }else{
1927 const u8 *pList = 0;
1928 const char *zTerm = 0;
1929 int nList;
1930 sqlite3Fts5HashScanNext(p->pHash);
1931 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
1932 if( pList==0 ) goto next_none_eof;
1933 pIter->pLeaf->p = (u8*)pList;
1934 pIter->pLeaf->nn = nList;
1935 pIter->pLeaf->szLeaf = nList;
1936 pIter->iEndofDoclist = nList;
1937 sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
1938 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
1939 }
1940
1941 if( pbNewTerm ) *pbNewTerm = 1;
1942 }else{
1943 goto next_none_eof;
1944 }
1945
1946 fts5SegIterLoadNPos(p, pIter);
1947
1948 return;
1949 next_none_eof:
1950 fts5DataRelease(pIter->pLeaf);
1951 pIter->pLeaf = 0;
1952 }
1953
1954
/*
** Advance iterator pIter to the next entry.
**
** This version is used when detail!=none (see the assert() below). Three
** cases are handled:
**
**   * the next entry lies on the current page,
**   * the iterator reads from the in-memory hash table (pIter->pSeg==0)
**     and the current doclist is exhausted, or
**   * the next entry lies on a later page of the segment.
**
** If the iterator moves on to a new term, *pbNewTerm is set to 1 (when
** pbNewTerm is not NULL). At EOF the leaf is released and pIter->pLeaf is
** set to NULL; reaching EOF is not an error. If a corrupt page is detected
** while moving to a later page, p->rc is set to FTS5_CORRUPT.
**
** NOTE(review): the body does not test p->rc on entry and dereferences
** pIter->pLeaf immediately, so it relies on callers invoking it only when
** p->rc==SQLITE_OK and the iterator is not at EOF - confirm.
*/
static void fts5SegIterNext(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  Fts5Data *pLeaf = pIter->pLeaf;
  int iOff;
  int bNewTerm = 0;
  int nKeep = 0;
  u8 *a;
  int n;

  assert( pbNewTerm==0 || *pbNewTerm==0 );
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  /* Search for the end of the position list within the current page. */
  a = pLeaf->p;
  n = pLeaf->szLeaf;

  ASSERT_SZLEAF_OK(pLeaf);
  iOff = pIter->iLeafOffset + pIter->nPos;

  if( iOff<n ){
    /* The next entry is on the current page. */
    assert_nc( iOff<=pIter->iEndofDoclist );
    if( iOff>=pIter->iEndofDoclist ){
      /* End of the current doclist: a new term follows. Unless it is the
      ** first term on the page, read its nKeep (shared prefix) field. */
      bNewTerm = 1;
      if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
        iOff += fts5GetVarint32(&a[iOff], nKeep);
      }
    }else{
      /* Same doclist: the next value is a rowid delta. */
      u64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
      pIter->iRowid += iDelta;
      assert_nc( iDelta>0 );
    }
    pIter->iLeafOffset = iOff;

  }else if( pIter->pSeg==0 ){
    /* Hash-table iterator. Unless this is a single-term iterator, step the
    ** hash scan to its next entry. If no entry is available the iterator
    ** is at EOF; otherwise the leaf wrapper is repointed at the new
    ** doclist. */
    const u8 *pList = 0;
    const char *zTerm = 0;
    int nList = 0;
    assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
    if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
      sqlite3Fts5HashScanNext(p->pHash);
      sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
    }
    if( pList==0 ){
      fts5DataRelease(pIter->pLeaf);
      pIter->pLeaf = 0;
    }else{
      pIter->pLeaf->p = (u8*)pList;
      pIter->pLeaf->nn = nList;
      pIter->pLeaf->szLeaf = nList;
      pIter->iEndofDoclist = nList+1;
      sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
          (u8*)zTerm);
      pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
      *pbNewTerm = 1;
    }
  }else{
    iOff = 0;
    /* Next entry is not on the current page. Keep loading pages until one
    ** is found that contains either a rowid or a term (iOff becomes
    ** non-zero), or until the end of the segment is reached. */
    while( iOff==0 ){
      fts5SegIterNextPage(p, pIter);
      pLeaf = pIter->pLeaf;
      if( pLeaf==0 ) break;
      ASSERT_SZLEAF_OK(pLeaf);
      if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
        /* The page starts with a rowid belonging to the current doclist. */
        iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
        pIter->iLeafOffset = iOff;

        if( pLeaf->nn>pLeaf->szLeaf ){
          /* The page also holds terms: the first page-index varint gives
          ** the offset at which the current doclist ends. */
          pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
              &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
          );
        }
      }
      else if( pLeaf->nn>pLeaf->szLeaf ){
        /* No usable first rowid, but the page has a page-index: the next
        ** entry is the first term on the page. */
        pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
            &pLeaf->p[pLeaf->szLeaf], iOff
        );
        pIter->iLeafOffset = iOff;
        pIter->iEndofDoclist = iOff;
        bNewTerm = 1;
      }
      assert_nc( iOff<pLeaf->szLeaf );
      if( iOff>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
        return;
      }
    }
  }

  /* Check if the iterator is now at EOF. If so, return early. */
  if( pIter->pLeaf ){
    if( bNewTerm ){
      if( pIter->flags & FTS5_SEGITER_ONETERM ){
        /* A single-term iterator is at EOF once its term is exhausted. */
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
      }else{
        fts5SegIterLoadTerm(p, pIter, nKeep);
        fts5SegIterLoadNPos(p, pIter);
        if( pbNewTerm ) *pbNewTerm = 1;
      }
    }else{
      /* The following could be done by calling fts5SegIterLoadNPos(). But
      ** this block is particularly performance critical, so the size field
      ** is decoded inline instead: the low bit of the varint is the delete
      ** flag and the remaining bits are the position-list size. This
      ** function is never used with detail=none (see the assert() at the
      ** top), so the inline form does not need to handle that mode.
      */
      int nSz;
      assert( p->rc==SQLITE_OK );
      assert( pIter->iLeafOffset<=pIter->pLeaf->nn );
      fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
      pIter->bDel = (nSz & 0x0001);
      pIter->nPos = nSz>>1;
      assert_nc( pIter->nPos>=0 );
    }
  }
}
2085
/*
** Swap the values of lvalues a and b, both of which must be of type T.
** The expansion is a brace-delimited block that declares a temporary
** named "tmp"; each argument is evaluated more than once.
*/
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }

/*
** Advance integer lvalue iOff past the varint stored at a[iOff]. Bytes are
** consumed while the 0x80 continuation bit is set, up to a maximum of 9
** bytes. The expansion is a brace-delimited block that declares a local
** named "iEnd"; iOff is both read and modified.
*/
#define fts5IndexSkipVarint(a, iOff) {            \
  int iEnd = iOff+9;                              \
  while( (a[iOff++] & 0x80) && iOff<iEnd );       \
}
2092
2093 /*
2094 ** Iterator pIter currently points to the first rowid in a doclist. This
2095 ** function sets the iterator up so that iterates in reverse order through
2096 ** the doclist.
2097 */
fts5SegIterReverse(Fts5Index * p,Fts5SegIter * pIter)2098 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
2099 Fts5DlidxIter *pDlidx = pIter->pDlidx;
2100 Fts5Data *pLast = 0;
2101 int pgnoLast = 0;
2102
2103 if( pDlidx ){
2104 int iSegid = pIter->pSeg->iSegid;
2105 pgnoLast = fts5DlidxIterPgno(pDlidx);
2106 pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
2107 }else{
2108 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2109
2110 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
2111 ** position-list content for the current rowid. Back it up so that it
2112 ** points to the start of the position-list size field. */
2113 int iPoslist;
2114 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
2115 iPoslist = pIter->iTermLeafOffset;
2116 }else{
2117 iPoslist = 4;
2118 }
2119 fts5IndexSkipVarint(pLeaf->p, iPoslist);
2120 pIter->iLeafOffset = iPoslist;
2121
2122 /* If this condition is true then the largest rowid for the current
2123 ** term may not be stored on the current page. So search forward to
2124 ** see where said rowid really is. */
2125 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
2126 int pgno;
2127 Fts5StructureSegment *pSeg = pIter->pSeg;
2128
2129 /* The last rowid in the doclist may not be on the current page. Search
2130 ** forward to find the page containing the last rowid. */
2131 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
2132 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
2133 Fts5Data *pNew = fts5DataRead(p, iAbs);
2134 if( pNew ){
2135 int iRowid, bTermless;
2136 iRowid = fts5LeafFirstRowidOff(pNew);
2137 bTermless = fts5LeafIsTermless(pNew);
2138 if( iRowid ){
2139 SWAPVAL(Fts5Data*, pNew, pLast);
2140 pgnoLast = pgno;
2141 }
2142 fts5DataRelease(pNew);
2143 if( bTermless==0 ) break;
2144 }
2145 }
2146 }
2147 }
2148
2149 /* If pLast is NULL at this point, then the last rowid for this doclist
2150 ** lies on the page currently indicated by the iterator. In this case
2151 ** pIter->iLeafOffset is already set to point to the position-list size
2152 ** field associated with the first relevant rowid on the page.
2153 **
2154 ** Or, if pLast is non-NULL, then it is the page that contains the last
2155 ** rowid. In this case configure the iterator so that it points to the
2156 ** first rowid on this page.
2157 */
2158 if( pLast ){
2159 int iOff;
2160 fts5DataRelease(pIter->pLeaf);
2161 pIter->pLeaf = pLast;
2162 pIter->iLeafPgno = pgnoLast;
2163 iOff = fts5LeafFirstRowidOff(pLast);
2164 iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
2165 pIter->iLeafOffset = iOff;
2166
2167 if( fts5LeafIsTermless(pLast) ){
2168 pIter->iEndofDoclist = pLast->nn+1;
2169 }else{
2170 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
2171 }
2172
2173 }
2174
2175 fts5SegIterReverseInitPage(p, pIter);
2176 }
2177
2178 /*
2179 ** Iterator pIter currently points to the first rowid of a doclist.
2180 ** There is a doclist-index associated with the final term on the current
2181 ** page. If the current term is the last term on the page, load the
2182 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
2183 */
fts5SegIterLoadDlidx(Fts5Index * p,Fts5SegIter * pIter)2184 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
2185 int iSeg = pIter->pSeg->iSegid;
2186 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2187 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2188
2189 assert( pIter->flags & FTS5_SEGITER_ONETERM );
2190 assert( pIter->pDlidx==0 );
2191
2192 /* Check if the current doclist ends on this page. If it does, return
2193 ** early without loading the doclist-index (as it belongs to a different
2194 ** term. */
2195 if( pIter->iTermLeafPgno==pIter->iLeafPgno
2196 && pIter->iEndofDoclist<pLeaf->szLeaf
2197 ){
2198 return;
2199 }
2200
2201 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
2202 }
2203
/*
** The iterator object passed as the third argument currently contains
** no valid values except for the Fts5SegIter.pLeaf member variable. This
** function searches the leaf page for a term matching (pTerm/nTerm).
**
** If the specified term is found on the page, then the iterator is left
** pointing to it. If argument bGe is zero and the term is not found,
** the iterator is left pointing at EOF.
**
** If bGe is non-zero and the specified term is not found, then the
** iterator is left pointing to the smallest term in the segment that
** is larger than the specified term, even if this term is not on the
** current page.
**
** If an offset read from a page-index is found to be out of range, p->rc
** is set to FTS5_CORRUPT.
*/
static void fts5LeafSeek(
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  int iOff;
  const u8 *a = pIter->pLeaf->p;
  int szLeaf = pIter->pLeaf->szLeaf;
  int n = pIter->pLeaf->nn;

  int nMatch = 0;                 /* Bytes of pTerm matched so far */
  int nKeep = 0;                  /* Prefix bytes shared with previous term */
  int nNew = 0;                   /* Suffix bytes stored for current term */
  int iTermOff;                   /* Offset of current term on the page */
  int iPgidx;                     /* Current offset in pgidx */
  int bEndOfPage = 0;             /* Set if all terms on page were visited */

  assert( p->rc==SQLITE_OK );

  /* The page-index starts at offset szLeaf. Its first varint is the offset
  ** of the first term on the page. */
  iPgidx = szLeaf;
  iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
  iOff = iTermOff;
  if( iOff>n ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  while( 1 ){

    /* Figure out how many new bytes are in this term. The macro also
    ** advances iOff past the varint, to the first suffix byte. */
    fts5FastGetVarint32(a, iOff, nNew);
    if( nKeep<nMatch ){
      /* This term shares fewer bytes with its predecessor than have been
      ** matched against pTerm, so the search is abandoned here. */
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){
      /* Compare the stored suffix against the unmatched part of pTerm. */
      int nCmp;
      int i;
      nCmp = MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( nTerm==nMatch ){
        if( i==nNew ){
          goto search_success;
        }else{
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        goto search_failed;
      }
    }

    if( iPgidx>=n ){
      bEndOfPage = 1;
      break;
    }

    /* Each subsequent page-index varint is a delta from the offset of the
    ** previous term. nKeep is used as scratch space for the delta here and
    ** is overwritten with the real value below. */
    iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
    iTermOff += nKeep;
    iOff = iTermOff;

    if( iOff>=n ){
      p->rc = FTS5_CORRUPT;
      return;
    }

    /* Read the nKeep field of the next term. */
    fts5FastGetVarint32(a, iOff, nKeep);
  }

 search_failed:
  if( bGe==0 ){
    /* Exact-match search: the term is not present. Leave pIter at EOF. */
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( bEndOfPage ){
    /* A >= search that ran off the end of the page: move forward to the
    ** first term on the first following page that contains a term. */
    do {
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
        iPgidx = pIter->pLeaf->szLeaf;
        iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
        if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
        }else{
          nKeep = 0;
          iTermOff = iOff;
          n = pIter->pLeaf->nn;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

 search_success:

  /* iOff is the offset of the term's suffix bytes. The doclist follows the
  ** nNew suffix bytes. */
  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  /* The term is the first nKeep bytes of pTerm followed by the nNew suffix
  ** bytes stored on the page. */
  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);

  if( iPgidx>=n ){
    /* No further terms on this page. */
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    /* The doclist ends where the next term on the page begins. */
    int nExtra;
    iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
    pIter->iEndofDoclist = iTermOff + nExtra;
  }
  pIter->iPgidxOff = iPgidx;

  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}
2340
fts5IdxSelectStmt(Fts5Index * p)2341 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
2342 if( p->pIdxSelect==0 ){
2343 Fts5Config *pConfig = p->pConfig;
2344 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
2345 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2346 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2347 pConfig->zDb, pConfig->zName
2348 ));
2349 }
2350 return p->pIdxSelect;
2351 }
2352
2353 /*
2354 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2355 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2356 **
2357 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2358 ** an error has already occurred when this function is called, it is a no-op.
2359 */
fts5SegIterSeekInit(Fts5Index * p,const u8 * pTerm,int nTerm,int flags,Fts5StructureSegment * pSeg,Fts5SegIter * pIter)2360 static void fts5SegIterSeekInit(
2361 Fts5Index *p, /* FTS5 backend */
2362 const u8 *pTerm, int nTerm, /* Term to seek to */
2363 int flags, /* Mask of FTS5INDEX_XXX flags */
2364 Fts5StructureSegment *pSeg, /* Description of segment */
2365 Fts5SegIter *pIter /* Object to populate */
2366 ){
2367 int iPg = 1;
2368 int bGe = (flags & FTS5INDEX_QUERY_SCAN);
2369 int bDlidx = 0; /* True if there is a doclist-index */
2370 sqlite3_stmt *pIdxSelect = 0;
2371
2372 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
2373 assert( pTerm && nTerm );
2374 memset(pIter, 0, sizeof(*pIter));
2375 pIter->pSeg = pSeg;
2376
2377 /* This block sets stack variable iPg to the leaf page number that may
2378 ** contain term (pTerm/nTerm), if it is present in the segment. */
2379 pIdxSelect = fts5IdxSelectStmt(p);
2380 if( p->rc ) return;
2381 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
2382 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
2383 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
2384 i64 val = sqlite3_column_int(pIdxSelect, 0);
2385 iPg = (int)(val>>1);
2386 bDlidx = (val & 0x0001);
2387 }
2388 p->rc = sqlite3_reset(pIdxSelect);
2389
2390 if( iPg<pSeg->pgnoFirst ){
2391 iPg = pSeg->pgnoFirst;
2392 bDlidx = 0;
2393 }
2394
2395 pIter->iLeafPgno = iPg - 1;
2396 fts5SegIterNextPage(p, pIter);
2397
2398 if( pIter->pLeaf ){
2399 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
2400 }
2401
2402 if( p->rc==SQLITE_OK && bGe==0 ){
2403 pIter->flags |= FTS5_SEGITER_ONETERM;
2404 if( pIter->pLeaf ){
2405 if( flags & FTS5INDEX_QUERY_DESC ){
2406 pIter->flags |= FTS5_SEGITER_REVERSE;
2407 }
2408 if( bDlidx ){
2409 fts5SegIterLoadDlidx(p, pIter);
2410 }
2411 if( flags & FTS5INDEX_QUERY_DESC ){
2412 fts5SegIterReverse(p, pIter);
2413 }
2414 }
2415 }
2416
2417 fts5SegIterSetNext(p, pIter);
2418
2419 /* Either:
2420 **
2421 ** 1) an error has occurred, or
2422 ** 2) the iterator points to EOF, or
2423 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
2424 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2425 ** to an entry with a term greater than or equal to (pTerm/nTerm).
2426 */
2427 assert( p->rc!=SQLITE_OK /* 1 */
2428 || pIter->pLeaf==0 /* 2 */
2429 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
2430 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
2431 );
2432 }
2433
2434 /*
2435 ** Initialize the object pIter to point to term pTerm/nTerm within the
2436 ** in-memory hash table. If there is no such term in the hash-table, the
2437 ** iterator is set to EOF.
2438 **
2439 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2440 ** an error has already occurred when this function is called, it is a no-op.
2441 */
fts5SegIterHashInit(Fts5Index * p,const u8 * pTerm,int nTerm,int flags,Fts5SegIter * pIter)2442 static void fts5SegIterHashInit(
2443 Fts5Index *p, /* FTS5 backend */
2444 const u8 *pTerm, int nTerm, /* Term to seek to */
2445 int flags, /* Mask of FTS5INDEX_XXX flags */
2446 Fts5SegIter *pIter /* Object to populate */
2447 ){
2448 const u8 *pList = 0;
2449 int nList = 0;
2450 const u8 *z = 0;
2451 int n = 0;
2452
2453 assert( p->pHash );
2454 assert( p->rc==SQLITE_OK );
2455
2456 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
2457 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
2458 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
2459 n = (z ? (int)strlen((const char*)z) : 0);
2460 }else{
2461 pIter->flags |= FTS5_SEGITER_ONETERM;
2462 sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList);
2463 z = pTerm;
2464 n = nTerm;
2465 }
2466
2467 if( pList ){
2468 Fts5Data *pLeaf;
2469 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
2470 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
2471 if( pLeaf==0 ) return;
2472 pLeaf->p = (u8*)pList;
2473 pLeaf->nn = pLeaf->szLeaf = nList;
2474 pIter->pLeaf = pLeaf;
2475 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
2476 pIter->iEndofDoclist = pLeaf->nn;
2477
2478 if( flags & FTS5INDEX_QUERY_DESC ){
2479 pIter->flags |= FTS5_SEGITER_REVERSE;
2480 fts5SegIterReverseInitPage(p, pIter);
2481 }else{
2482 fts5SegIterLoadNPos(p, pIter);
2483 }
2484 }
2485
2486 fts5SegIterSetNext(p, pIter);
2487 }
2488
2489 /*
2490 ** Zero the iterator passed as the only argument.
2491 */
fts5SegIterClear(Fts5SegIter * pIter)2492 static void fts5SegIterClear(Fts5SegIter *pIter){
2493 fts5BufferFree(&pIter->term);
2494 fts5DataRelease(pIter->pLeaf);
2495 fts5DataRelease(pIter->pNextLeaf);
2496 fts5DlidxIterFree(pIter->pDlidx);
2497 sqlite3_free(pIter->aRowidOffset);
2498 memset(pIter, 0, sizeof(Fts5SegIter));
2499 }
2500
2501 #ifdef SQLITE_DEBUG
2502
2503 /*
2504 ** This function is used as part of the big assert() procedure implemented by
2505 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
2506 ** in *pRes is the correct result of comparing the current positions of the
2507 ** two iterators.
2508 */
fts5AssertComparisonResult(Fts5Iter * pIter,Fts5SegIter * p1,Fts5SegIter * p2,Fts5CResult * pRes)2509 static void fts5AssertComparisonResult(
2510 Fts5Iter *pIter,
2511 Fts5SegIter *p1,
2512 Fts5SegIter *p2,
2513 Fts5CResult *pRes
2514 ){
2515 int i1 = p1 - pIter->aSeg;
2516 int i2 = p2 - pIter->aSeg;
2517
2518 if( p1->pLeaf || p2->pLeaf ){
2519 if( p1->pLeaf==0 ){
2520 assert( pRes->iFirst==i2 );
2521 }else if( p2->pLeaf==0 ){
2522 assert( pRes->iFirst==i1 );
2523 }else{
2524 int nMin = MIN(p1->term.n, p2->term.n);
2525 int res = memcmp(p1->term.p, p2->term.p, nMin);
2526 if( res==0 ) res = p1->term.n - p2->term.n;
2527
2528 if( res==0 ){
2529 assert( pRes->bTermEq==1 );
2530 assert( p1->iRowid!=p2->iRowid );
2531 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
2532 }else{
2533 assert( pRes->bTermEq==0 );
2534 }
2535
2536 if( res<0 ){
2537 assert( pRes->iFirst==i1 );
2538 }else{
2539 assert( pRes->iFirst==i2 );
2540 }
2541 }
2542 }
2543 }
2544
2545 /*
2546 ** This function is a no-op unless SQLITE_DEBUG is defined when this module
2547 ** is compiled. In that case, this function is essentially an assert()
2548 ** statement used to verify that the contents of the pIter->aFirst[] array
2549 ** are correct.
2550 */
fts5AssertMultiIterSetup(Fts5Index * p,Fts5Iter * pIter)2551 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
2552 if( p->rc==SQLITE_OK ){
2553 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2554 int i;
2555
2556 assert( (pFirst->pLeaf==0)==pIter->base.bEof );
2557
2558 /* Check that pIter->iSwitchRowid is set correctly. */
2559 for(i=0; i<pIter->nSeg; i++){
2560 Fts5SegIter *p1 = &pIter->aSeg[i];
2561 assert( p1==pFirst
2562 || p1->pLeaf==0
2563 || fts5BufferCompare(&pFirst->term, &p1->term)
2564 || p1->iRowid==pIter->iSwitchRowid
2565 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
2566 );
2567 }
2568
2569 for(i=0; i<pIter->nSeg; i+=2){
2570 Fts5SegIter *p1 = &pIter->aSeg[i];
2571 Fts5SegIter *p2 = &pIter->aSeg[i+1];
2572 Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
2573 fts5AssertComparisonResult(pIter, p1, p2, pRes);
2574 }
2575
2576 for(i=1; i<(pIter->nSeg / 2); i+=2){
2577 Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
2578 Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
2579 Fts5CResult *pRes = &pIter->aFirst[i];
2580 fts5AssertComparisonResult(pIter, p1, p2, pRes);
2581 }
2582 }
2583 }
2584 #else
2585 # define fts5AssertMultiIterSetup(x,y)
2586 #endif
2587
2588 /*
2589 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2590 **
2591 ** If the returned value is non-zero, then it is the index of an entry
2592 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2593 ** to a key that is a duplicate of another, higher priority,
2594 ** segment-iterator in the pSeg->aSeg[] array.
2595 */
fts5MultiIterDoCompare(Fts5Iter * pIter,int iOut)2596 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
2597 int i1; /* Index of left-hand Fts5SegIter */
2598 int i2; /* Index of right-hand Fts5SegIter */
2599 int iRes;
2600 Fts5SegIter *p1; /* Left-hand Fts5SegIter */
2601 Fts5SegIter *p2; /* Right-hand Fts5SegIter */
2602 Fts5CResult *pRes = &pIter->aFirst[iOut];
2603
2604 assert( iOut<pIter->nSeg && iOut>0 );
2605 assert( pIter->bRev==0 || pIter->bRev==1 );
2606
2607 if( iOut>=(pIter->nSeg/2) ){
2608 i1 = (iOut - pIter->nSeg/2) * 2;
2609 i2 = i1 + 1;
2610 }else{
2611 i1 = pIter->aFirst[iOut*2].iFirst;
2612 i2 = pIter->aFirst[iOut*2+1].iFirst;
2613 }
2614 p1 = &pIter->aSeg[i1];
2615 p2 = &pIter->aSeg[i2];
2616
2617 pRes->bTermEq = 0;
2618 if( p1->pLeaf==0 ){ /* If p1 is at EOF */
2619 iRes = i2;
2620 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
2621 iRes = i1;
2622 }else{
2623 int res = fts5BufferCompare(&p1->term, &p2->term);
2624 if( res==0 ){
2625 assert( i2>i1 );
2626 assert( i2!=0 );
2627 pRes->bTermEq = 1;
2628 if( p1->iRowid==p2->iRowid ){
2629 p1->bDel = p2->bDel;
2630 return i2;
2631 }
2632 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
2633 }
2634 assert( res!=0 );
2635 if( res<0 ){
2636 iRes = i1;
2637 }else{
2638 iRes = i2;
2639 }
2640 }
2641
2642 pRes->iFirst = (u16)iRes;
2643 return 0;
2644 }
2645
2646 /*
2647 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
2648 ** It is an error if leaf iLeafPgno does not exist or contains no rowids.
2649 */
fts5SegIterGotoPage(Fts5Index * p,Fts5SegIter * pIter,int iLeafPgno)2650 static void fts5SegIterGotoPage(
2651 Fts5Index *p, /* FTS5 backend object */
2652 Fts5SegIter *pIter, /* Iterator to advance */
2653 int iLeafPgno
2654 ){
2655 assert( iLeafPgno>pIter->iLeafPgno );
2656
2657 if( iLeafPgno>pIter->pSeg->pgnoLast ){
2658 p->rc = FTS5_CORRUPT;
2659 }else{
2660 fts5DataRelease(pIter->pNextLeaf);
2661 pIter->pNextLeaf = 0;
2662 pIter->iLeafPgno = iLeafPgno-1;
2663 fts5SegIterNextPage(p, pIter);
2664 assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
2665
2666 if( p->rc==SQLITE_OK ){
2667 int iOff;
2668 u8 *a = pIter->pLeaf->p;
2669 int n = pIter->pLeaf->szLeaf;
2670
2671 iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
2672 if( iOff<4 || iOff>=n ){
2673 p->rc = FTS5_CORRUPT;
2674 }else{
2675 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
2676 pIter->iLeafOffset = iOff;
2677 fts5SegIterLoadNPos(p, pIter);
2678 }
2679 }
2680 }
2681 }
2682
/*
** Advance the iterator passed as the second argument until it is at or
** past rowid iMatch ("past" meaning a larger rowid for a forward iterator
** and a smaller one for a reverse iterator). Regardless of the value of
** iMatch, the iterator is always advanced at least once - either by
** jumping to a different leaf page with the help of the doclist-index, or
** by a single xNext() step.
**
** The iterator must be a single-term iterator with a doclist-index loaded
** and must not be at EOF (see the assert() statements below).
*/
static void fts5SegIterNextFrom(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  i64 iMatch                      /* Advance iterator at least this far */
){
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  int iLeafPgno = pIter->iLeafPgno;
  int bMove = 1;                  /* True if xNext() must be called */

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );

  if( bRev==0 ){
    /* Step the doclist-index forward while its rowid is smaller than
    ** iMatch, remembering the page number of the last entry passed. */
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
      iLeafPgno = fts5DlidxIterPgno(pDlidx);
      fts5DlidxIterNext(p, pDlidx);
    }
    assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      /* Jump directly to that page. This counts as the first advance. */
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{
    assert( pIter->pNextLeaf==0 );
    assert( iMatch<pIter->iRowid );
    /* Step the doclist-index backward while its rowid is larger than
    ** iMatch. */
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
      fts5DlidxIterPrev(p, pDlidx);
    }
    iLeafPgno = fts5DlidxIterPgno(pDlidx);

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );

    if( iLeafPgno<pIter->iLeafPgno ){
      /* iLeafPgno is set to one more than the target page before calling
      ** fts5SegIterReverseNewPage(), which is expected to load the page
      ** before it - presumably page iLeafPgno; verify against that
      ** function. */
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }

  /* Step entry by entry until the target rowid is reached, EOF is hit or
  ** an error occurs. On the first pass the xNext() call is skipped if a
  ** page jump was performed above. */
  do{
    if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid>=iMatch ) break;
    if( bRev!=0 && pIter->iRowid<=iMatch ) break;
    bMove = 1;
  }while( p->rc==SQLITE_OK );
}
2737
2738
2739 /*
2740 ** Free the iterator object passed as the second argument.
2741 */
fts5MultiIterFree(Fts5Iter * pIter)2742 static void fts5MultiIterFree(Fts5Iter *pIter){
2743 if( pIter ){
2744 int i;
2745 for(i=0; i<pIter->nSeg; i++){
2746 fts5SegIterClear(&pIter->aSeg[i]);
2747 }
2748 fts5StructureRelease(pIter->pStruct);
2749 fts5BufferFree(&pIter->poslist);
2750 sqlite3_free(pIter);
2751 }
2752 }
2753
/*
** Sub-iterator iChanged of iterator pIter has just been advanced. Update
** the pIter->aFirst[] array accordingly.
**
** Starting with the aFirst[] slot that covers aSeg[iChanged], each slot on
** the path toward slot 1 is recomputed using fts5MultiIterDoCompare(),
** stopping once the slot index drops below iMinset or an error occurs.
**
** If fts5MultiIterDoCompare() returns non-zero (the index of a
** sub-iterator positioned on a duplicate key), that sub-iterator is
** advanced with its xNext() method and the walk restarts from the slot
** covering it.
*/
static void fts5MultiIterAdvanced(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  int iMinset                     /* Minimum entry in aFirst[] to set */
){
  int i;
  for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
    int iEq;
    if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
      Fts5SegIter *pSeg = &pIter->aSeg[iEq];
      assert( p->rc==SQLITE_OK );
      pSeg->xNext(p, pSeg, 0);
      /* After the loop increment (i=i/2) this becomes (nSeg+iEq)/2, the
      ** bottom-level slot that covers aSeg[iEq]. */
      i = pIter->nSeg + iEq;
    }
  }
}
2771
/*
** Sub-iterator iChanged of iterator pIter has just been advanced. It still
** points to the same term though - just a different rowid. This function
** attempts to update the contents of the pIter->aFirst[] accordingly.
** If it does so successfully, 0 is returned. Otherwise 1.
**
** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
** on the iterator instead. That function does the same as this one, except
** that it deals with more complicated cases as well.
**
** When 0 is returned, *ppFirst is set to point to the sub-iterator that
** now sorts first. When 1 is returned, *ppFirst is not written.
**
** If the new rowid has not reached pIter->iSwitchRowid (in the direction
** of iteration), the aFirst[] array is left untouched and sub-iterator
** iChanged is reported as still being first.
*/
static int fts5MultiIterAdvanceRowid(
  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  Fts5SegIter **ppFirst
){
  Fts5SegIter *pNew = &pIter->aSeg[iChanged];

  if( pNew->iRowid==pIter->iSwitchRowid
   || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
  ){
    int i;
    /* The sibling of aSeg[iChanged] at the bottom level of the tree */
    Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
    /* Reset iSwitchRowid to the extreme value for this direction. It is
    ** tightened below as competitors with the same term are encountered. */
    pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
    for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
      Fts5CResult *pRes = &pIter->aFirst[i];

      assert( pNew->pLeaf );
      assert( pRes->bTermEq==0 || pOther->pLeaf );

      if( pRes->bTermEq ){
        if( pNew->iRowid==pOther->iRowid ){
          /* Duplicate rowid: too complicated to handle here. */
          return 1;
        }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
          /* pOther now sorts before pNew. It becomes the winner from this
          ** slot upward, and its rowid becomes the switch rowid. */
          pIter->iSwitchRowid = pOther->iRowid;
          pNew = pOther;
        }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
          /* pNew still wins, but pOther is closer than the competitor
          ** that set the current iSwitchRowid. */
          pIter->iSwitchRowid = pOther->iRowid;
        }
      }
      pRes->iFirst = (u16)(pNew - pIter->aSeg);
      if( i==1 ) break;

      /* Winner of the sibling slot at the next level up */
      pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
    }
  }

  *ppFirst = pNew;
  return 0;
}
2821
2822 /*
2823 ** Set the pIter->bEof variable based on the state of the sub-iterators.
2824 */
fts5MultiIterSetEof(Fts5Iter * pIter)2825 static void fts5MultiIterSetEof(Fts5Iter *pIter){
2826 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2827 pIter->base.bEof = pSeg->pLeaf==0;
2828 pIter->iSwitchRowid = pSeg->iRowid;
2829 }
2830
2831 /*
2832 ** Move the iterator to the next entry.
2833 **
2834 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
2835 ** considered an error if the iterator reaches EOF, or if it is already at
2836 ** EOF when this function is called.
2837 */
fts5MultiIterNext(Fts5Index * p,Fts5Iter * pIter,int bFrom,i64 iFrom)2838 static void fts5MultiIterNext(
2839 Fts5Index *p,
2840 Fts5Iter *pIter,
2841 int bFrom, /* True if argument iFrom is valid */
2842 i64 iFrom /* Advance at least as far as this */
2843 ){
2844 int bUseFrom = bFrom;
2845 assert( pIter->base.bEof==0 );
2846 while( p->rc==SQLITE_OK ){
2847 int iFirst = pIter->aFirst[1].iFirst;
2848 int bNewTerm = 0;
2849 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2850 assert( p->rc==SQLITE_OK );
2851 if( bUseFrom && pSeg->pDlidx ){
2852 fts5SegIterNextFrom(p, pSeg, iFrom);
2853 }else{
2854 pSeg->xNext(p, pSeg, &bNewTerm);
2855 }
2856
2857 if( pSeg->pLeaf==0 || bNewTerm
2858 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
2859 ){
2860 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2861 fts5MultiIterSetEof(pIter);
2862 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
2863 if( pSeg->pLeaf==0 ) return;
2864 }
2865
2866 fts5AssertMultiIterSetup(p, pIter);
2867 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
2868 if( pIter->bSkipEmpty==0 || pSeg->nPos ){
2869 pIter->xSetOutputs(pIter, pSeg);
2870 return;
2871 }
2872 bUseFrom = 0;
2873 }
2874 }
2875
/*
** Move iterator pIter to the next entry that is not a delete marker, as
** judged by fts5MultiIterIsEmpty(). This is a no-op if an error code is
** already stored in p->rc.
**
** *pbNewTerm is first set to 0, then to 1 if any step required the
** aFirst[] array to be recomputed with fts5MultiIterAdvanced() - in which
** case the iterator might (but need not) be pointing at a new term.
** This function may only be used with iterators that have bSkipEmpty set.
*/
static void fts5MultiIterNext2(
  Fts5Index *p,
  Fts5Iter *pIter,
  int *pbNewTerm                  /* OUT: True if *might* be new term */
){
  assert( pIter->bSkipEmpty );
  if( p->rc!=SQLITE_OK ) return;

  *pbNewTerm = 0;
  do{
    int iFirst = pIter->aFirst[1].iFirst;
    Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
    int bNewTerm = 0;
    int bRebuild;                 /* True to recompute aFirst[] in full */

    assert( p->rc==SQLITE_OK );
    pSeg->xNext(p, pSeg, &bNewTerm);

    bRebuild = (pSeg->pLeaf==0 || bNewTerm);
    if( bRebuild==0 ){
      bRebuild = fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg);
    }
    if( bRebuild ){
      fts5MultiIterAdvanced(p, pIter, iFirst, 1);
      fts5MultiIterSetEof(pIter);
      *pbNewTerm = 1;
    }
    fts5AssertMultiIterSetup(p, pIter);
  }while( fts5MultiIterIsEmpty(p, pIter) );
}
2903
/*
** xSetOutputs callback that does nothing. fts5MultiIterAlloc() installs
** this as the initial value of Fts5Iter.xSetOutputs.
*/
static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
  UNUSED_PARAM(pUnused1);
  UNUSED_PARAM(pUnused2);
}
2907
fts5MultiIterAlloc(Fts5Index * p,int nSeg)2908 static Fts5Iter *fts5MultiIterAlloc(
2909 Fts5Index *p, /* FTS5 backend to iterate within */
2910 int nSeg
2911 ){
2912 Fts5Iter *pNew;
2913 int nSlot; /* Power of two >= nSeg */
2914
2915 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
2916 pNew = fts5IdxMalloc(p,
2917 sizeof(Fts5Iter) + /* pNew */
2918 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */
2919 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
2920 );
2921 if( pNew ){
2922 pNew->nSeg = nSlot;
2923 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
2924 pNew->pIndex = p;
2925 pNew->xSetOutputs = fts5IterSetOutputs_Noop;
2926 }
2927 return pNew;
2928 }
2929
/*
** xChunk callback for fts5ChunkIterate(). pContext is an Fts5Buffer; the
** chunk is appended to it verbatim. The "safe" append does not grow the
** buffer, so the caller must have reserved enough space beforehand.
*/
static void fts5PoslistCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  Fts5Buffer *pOut = (Fts5Buffer*)pContext;
  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk<=0 ) return;
  fts5BufferSafeAppendBlob(pOut, pChunk, nChunk);
}
2941
/*
** Context passed to fts5PoslistFilterCallback() via fts5ChunkIterate().
**
** eState carries the filter state from one chunk to the next:
**
**   0: the column currently being scanned is not in pColset - bytes are
**      skipped.
**   1: the column currently being scanned is in pColset - bytes are
**      copied to pBuf.
**   2: the previous chunk ended immediately after an 0x01 column marker.
**      The column number is the first varint of the next chunk.
*/
typedef struct PoslistCallbackCtx PoslistCallbackCtx;
struct PoslistCallbackCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to these columns */
  int eState;                     /* 0, 1 or 2. See above */
};
2948
/*
** Context passed to fts5PoslistOffsetsCallback() via fts5ChunkIterate().
** The input is a list of delta-encoded varints (each stored as delta+2);
** iRead and iWrite hold the running absolute values on the input and
** output sides respectively. Both start at zero.
*/
typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
struct PoslistOffsetsCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to these columns */
  int iRead;                      /* Most recent value decoded from input */
  int iWrite;                     /* Most recent value written to pBuf */
};
2956
2957 /*
2958 ** TODO: Make this more efficient!
2959 */
static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
  /* Linear scan of pColset->aiCol[]. Returns 1 if iCol is a member of the
  ** column set, or 0 otherwise (including when the set is empty). */
  int ii = 0;
  int bFound = 0;
  while( bFound==0 && ii<pColset->nCol ){
    bFound = (pColset->aiCol[ii]==iCol);
    ii++;
  }
  return bFound;
}
2967
/*
** xChunk callback for fts5ChunkIterate(). pContext is a PoslistOffsetsCtx.
** Each varint in the chunk is a delta (stored as delta+2) from the
** previous value. Values that are members of the context's column set are
** re-encoded, relative to the last value written, and appended to the
** output buffer. All other values are dropped.
*/
static void fts5PoslistOffsetsCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
  int iOff;
  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );

  for(iOff=0; iOff<nChunk; ){
    int iVal;
    iOff += fts5GetVarint32(&pChunk[iOff], iVal);

    /* Convert the stored delta to an absolute value. */
    iVal += pCtx->iRead - 2;
    pCtx->iRead = iVal;

    if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
      fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
      pCtx->iWrite = iVal;
    }
  }
}
2990
/*
** xChunk callback for fts5ChunkIterate(), used by fts5SegiterPoslist() to
** filter a detail=full position list by column. pContext points to a
** PoslistCallbackCtx.
**
** Within the position list an 0x01 byte at the start of a varint marks
** the start of a new column; the varint that follows is the column number.
** Runs of bytes belonging to columns in pCtx->pColset are copied to
** pCtx->pBuf (including the 0x01 marker and column number); other runs are
** skipped. pCtx->eState records whether or not the current column is being
** copied, so that a column may span chunks.
*/
static void fts5PoslistFilterCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk>0 ){
    /* Search through to find the first varint with value 1. This is the
    ** start of the next column's hits. */
    int i = 0;
    int iStart = 0;

    /* eState==2 means the previous chunk ended immediately after an 0x01
    ** marker, which was not copied. The column number is therefore the
    ** first varint of this chunk. If the column is selected, emit the
    ** pending 0x01 now; the column number itself is copied below because
    ** iStart is still 0. */
    if( pCtx->eState==2 ){
      int iCol;
      fts5FastGetVarint32(pChunk, i, iCol);
      if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
        pCtx->eState = 1;
        fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
      }else{
        pCtx->eState = 0;
      }
    }

    do {
      /* Skip over whole varints until either the end of the chunk or a
      ** varint that begins with an 0x01 byte. */
      while( i<nChunk && pChunk[i]!=0x01 ){
        while( pChunk[i] & 0x80 ) i++;
        i++;
      }
      /* Bytes iStart..i-1 belong to the current column. Copy them if that
      ** column is selected. */
      if( pCtx->eState ){
        fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
      }
      if( i<nChunk ){
        /* pChunk[i] is an 0x01 column marker. */
        int iCol;
        iStart = i;
        i++;
        if( i>=nChunk ){
          /* The column number is in the next chunk. */
          pCtx->eState = 2;
        }else{
          fts5FastGetVarint32(pChunk, i, iCol);
          pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
          if( pCtx->eState ){
            /* Copy the 0x01 marker and the column number. */
            fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
            iStart = i;
          }
        }
      }
    }while( i<nChunk );
  }
}
3042
/*
** Invoke xChunk once for each contiguous piece of the position list that
** segment iterator pSeg currently points to. The first piece is on the
** iterator's current leaf. If the list extends beyond that leaf, each
** following leaf page of the segment is loaded in turn and its content,
** starting at byte offset 4, is passed to xChunk until pSeg->nPos bytes
** have been delivered.
**
** For an iterator that does not have FTS5_SEGITER_REVERSE set, the page
** immediately following the current one is not released after use.
** Instead, it is stored in pSeg->pNextLeaf.
**
** If fts5LeafRead() returns NULL the iteration stops early.
*/
static void fts5ChunkIterate(
  Fts5Index *p,                   /* Index object */
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = ((pSeg->flags & FTS5_SEGITER_REVERSE)==0) ? pgno+1 : 0;

  /* This function does not work with detail=none databases. */
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  for(;;){
    xChunk(p, pCtx, pChunk, nChunk);
    nRem -= nChunk;
    fts5DataRelease(pData);
    if( nRem<=0 ) break;

    /* More to come. Load the next leaf page of the segment. */
    pgno++;
    pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
    if( pData==0 ) break;
    pChunk = &pData->p[4];
    nChunk = MIN(nRem, pData->szLeaf - 4);

    if( pgno==pgnoSave ){
      /* Hand ownership of this page to the segment iterator. Clearing
      ** pData prevents the release call above from freeing it. */
      assert( pSeg->pNextLeaf==0 );
      pSeg->pNextLeaf = pData;
      pData = 0;
    }
  }
}
3083
3084 /*
3085 ** Iterator pIter currently points to a valid entry (not EOF). This
3086 ** function appends the position list data for the current entry to
3087 ** buffer pBuf. It does not make a copy of the position-list size
3088 ** field.
3089 */
static void fts5SegiterPoslist(
  Fts5Index *p,
  Fts5SegIter *pSeg,
  Fts5Colset *pColset,
  Fts5Buffer *pBuf
){
  /* Reserve space for the whole position list up front. The chunk
  ** callbacks below use the non-growing "safe" append routines. */
  if( fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ) return;

  if( pColset==0 ){
    /* No column filter: copy the position list verbatim. */
    fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
    return;
  }

  if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
    /* Anything that precedes the first 0x01 marker belongs to column 0,
    ** so the initial state depends on whether column 0 is selected. */
    PoslistCallbackCtx sFilter;
    sFilter.pBuf = pBuf;
    sFilter.pColset = pColset;
    sFilter.eState = fts5IndexColsetTest(pColset, 0);
    assert( sFilter.eState==0 || sFilter.eState==1 );
    fts5ChunkIterate(p, pSeg, (void*)&sFilter, fts5PoslistFilterCallback);
  }else{
    PoslistOffsetsCtx sOffsets;
    sOffsets.pBuf = pBuf;
    sOffsets.pColset = pColset;
    sOffsets.iRead = 0;
    sOffsets.iWrite = 0;
    fts5ChunkIterate(p, pSeg, (void*)&sOffsets, fts5PoslistOffsetsCallback);
  }
}
3117
3118 /*
3119 ** IN/OUT parameter (*pa) points to a position list n bytes in size. If
3120 ** the position list contains entries for column iCol, then (*pa) is set
3121 ** to point to the sub-position-list for that column and the number of
3122 ** bytes in it returned. Or, if the argument position list does not
3123 ** contain any entries for column iCol, return 0.
3124 */
static int fts5IndexExtractCol(
  const u8 **pa,                  /* IN/OUT: Pointer to poslist */
  int n,                          /* IN: Size of poslist in bytes */
  int iCol                        /* Column to extract from poslist */
){
  int iCurrent = 0;               /* Anything before the first 0x01 is col 0 */
  const u8 *p = *pa;
  const u8 *pEnd = &p[n];         /* One byte past end of position list */

  /* Step forward one column at a time until the current column number is
  ** greater than or equal to iCol. Note that (*pa) is updated as each
  ** column marker is passed, so it may have been modified even if this
  ** function goes on to return 0. */
  while( iCol>iCurrent ){
    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
    ** not part of a varint. Note that it is not possible for a negative
    ** or extremely large varint to occur within an uncorrupted position
    ** list. So the last byte of each varint may be assumed to have a clear
    ** 0x80 bit.  */
    while( *p!=0x01 ){
      while( *p++ & 0x80 );
      if( p>=pEnd ) return 0;
    }
    /* p points at an 0x01 column marker. Record it as the start of the
    ** candidate sub-list, then read the column number that follows. */
    *pa = p++;
    iCurrent = *p++;
    if( iCurrent & 0x80 ){
      /* Multi-byte column number. Back up and decode the full varint. */
      p--;
      p += fts5GetVarint32(p, iCurrent);
    }
  }
  /* The loop may have stepped past iCol, in which case the position list
  ** has no entries for that column. */
  if( iCol!=iCurrent ) return 0;

  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  ** not part of a varint */
  while( p<pEnd && *p!=0x01 ){
    while( *p++ & 0x80 );
  }

  /* Size in bytes of the sub-list, measured from (*pa). */
  return p - (*pa);
}
3161
/*
** Set buffer pBuf to the concatenation of the sub-position-lists, as
** located by fts5IndexExtractCol(), for each column in pColset. Columns
** are processed in the order in which they appear in pColset->aiCol[].
**
** This is a no-op if *pRc is not SQLITE_OK when it is called.
*/
static void fts5IndexExtractColset(
  int *pRc,
  Fts5Colset *pColset,            /* Colset to filter on */
  const u8 *pPos, int nPos,       /* Position list */
  Fts5Buffer *pBuf                /* Output buffer */
){
  int iEntry;
  if( *pRc!=SQLITE_OK ) return;

  fts5BufferZero(pBuf);
  for(iEntry=0; iEntry<pColset->nCol; iEntry++){
    const u8 *pSub = pPos;
    int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[iEntry]);
    if( nSub==0 ) continue;       /* No entries for this column */
    fts5BufferAppendBlob(pRc, pBuf, nSub, pSub);
  }
}
3180
3181 /*
3182 ** xSetOutputs callback used by detail=none tables.
3183 */
static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );

  /* Only the rowid and nData are published. base.pData is not touched. */
  pIter->base.nData = pSeg->nPos;
  pIter->base.iRowid = pSeg->iRowid;
}
3189
3190 /*
3191 ** xSetOutputs callback used by detail=full and detail=col tables when no
3192 ** column filters are specified.
3193 */
static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
  assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
  assert( pIter->pColset==0 );

  pIter->base.iRowid = pSeg->iRowid;
  pIter->base.nData = pSeg->nPos;

  if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
    /* The position list runs past the end of the current leaf. Gather it
    ** into the Fts5Iter.poslist buffer and point the output at the copy. */
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
  }else{
    /* The whole position list is on the current leaf, so the output can
    ** point directly into the page buffer. */
    pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  }
}
3214
3215 /*
3216 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
3217 ** against no columns at all).
3218 */
static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
  /* An empty column set can match nothing, so the output is always zero
  ** bytes in size. No other output field is written. */
  pIter->base.nData = 0;
  UNUSED_PARAM(pSeg);
}
3223
3224 /*
3225 ** xSetOutputs callback used by detail=col when there is a column filter
** and there are more than 100 columns. Also called as a fallback from
3227 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
3228 */
static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
  pIter->base.iRowid = pSeg->iRowid;

  /* Build the filtered list in Fts5Iter.poslist, then publish it. The
  ** output pointer is read only after the buffer has been populated. */
  fts5BufferZero(&pIter->poslist);
  fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
  pIter->base.nData = pIter->poslist.n;
  pIter->base.pData = pIter->poslist.p;
}
3236
3237 /*
3238 ** xSetOutputs callback used when:
3239 **
3240 ** * detail=col,
3241 ** * there is a column filter, and
3242 ** * the table contains 100 or fewer columns.
3243 **
3244 ** The last point is to ensure all column numbers are stored as
3245 ** single-byte varints.
3246 */
static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){

  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
  assert( pIter->pColset );

  if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
    /* The column list spans more than one page. Use the general routine. */
    fts5IterSetOutputs_Col(pIter, pSeg);
  }else{
    u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
    u8 *pEnd = (u8*)&a[pSeg->nPos];
    int iPrev = 0;                /* Most recent column number read */
    int *aiCol = pIter->pColset->aiCol;
    int *aiColEnd = &aiCol[pIter->pColset->nCol];

    /* Output is written directly into the poslist buffer with no bounds
    ** checks. fts5IterSetOutputCb() calls sqlite3Fts5BufferSize() with
    ** pConfig->nCol for this buffer when it installs this callback. */
    u8 *aOut = pIter->poslist.p;
    int iPrevOut = 0;             /* Most recent column number written */

    pIter->base.iRowid = pSeg->iRowid;

    /* Each input byte is a single-byte varint holding (delta + 2). Walk
    ** the input and aiCol[] in step, relying on both being in ascending
    ** order (NOTE(review): ordering of aiCol[] is assumed here - confirm
    ** against the code that builds the Fts5Colset). */
    while( a<pEnd ){
      iPrev += (int)a++[0] - 2;
      while( *aiCol<iPrev ){
        aiCol++;
        /* No remaining filter column can match. Stop early. */
        if( aiCol==aiColEnd ) goto setoutputs_col_out;
      }
      if( *aiCol==iPrev ){
        /* Re-encode relative to the previous column written. */
        *aOut++ = (u8)((iPrev - iPrevOut) + 2);
        iPrevOut = iPrev;
      }
    }

setoutputs_col_out:
    pIter->base.pData = pIter->poslist.p;
    pIter->base.nData = aOut - pIter->poslist.p;
  }
}
3283
3284 /*
3285 ** xSetOutputs callback used by detail=full when there is a column filter.
3286 */
static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
  Fts5Colset *pColset = pIter->pColset;
  pIter->base.iRowid = pSeg->iRowid;

  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
  assert( pColset );

  if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
    /* The position list continues on at least one more page. Assemble the
    ** filtered list in the Fts5Iter.poslist buffer and publish that. */
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
    pIter->base.nData = pIter->poslist.n;
  }else{
    /* The entire position list is on the current page. */
    const u8 *aPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
    if( pColset->nCol!=1 ){
      /* Multiple columns: concatenate the matching sub-lists in the
      ** poslist buffer. */
      fts5BufferZero(&pIter->poslist);
      fts5IndexExtractColset(
          &pIter->pIndex->rc, pColset, aPos, pSeg->nPos, &pIter->poslist
      );
      pIter->base.pData = pIter->poslist.p;
      pIter->base.nData = pIter->poslist.n;
    }else{
      /* Single column: the output can point straight into the page. */
      pIter->base.nData = fts5IndexExtractCol(
          &aPos, pSeg->nPos, pColset->aiCol[0]
      );
      pIter->base.pData = aPos;
    }
  }
}
3318
/*
** Select the xSetOutputs callback for iterator pIter, based on the detail
** mode of the table and on the column filter (if any) attached to the
** iterator. For the Col100 variant the iterator's poslist buffer is also
** sized, via sqlite3Fts5BufferSize(), using the number of table columns.
**
** This is a no-op if *pRc is not SQLITE_OK when it is called.
*/
static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
  Fts5Config *pConfig;

  if( *pRc!=SQLITE_OK ) return;
  pConfig = pIter->pIndex->pConfig;

  if( pConfig->eDetail==FTS5_DETAIL_NONE ){
    pIter->xSetOutputs = fts5IterSetOutputs_None;
    return;
  }
  if( pIter->pColset==0 ){
    pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
    return;
  }
  if( pIter->pColset->nCol==0 ){
    pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
    return;
  }
  if( pConfig->eDetail==FTS5_DETAIL_FULL ){
    pIter->xSetOutputs = fts5IterSetOutputs_Full;
    return;
  }

  assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
  if( pConfig->nCol>100 ){
    pIter->xSetOutputs = fts5IterSetOutputs_Col;
  }else{
    pIter->xSetOutputs = fts5IterSetOutputs_Col100;
    sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
  }
}
3349
3350
3351 /*
3352 ** Allocate a new Fts5Iter object.
3353 **
3354 ** The new object will be used to iterate through data in structure pStruct.
3355 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
3356 ** is zero or greater, data from the first nSegment segments on level iLevel
3357 ** is merged.
3358 **
3359 ** The iterator initially points to the first term/rowid entry in the
3360 ** iterated data.
3361 */
static void fts5MultiIterNew(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Structure *pStruct,         /* Structure of specific index */
  int flags,                      /* FTS5INDEX_QUERY_XXX flags */
  Fts5Colset *pColset,            /* Colset to filter on (or NULL) */
  const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
  int iLevel,                     /* Level to iterate (-1 for all) */
  int nSegment,                   /* Number of segments to merge (iLevel>=0) */
  Fts5Iter **ppOut                /* New object */
){
  int nSeg = 0;                   /* Number of segment-iters in use */
  int iIter = 0;                  /* Next aSeg[] slot to fill; loop counter */
  int iSeg;                       /* Used to iterate through segments */
  Fts5StructureLevel *pLvl;
  Fts5Iter *pNew;

  /* A term to seek to may only be specified when iterating all levels. */
  assert( (pTerm==0 && nTerm==0) || iLevel<0 );

  /* Allocate space for the new multi-seg-iterator. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      /* One segment iterator per segment, plus one for the in-memory hash
      ** table if there is one. */
      assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
      nSeg = pStruct->nSegment;
      nSeg += (p->pHash ? 1 : 0);
    }else{
      nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
    }
  }
  *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
  if( pNew==0 ) return;
  pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
  pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
  pNew->pStruct = pStruct;
  pNew->pColset = pColset;
  fts5StructureRef(pStruct);
  /* Unless the caller asked for no output, replace the default no-op
  ** xSetOutputs callback with one suited to this table and colset. */
  if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
    fts5IterSetOutputCb(&p->rc, pNew);
  }

  /* Initialize each of the component segment iterators. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
      if( p->pHash ){
        /* Add a segment iterator for the current contents of the hash table. */
        Fts5SegIter *pIter = &pNew->aSeg[iIter++];
        fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
      }
      /* Levels are visited in ascending order. Within each level, segments
      ** are visited from the highest array index down to 0. */
      for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
        for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
          Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
          Fts5SegIter *pIter = &pNew->aSeg[iIter++];
          if( pTerm==0 ){
            fts5SegIterInit(p, pSeg, pIter);
          }else{
            fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
          }
        }
      }
    }else{
      pLvl = &pStruct->aLevel[iLevel];
      for(iSeg=nSeg-1; iSeg>=0; iSeg--){
        fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
      }
    }
    assert( iIter==nSeg );
  }

  /* If the above was successful, each component iterator now points
  ** to the first entry in its segment. In this case initialize the
  ** aFirst[] array. Or, if an error has occurred, free the iterator
  ** object and set the output variable to NULL.  */
  if( p->rc==SQLITE_OK ){
    /* Populate aFirst[] from the highest slot down to slot 1. A non-zero
    ** return from fts5MultiIterDoCompare() is the index of a segment
    ** iterator that must be stepped forward before the tree is updated. */
    for(iIter=pNew->nSeg-1; iIter>0; iIter--){
      int iEq;
      if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
        Fts5SegIter *pSeg = &pNew->aSeg[iEq];
        if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
        fts5MultiIterAdvanced(p, pNew, iEq, iIter);
      }
    }
    fts5MultiIterSetEof(pNew);
    fts5AssertMultiIterSetup(p, pNew);

    /* If empty entries are to be skipped and the first entry is empty,
    ** advance to the first non-empty one. Otherwise, unless already at
    ** EOF, populate the output variables for the first entry. */
    if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
      fts5MultiIterNext(p, pNew, 0, 0);
    }else if( pNew->base.bEof==0 ){
      Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
      pNew->xSetOutputs(pNew, pSeg);
    }

  }else{
    fts5MultiIterFree(pNew);
    *ppOut = 0;
  }
}
3458
3459 /*
3460 ** Create an Fts5Iter that iterates through the doclist provided
3461 ** as the second argument.
3462 */
static void fts5MultiIterNew2(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Data *pData,                /* Doclist to iterate through */
  int bDesc,                      /* True for descending rowid order */
  Fts5Iter **ppOut                /* New object */
){
  Fts5Iter *pNew = fts5MultiIterAlloc(p, 2);

  if( pNew ){
    /* Only slot 1 of the two-slot iterator is used. */
    Fts5SegIter *pSegIter = &pNew->aSeg[1];
    pSegIter->flags = FTS5_SEGITER_ONETERM;

    if( pData->szLeaf<=0 ){
      /* Empty doclist. The iterator is immediately at EOF. */
      pNew->base.bEof = 1;
    }else{
      pSegIter->pLeaf = pData;
      pSegIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pSegIter->iRowid);
      pSegIter->iEndofDoclist = pData->nn;
      pNew->aFirst[1].iFirst = 1;
      if( bDesc ){
        pNew->bRev = 1;
        pSegIter->flags |= FTS5_SEGITER_REVERSE;
        fts5SegIterReverseInitPage(p, pSegIter);
      }else{
        fts5SegIterLoadNPos(p, pSegIter);
      }
      /* pData now belongs to the segment iterator. Clear the local pointer
      ** so that it is not released below. */
      pData = 0;
    }
    fts5SegIterSetNext(p, pSegIter);

    *ppOut = pNew;
  }

  /* Releases the doclist if the allocation failed or the doclist was
  ** empty. pData is NULL here if ownership was transferred above. */
  fts5DataRelease(pData);
}
3498
3499 /*
3500 ** Return true if the iterator is at EOF or if an error has occurred.
3501 ** False otherwise.
3502 */
static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
  /* Unless an error has occurred, the cached bEof flag must agree with
  ** the state of the segment iterator at the head of the merge. */
  assert( p->rc
      || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
  );
  if( p->rc ) return 1;
  return (pIter->base.bEof ? 1 : 0);
}
3509
3510 /*
3511 ** Return the rowid of the entry that the iterator currently points
3512 ** to. If the iterator points to EOF when this function is called the
3513 ** results are undefined.
3514 */
fts5MultiIterRowid(Fts5Iter * pIter)3515 static i64 fts5MultiIterRowid(Fts5Iter *pIter){
3516 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
3517 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
3518 }
3519
3520 /*
3521 ** Move the iterator to the next entry at or following iMatch.
3522 */
static void fts5MultiIterNextFrom(
  Fts5Index *p,
  Fts5Iter *pIter,
  i64 iMatch
){
  /* Keep stepping (always at least once) until EOF, an error, or a rowid
  ** at or beyond iMatch in the iterator's direction of travel. */
  for(;;){
    i64 iRowid;
    fts5MultiIterNext(p, pIter, 1, iMatch);
    if( fts5MultiIterEof(p, pIter) ) return;
    iRowid = fts5MultiIterRowid(pIter);
    if( pIter->bRev ? (iRowid<=iMatch) : (iRowid>=iMatch) ) return;
  }
}
3537
3538 /*
3539 ** Return a pointer to a buffer containing the term associated with the
3540 ** entry that the iterator currently points to.
3541 */
fts5MultiIterTerm(Fts5Iter * pIter,int * pn)3542 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
3543 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
3544 *pn = p->term.n;
3545 return p->term.p;
3546 }
3547
3548 /*
** Allocate a new segment-id for the structure pStruct. The new segment
** id is between 1 and FTS5_MAX_SEGMENT inclusive, and is not used by
** any currently existing segment. If the structure already contains
** FTS5_MAX_SEGMENT or more segments, Fts5Index.rc is set to SQLITE_FULL
** and 0 is returned.
3553 **
3554 ** If an error has already occurred, this function is a no-op. 0 is
3555 ** returned in this case.
3556 */
static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
  int iSegid = 0;

  if( p->rc==SQLITE_OK ){
    if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
      p->rc = SQLITE_FULL;
    }else{
      /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
      ** array is 63 elements, or 252 bytes, in size.  */
      u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
      int iLvl, iSeg;
      int i;
      u32 mask;

      /* Build a bitmap of the segment ids currently in use. Bit (iId-1)
      ** is set for each id iId found in the structure. Ids outside the
      ** range 1..FTS5_MAX_SEGMENT (which can only come from a corrupt
      ** structure record) are ignored. Without the lower bound check an
      ** id of zero or less would produce a negative array index and a
      ** negative shift count. */
      memset(aUsed, 0, sizeof(aUsed));
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
          if( iId>0 && iId<=FTS5_MAX_SEGMENT ){
            /* Shift an unsigned value. (1 << 31) on a signed int is
            ** undefined behaviour. */
            aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
          }
        }
      }

      /* Find the first word with a clear bit, then the first clear bit
      ** within that word. The new id is the bit position plus one. */
      for(i=0; aUsed[i]==0xFFFFFFFF; i++);
      mask = aUsed[i];
      for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
      iSegid += 1 + i*32;

#ifdef SQLITE_DEBUG
      /* Check that the new id is in range and not used by any segment in
      ** the structure. */
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
        }
      }
      assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );

      {
        /* Also check that the statement returned by fts5IdxSelectStmt()
        ** produces no row when bound to the new segment id. */
        sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
        if( p->rc==SQLITE_OK ){
          u8 aBlob[2] = {0xff, 0xff};
          sqlite3_bind_int(pIdxSelect, 1, iSegid);
          sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
          assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
          p->rc = sqlite3_reset(pIdxSelect);
        }
      }
#endif
    }
  }

  return iSegid;
}
3609
3610 /*
3611 ** Discard all data currently cached in the hash-tables.
3612 */
static void fts5IndexDiscardData(Fts5Index *p){
  /* There can be no pending data unless there is a hash table. */
  assert( p->pHash || p->nPendingData==0 );
  if( p->pHash==0 ) return;

  sqlite3Fts5HashClear(p->pHash);
  p->nPendingData = 0;
}
3620
3621 /*
3622 ** Return the size of the prefix, in bytes, that buffer
3623 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
3624 **
3625 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
3626 ** than buffer (pOld/nOld).
3627 */
static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
  /* Count leading bytes that match, examining at most nOld bytes of
  ** either buffer. */
  int nShared = 0;
  while( nShared<nOld && pOld[nShared]==pNew[nShared] ){
    nShared++;
  }
  return nShared;
}
3635
/*
** Reset the doclist-index writers in pWriter->aDlidx[]. Levels are
** processed from 0 upwards, stopping at the first level with an empty
** buffer. If bFlush is true, the buffer for each level processed is
** written to the database before it is cleared.
*/
static void fts5WriteDlidxClear(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int bFlush                      /* If true, write dlidx to disk */
){
  int iLvl = 0;
  assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );

  while( iLvl<pWriter->nDlidx && pWriter->aDlidx[iLvl].buf.n!=0 ){
    Fts5DlidxWriter *pLvl = &pWriter->aDlidx[iLvl];
    if( bFlush ){
      assert( pLvl->pgno!=0 );
      fts5DataWrite(p,
          FTS5_DLIDX_ROWID(pWriter->iSegid, iLvl, pLvl->pgno),
          pLvl->buf.p, pLvl->buf.n
      );
    }
    sqlite3Fts5BufferZero(&pLvl->buf);
    pLvl->bPrevValid = 0;
    iLvl++;
  }
}
3657
3658 /*
3659 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
3660 ** Any new array elements are zeroed before returning.
3661 */
static int fts5WriteDlidxGrow(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int nLvl
){
  Fts5DlidxWriter *aNew;
  int nZero;                      /* Bytes of new array space to zero */

  /* Nothing to do if an error has already occurred or if the array is
  ** already larger than requested. */
  if( p->rc!=SQLITE_OK || nLvl<pWriter->nDlidx ) return p->rc;

  /* Only overwrite pWriter->aDlidx once the reallocation has succeeded,
  ** so that the existing array is not lost on failure. */
  aNew = (Fts5DlidxWriter*)sqlite3_realloc(
      pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
  );
  if( aNew==0 ){
    p->rc = SQLITE_NOMEM;
    return p->rc;
  }

  nZero = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
  memset(&aNew[pWriter->nDlidx], 0, nZero);
  pWriter->aDlidx = aNew;
  pWriter->nDlidx = nLvl;
  return p->rc;
}
3682
3683 /*
3684 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
3685 ** enough, flush it to disk and return 1. Otherwise discard it and return
3686 ** zero.
3687 */
static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
  /* The doclist-index is written to disk only if it is non-empty and at
  ** least FTS5_MIN_DLIDX_SIZE empty leaf pages were written. Otherwise
  ** it is discarded. */
  int bWrite = (
      pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE
  );

  fts5WriteDlidxClear(p, pWriter, bWrite);
  pWriter->nEmpty = 0;
  return bWrite;
}
3700
3701 /*
3702 ** This function is called whenever processing of the doclist for the
3703 ** last term on leaf page (pWriter->iBtPage) is completed.
3704 **
3705 ** The doclist-index for that term is currently stored in-memory within the
3706 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
3707 ** writes it out to disk. Or, if it is too small to bother with, discards
3708 ** it.
3709 **
3710 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
3711 */
static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
  int bDlidx;                     /* True if a doclist-index was written */

  assert( pWriter->iBtPage || pWriter->nEmpty==0 );
  if( pWriter->iBtPage==0 ) return;

  /* Write or discard the in-memory doclist-index first. The result forms
  ** the least significant bit of the value stored with the term. */
  bDlidx = fts5WriteFlushDlidx(p, pWriter);

  if( p->rc==SQLITE_OK ){
    sqlite3_stmt *pStmt = p->pIdxWriter;
    i64 iVal = bDlidx + ((i64)pWriter->iBtPage<<1);
    const char *z;

    /* Always bind a non-NULL pointer, even for a zero-length term. */
    if( pWriter->btterm.n>0 ){
      z = (const char*)pWriter->btterm.p;
    }else{
      z = "";
    }

    /* The following was already done in fts5WriteInit(): */
    /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
    sqlite3_bind_blob(pStmt, 2, z, pWriter->btterm.n, SQLITE_STATIC);
    sqlite3_bind_int64(pStmt, 3, iVal);
    sqlite3_step(pStmt);
    p->rc = sqlite3_reset(pStmt);
  }
  pWriter->iBtPage = 0;
}
3730
3731 /*
3732 ** This is called once for each leaf page except the first that contains
3733 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
3734 ** is larger than all terms written to earlier leaves, and equal to or
3735 ** smaller than the first term on the new leaf.
3736 **
3737 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
3738 ** has already occurred when this function is called, it is a no-op.
3739 */
static void fts5WriteBtreeTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer object */
  int nTerm, const u8 *pTerm      /* First term on new page */
){
  /* Flush the entry for the previous term/page first. This must happen
  ** before the two assignments below, as fts5WriteFlushBtree() reads both
  ** pWriter->btterm and pWriter->iBtPage. */
  fts5WriteFlushBtree(p, pWriter);

  /* Remember the new split-key and the page number it belongs to. They
  ** are written out by the next call to fts5WriteFlushBtree(). */
  fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
  pWriter->iBtPage = pWriter->writer.pgno;
}
3749
3750 /*
3751 ** This function is called when flushing a leaf page that contains no
3752 ** terms at all to disk.
3753 */
static void fts5WriteBtreeNoTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter          /* Writer object */
){
  /* A page with no rowids contributes a single 0x00 byte to a
  ** doclist-index that has already been started. */
  if( pWriter->bFirstRowidInPage ){
    Fts5DlidxWriter *pRoot = pWriter->aDlidx;
    if( pRoot[0].buf.n>0 ){
      assert( pRoot->bPrevValid );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pRoot->buf, 0);
    }
  }

  /* One more sequential leaf page without a term. */
  pWriter->nEmpty++;
}
3769
fts5DlidxExtractFirstRowid(Fts5Buffer * pBuf)3770 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
3771 i64 iRowid;
3772 int iOff;
3773
3774 iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
3775 fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
3776 return iRowid;
3777 }
3778
3779 /*
3780 ** Rowid iRowid has just been appended to the current leaf page. It is the
3781 ** first on the page. This function appends an appropriate entry to the current
3782 ** doclist-index.
3783 */
static void fts5WriteDlidxAppend(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  i64 iRowid
){
  int i;
  int bDone = 0;

  /* Starting at level 0, append iRowid to the doclist-index page at each
  ** level. The loop only continues to level i+1 if the page at level i
  ** was full and had to be flushed. */
  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    i64 iVal;
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];

    if( pDlidx->buf.n>=p->pConfig->pgsz ){
      /* The current doclist-index page is full. Write it to disk and push
      ** a copy of iRowid (which will become the first rowid on the next
      ** doclist-index leaf page) up into the next level of the b-tree
      ** hierarchy. If the node being flushed is currently the root node,
      ** also push its first rowid upwards. */
      pDlidx->buf.p[0] = 0x01;    /* Not the root node */
      fts5DataWrite(p,
          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
          pDlidx->buf.p, pDlidx->buf.n
      );
      /* Ensure aDlidx[i+1] exists. The array may be reallocated by this
      ** call, so pDlidx is reloaded afterwards. */
      fts5WriteDlidxGrow(p, pWriter, i+2);
      pDlidx = &pWriter->aDlidx[i];
      if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
        i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);

        /* This was the root node. Push its first rowid up to the new root. */
        pDlidx[1].pgno = pDlidx->pgno;
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
        pDlidx[1].bPrevValid = 1;
        pDlidx[1].iPrev = iFirst;
      }

      /* Start a new, empty page at this level. */
      sqlite3Fts5BufferZero(&pDlidx->buf);
      pDlidx->bPrevValid = 0;
      pDlidx->pgno++;
    }else{
      bDone = 1;
    }

    if( pDlidx->bPrevValid ){
      /* Not the first entry on the page. Store a delta from the previous
      ** rowid. */
      iVal = iRowid - pDlidx->iPrev;
    }else{
      /* First entry on the page. Write the page header - a flags byte
      ** followed by a page number - and then store the rowid in full. The
      ** page number is that of the current leaf for level 0, or that of
      ** the level below otherwise. */
      i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
      assert( pDlidx->buf.n==0 );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
      iVal = iRowid;
    }

    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
    pDlidx->bPrevValid = 1;
    pDlidx->iPrev = iRowid;
  }
}
3843
/*
** Write the leaf page currently held by pWriter to the database and reset
** the writer so that it is ready to accumulate the next leaf.
**
** The size of the page body is stored in bytes 2-3 of the page header (the
** szLeaf field). If at least one term was written to the page, the
** page-index (pgidx) is appended after the body; otherwise
** fts5WriteBtreeNoTerm() is invoked to account for the term-less leaf.
** Errors are reported via Fts5Index.rc.
*/
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 aEmptyHdr[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pLeaf = &pWriter->writer;
  Fts5Buffer *pBody = &pLeaf->buf;
  Fts5Buffer *pIdx = &pLeaf->pgidx;

  assert( (pIdx->n==0)==(pWriter->bFirstTermInPage) );

  /* Fill in the szLeaf header field. It must not have been set already. */
  assert( 0==fts5GetU16(&pBody->p[2]) );
  fts5PutU16(&pBody->p[2], (u16)pBody->n);

  if( pWriter->bFirstTermInPage==0 ){
    /* The page holds at least one term: the pgidx follows the body. */
    fts5BufferAppendBlob(&p->rc, pBody, pIdx->n, pIdx->p);
  }else{
    /* No term was written to this page. */
    assert( pIdx->n==0 );
    fts5WriteBtreeNoTerm(p, pWriter);
  }

  /* Store the finished page. */
  fts5DataWrite(p,
      FTS5_SEGMENT_ROWID(pWriter->iSegid, pLeaf->pgno),
      pBody->p, pBody->n
  );

  /* Start the next page with an all-zero 4 byte header. */
  fts5BufferZero(pBody);
  fts5BufferZero(pIdx);
  fts5BufferAppendBlob(&p->rc, pBody, 4, aEmptyHdr);
  pLeaf->iPrevPgidx = 0;
  pLeaf->pgno++;

  /* Bookkeeping: one more leaf written, and the new leaf is still empty. */
  pWriter->nLeafWritten++;
  pWriter->bFirstTermInPage = 1;
  pWriter->bFirstRowidInPage = 1;
}
3882
3883 /*
3884 ** Append term pTerm/nTerm to the segment being written by the writer passed
3885 ** as the second argument.
3886 **
3887 ** If an error occurs, set the Fts5Index.rc error code. If an error has
3888 ** already occurred, this function is a no-op.
3889 */
fts5WriteAppendTerm(Fts5Index * p,Fts5SegWriter * pWriter,int nTerm,const u8 * pTerm)3890 static void fts5WriteAppendTerm(
3891 Fts5Index *p,
3892 Fts5SegWriter *pWriter,
3893 int nTerm, const u8 *pTerm
3894 ){
3895 int nPrefix; /* Bytes of prefix compression for term */
3896 Fts5PageWriter *pPage = &pWriter->writer;
3897 Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
3898
3899 assert( p->rc==SQLITE_OK );
3900 assert( pPage->buf.n>=4 );
3901 assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
3902
3903 /* If the current leaf page is full, flush it to disk. */
3904 if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
3905 if( pPage->buf.n>4 ){
3906 fts5WriteFlushLeaf(p, pWriter);
3907 }
3908 fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
3909 }
3910
3911 /* TODO1: Updating pgidx here. */
3912 pPgidx->n += sqlite3Fts5PutVarint(
3913 &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
3914 );
3915 pPage->iPrevPgidx = pPage->buf.n;
3916 #if 0
3917 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
3918 pPgidx->n += 2;
3919 #endif
3920
3921 if( pWriter->bFirstTermInPage ){
3922 nPrefix = 0;
3923 if( pPage->pgno!=1 ){
3924 /* This is the first term on a leaf that is not the leftmost leaf in
3925 ** the segment b-tree. In this case it is necessary to add a term to
3926 ** the b-tree hierarchy that is (a) larger than the largest term
3927 ** already written to the segment and (b) smaller than or equal to
3928 ** this term. In other words, a prefix of (pTerm/nTerm) that is one
3929 ** byte longer than the longest prefix (pTerm/nTerm) shares with the
3930 ** previous term.
3931 **
3932 ** Usually, the previous term is available in pPage->term. The exception
3933 ** is if this is the first term written in an incremental-merge step.
3934 ** In this case the previous term is not available, so just write a
3935 ** copy of (pTerm/nTerm) into the parent node. This is slightly
3936 ** inefficient, but still correct. */
3937 int n = nTerm;
3938 if( pPage->term.n ){
3939 n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
3940 }
3941 fts5WriteBtreeTerm(p, pWriter, n, pTerm);
3942 pPage = &pWriter->writer;
3943 }
3944 }else{
3945 nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
3946 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
3947 }
3948
3949 /* Append the number of bytes of new data, then the term data itself
3950 ** to the page. */
3951 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
3952 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
3953
3954 /* Update the Fts5PageWriter.term field. */
3955 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
3956 pWriter->bFirstTermInPage = 0;
3957
3958 pWriter->bFirstRowidInPage = 0;
3959 pWriter->bFirstRowidInDoclist = 1;
3960
3961 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
3962 pWriter->aDlidx[0].pgno = pPage->pgno;
3963 }
3964
3965 /*
3966 ** Append a rowid and position-list size field to the writers output.
3967 */
fts5WriteAppendRowid(Fts5Index * p,Fts5SegWriter * pWriter,i64 iRowid)3968 static void fts5WriteAppendRowid(
3969 Fts5Index *p,
3970 Fts5SegWriter *pWriter,
3971 i64 iRowid
3972 ){
3973 if( p->rc==SQLITE_OK ){
3974 Fts5PageWriter *pPage = &pWriter->writer;
3975
3976 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
3977 fts5WriteFlushLeaf(p, pWriter);
3978 }
3979
3980 /* If this is to be the first rowid written to the page, set the
3981 ** rowid-pointer in the page-header. Also append a value to the dlidx
3982 ** buffer, in case a doclist-index is required. */
3983 if( pWriter->bFirstRowidInPage ){
3984 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
3985 fts5WriteDlidxAppend(p, pWriter, iRowid);
3986 }
3987
3988 /* Write the rowid. */
3989 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
3990 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
3991 }else{
3992 assert( p->rc || iRowid>pWriter->iPrevRowid );
3993 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
3994 }
3995 pWriter->iPrevRowid = iRowid;
3996 pWriter->bFirstRowidInDoclist = 0;
3997 pWriter->bFirstRowidInPage = 0;
3998 }
3999 }
4000
/*
** Append the nData bytes of position-list data in aData[] to the output of
** writer pWriter, flushing leaf pages as they fill up. The data is treated
** as a sequence of varints and is only ever split between two varints,
** never in the middle of one.
**
** Errors are reported via Fts5Index.rc. Once an error is set no further
** pages are flushed.
*/
static void fts5WriteAppendPoslistData(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  const u8 *aData,
  int nData
){
  Fts5PageWriter *pLeaf = &pWriter->writer;
  const u8 *pIn = aData;          /* Next byte of input to consume */
  int nLeft = nData;              /* Bytes of input not yet written */

  assert( p->pConfig->pgsz>0 );
  for(;;){
    int nFree;                    /* Space left on the current leaf */
    int nTake = 0;                /* Bytes to copy to the current leaf */

    if( p->rc!=SQLITE_OK ) break;
    if( (pLeaf->buf.n + pLeaf->pgidx.n + nLeft)<p->pConfig->pgsz ) break;

    /* Take whole varints until the space left on the page is used up. */
    nFree = p->pConfig->pgsz - pLeaf->buf.n - pLeaf->pgidx.n;
    while( nTake<nFree ){
      u64 iUnused;
      nTake += fts5GetVarint(&pIn[nTake], &iUnused);
    }

    fts5BufferAppendBlob(&p->rc, &pLeaf->buf, nTake, pIn);
    pIn += nTake;
    nLeft -= nTake;
    fts5WriteFlushLeaf(p, pWriter);
  }

  /* Whatever remains fits on the current leaf. */
  if( nLeft>0 ){
    fts5BufferAppendBlob(&p->rc, &pLeaf->buf, nLeft, pIn);
  }
}
4030
4031 /*
4032 ** Flush any data cached by the writer object to the database. Free any
4033 ** allocations associated with the writer.
4034 */
fts5WriteFinish(Fts5Index * p,Fts5SegWriter * pWriter,int * pnLeaf)4035 static void fts5WriteFinish(
4036 Fts5Index *p,
4037 Fts5SegWriter *pWriter, /* Writer object */
4038 int *pnLeaf /* OUT: Number of leaf pages in b-tree */
4039 ){
4040 int i;
4041 Fts5PageWriter *pLeaf = &pWriter->writer;
4042 if( p->rc==SQLITE_OK ){
4043 assert( pLeaf->pgno>=1 );
4044 if( pLeaf->buf.n>4 ){
4045 fts5WriteFlushLeaf(p, pWriter);
4046 }
4047 *pnLeaf = pLeaf->pgno-1;
4048 if( pLeaf->pgno>1 ){
4049 fts5WriteFlushBtree(p, pWriter);
4050 }
4051 }
4052 fts5BufferFree(&pLeaf->term);
4053 fts5BufferFree(&pLeaf->buf);
4054 fts5BufferFree(&pLeaf->pgidx);
4055 fts5BufferFree(&pWriter->btterm);
4056
4057 for(i=0; i<pWriter->nDlidx; i++){
4058 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
4059 }
4060 sqlite3_free(pWriter->aDlidx);
4061 }
4062
/*
** Initialize the segment writer pWriter so that it writes a segment with
** segment-id iSegid, beginning with leaf page 1.
**
** The leaf and pgidx buffers are sized to hold one page plus padding, and
** the statement used to insert rows into the %_idx table is prepared if it
** has not been already. Errors are reported via Fts5Index.rc.
*/
static void fts5WriteInit(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int iSegid
){
  Fts5PageWriter *pLeaf = &pWriter->writer;
  const int nAlloc = p->pConfig->pgsz + FTS5_DATA_PADDING;

  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = iSegid;

  fts5WriteDlidxGrow(p, pWriter, 1);
  pLeaf->pgno = 1;
  pWriter->bFirstTermInPage = 1;
  pWriter->iBtPage = 1;

  assert( pLeaf->buf.n==0 );
  assert( pLeaf->pgidx.n==0 );

  /* Grow the two buffers to pgsz + padding bytes in size. */
  sqlite3Fts5BufferSize(&p->rc, &pLeaf->pgidx, nAlloc);
  sqlite3Fts5BufferSize(&p->rc, &pLeaf->buf, nAlloc);

  /* Prepare the %_idx INSERT statement on first use. */
  if( p->pIdxWriter==0 ){
    Fts5Config *pCfg = p->pConfig;
    char *zSql = sqlite3_mprintf(
        "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
        pCfg->zDb, pCfg->zName
    );
    fts5IndexPrepareStmt(p, &p->pIdxWriter, zSql);
  }

  if( p->rc==SQLITE_OK ){
    /* Initialize the 4-byte leaf-page header to 0x00. */
    memset(pLeaf->buf.p, 0, 4);
    pLeaf->buf.n = 4;

    /* Bind the output segment id to the index-writer once, rather than
    ** re-binding the same value for every row this writer inserts into
    ** %_idx. */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}
4104
4105 /*
4106 ** Iterator pIter was used to iterate through the input segments of on an
4107 ** incremental merge operation. This function is called if the incremental
4108 ** merge step has finished but the input has not been completely exhausted.
4109 */
fts5TrimSegments(Fts5Index * p,Fts5Iter * pIter)4110 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
4111 int i;
4112 Fts5Buffer buf;
4113 memset(&buf, 0, sizeof(Fts5Buffer));
4114 for(i=0; i<pIter->nSeg; i++){
4115 Fts5SegIter *pSeg = &pIter->aSeg[i];
4116 if( pSeg->pSeg==0 ){
4117 /* no-op */
4118 }else if( pSeg->pLeaf==0 ){
4119 /* All keys from this input segment have been transfered to the output.
4120 ** Set both the first and last page-numbers to 0 to indicate that the
4121 ** segment is now empty. */
4122 pSeg->pSeg->pgnoLast = 0;
4123 pSeg->pSeg->pgnoFirst = 0;
4124 }else{
4125 int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
4126 i64 iLeafRowid;
4127 Fts5Data *pData;
4128 int iId = pSeg->pSeg->iSegid;
4129 u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
4130
4131 iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
4132 pData = fts5DataRead(p, iLeafRowid);
4133 if( pData ){
4134 fts5BufferZero(&buf);
4135 fts5BufferGrow(&p->rc, &buf, pData->nn);
4136 fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
4137 fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
4138 fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
4139 fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
4140 if( p->rc==SQLITE_OK ){
4141 /* Set the szLeaf field */
4142 fts5PutU16(&buf.p[2], (u16)buf.n);
4143 }
4144
4145 /* Set up the new page-index array */
4146 fts5BufferAppendVarint(&p->rc, &buf, 4);
4147 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
4148 && pSeg->iEndofDoclist<pData->szLeaf
4149 ){
4150 int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
4151 fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
4152 fts5BufferAppendBlob(&p->rc, &buf,
4153 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
4154 );
4155 }
4156
4157 fts5DataRelease(pData);
4158 pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
4159 fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
4160 fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
4161 }
4162 }
4163 }
4164 fts5BufferFree(&buf);
4165 }
4166
/*
** Callback passed to fts5ChunkIterate() by fts5IndexMergeLevel(). pCtx is
** the Fts5SegWriter being used for the merge output; the chunk of
** position-list data is appended to it.
*/
static void fts5MergeChunkCallback(
  Fts5Index *p,
  void *pCtx,
  const u8 *pChunk, int nChunk
){
  fts5WriteAppendPoslistData(p, (Fts5SegWriter*)pCtx, pChunk, nChunk);
}
4175
4176 /*
4177 **
4178 */
fts5IndexMergeLevel(Fts5Index * p,Fts5Structure ** ppStruct,int iLvl,int * pnRem)4179 static void fts5IndexMergeLevel(
4180 Fts5Index *p, /* FTS5 backend object */
4181 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */
4182 int iLvl, /* Level to read input from */
4183 int *pnRem /* Write up to this many output leaves */
4184 ){
4185 Fts5Structure *pStruct = *ppStruct;
4186 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4187 Fts5StructureLevel *pLvlOut;
4188 Fts5Iter *pIter = 0; /* Iterator to read input data */
4189 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
4190 int nInput; /* Number of input segments */
4191 Fts5SegWriter writer; /* Writer object */
4192 Fts5StructureSegment *pSeg; /* Output segment */
4193 Fts5Buffer term;
4194 int bOldest; /* True if the output segment is the oldest */
4195 int eDetail = p->pConfig->eDetail;
4196 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
4197 int bTermWritten = 0; /* True if current term already output */
4198
4199 assert( iLvl<pStruct->nLevel );
4200 assert( pLvl->nMerge<=pLvl->nSeg );
4201
4202 memset(&writer, 0, sizeof(Fts5SegWriter));
4203 memset(&term, 0, sizeof(Fts5Buffer));
4204 if( pLvl->nMerge ){
4205 pLvlOut = &pStruct->aLevel[iLvl+1];
4206 assert( pLvlOut->nSeg>0 );
4207 nInput = pLvl->nMerge;
4208 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
4209
4210 fts5WriteInit(p, &writer, pSeg->iSegid);
4211 writer.writer.pgno = pSeg->pgnoLast+1;
4212 writer.iBtPage = 0;
4213 }else{
4214 int iSegid = fts5AllocateSegid(p, pStruct);
4215
4216 /* Extend the Fts5Structure object as required to ensure the output
4217 ** segment exists. */
4218 if( iLvl==pStruct->nLevel-1 ){
4219 fts5StructureAddLevel(&p->rc, ppStruct);
4220 pStruct = *ppStruct;
4221 }
4222 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
4223 if( p->rc ) return;
4224 pLvl = &pStruct->aLevel[iLvl];
4225 pLvlOut = &pStruct->aLevel[iLvl+1];
4226
4227 fts5WriteInit(p, &writer, iSegid);
4228
4229 /* Add the new segment to the output level */
4230 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
4231 pLvlOut->nSeg++;
4232 pSeg->pgnoFirst = 1;
4233 pSeg->iSegid = iSegid;
4234 pStruct->nSegment++;
4235
4236 /* Read input from all segments in the input level */
4237 nInput = pLvl->nSeg;
4238 }
4239 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
4240
4241 assert( iLvl>=0 );
4242 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
4243 fts5MultiIterEof(p, pIter)==0;
4244 fts5MultiIterNext(p, pIter, 0, 0)
4245 ){
4246 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4247 int nPos; /* position-list size field value */
4248 int nTerm;
4249 const u8 *pTerm;
4250
4251 pTerm = fts5MultiIterTerm(pIter, &nTerm);
4252 if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
4253 if( pnRem && writer.nLeafWritten>nRem ){
4254 break;
4255 }
4256 fts5BufferSet(&p->rc, &term, nTerm, pTerm);
4257 bTermWritten =0;
4258 }
4259
4260 /* Check for key annihilation. */
4261 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
4262
4263 if( p->rc==SQLITE_OK && bTermWritten==0 ){
4264 /* This is a new term. Append a term to the output segment. */
4265 fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
4266 bTermWritten = 1;
4267 }
4268
4269 /* Append the rowid to the output */
4270 /* WRITEPOSLISTSIZE */
4271 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
4272
4273 if( eDetail==FTS5_DETAIL_NONE ){
4274 if( pSegIter->bDel ){
4275 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4276 if( pSegIter->nPos>0 ){
4277 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4278 }
4279 }
4280 }else{
4281 /* Append the position-list data to the output */
4282 nPos = pSegIter->nPos*2 + pSegIter->bDel;
4283 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
4284 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
4285 }
4286 }
4287
4288 /* Flush the last leaf page to disk. Set the output segment b-tree height
4289 ** and last leaf page number at the same time. */
4290 fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
4291
4292 if( fts5MultiIterEof(p, pIter) ){
4293 int i;
4294
4295 /* Remove the redundant segments from the %_data table */
4296 for(i=0; i<nInput; i++){
4297 fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
4298 }
4299
4300 /* Remove the redundant segments from the input level */
4301 if( pLvl->nSeg!=nInput ){
4302 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
4303 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
4304 }
4305 pStruct->nSegment -= nInput;
4306 pLvl->nSeg -= nInput;
4307 pLvl->nMerge = 0;
4308 if( pSeg->pgnoLast==0 ){
4309 pLvlOut->nSeg--;
4310 pStruct->nSegment--;
4311 }
4312 }else{
4313 assert( pSeg->pgnoLast>0 );
4314 fts5TrimSegments(p, pIter);
4315 pLvl->nMerge = nInput;
4316 }
4317
4318 fts5MultiIterFree(pIter);
4319 fts5BufferFree(&term);
4320 if( pnRem ) *pnRem -= writer.nLeafWritten;
4321 }
4322
4323 /*
4324 ** Do up to nPg pages of automerge work on the index.
4325 **
4326 ** Return true if any changes were actually made, or false otherwise.
4327 */
fts5IndexMerge(Fts5Index * p,Fts5Structure ** ppStruct,int nPg,int nMin)4328 static int fts5IndexMerge(
4329 Fts5Index *p, /* FTS5 backend object */
4330 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
4331 int nPg, /* Pages of work to do */
4332 int nMin /* Minimum number of segments to merge */
4333 ){
4334 int nRem = nPg;
4335 int bRet = 0;
4336 Fts5Structure *pStruct = *ppStruct;
4337 while( nRem>0 && p->rc==SQLITE_OK ){
4338 int iLvl; /* To iterate through levels */
4339 int iBestLvl = 0; /* Level offering the most input segments */
4340 int nBest = 0; /* Number of input segments on best level */
4341
4342 /* Set iBestLvl to the level to read input segments from. */
4343 assert( pStruct->nLevel>0 );
4344 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
4345 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4346 if( pLvl->nMerge ){
4347 if( pLvl->nMerge>nBest ){
4348 iBestLvl = iLvl;
4349 nBest = pLvl->nMerge;
4350 }
4351 break;
4352 }
4353 if( pLvl->nSeg>nBest ){
4354 nBest = pLvl->nSeg;
4355 iBestLvl = iLvl;
4356 }
4357 }
4358
4359 /* If nBest is still 0, then the index must be empty. */
4360 #ifdef SQLITE_DEBUG
4361 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
4362 assert( pStruct->aLevel[iLvl].nSeg==0 );
4363 }
4364 #endif
4365
4366 if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
4367 break;
4368 }
4369 bRet = 1;
4370 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
4371 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
4372 fts5StructurePromote(p, iBestLvl+1, pStruct);
4373 }
4374 }
4375 *ppStruct = pStruct;
4376 return bRet;
4377 }
4378
4379 /*
4380 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
4381 ** segment. This function updates the write-counter accordingly and, if
4382 ** necessary, performs incremental merge work.
4383 **
4384 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4385 ** already occurred, this function is a no-op.
4386 */
fts5IndexAutomerge(Fts5Index * p,Fts5Structure ** ppStruct,int nLeaf)4387 static void fts5IndexAutomerge(
4388 Fts5Index *p, /* FTS5 backend object */
4389 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
4390 int nLeaf /* Number of output leaves just written */
4391 ){
4392 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){
4393 Fts5Structure *pStruct = *ppStruct;
4394 u64 nWrite; /* Initial value of write-counter */
4395 int nWork; /* Number of work-quanta to perform */
4396 int nRem; /* Number of leaf pages left to write */
4397
4398 /* Update the write-counter. While doing so, set nWork. */
4399 nWrite = pStruct->nWriteCounter;
4400 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
4401 pStruct->nWriteCounter += nLeaf;
4402 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
4403
4404 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
4405 }
4406 }
4407
/*
** Starting at level 0, for as long as the current level holds at least
** Fts5Config.nCrisisMerge segments, merge that level to completion into
** the next one and then move up a level.
**
** *ppStruct is updated to point to the (possibly reallocated) structure
** object. Errors are reported via Fts5Index.rc.
*/
static void fts5IndexCrisismerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct        /* IN/OUT: Current structure of index */
){
  const int nCrisis = p->pConfig->nCrisisMerge;
  Fts5Structure *pStruct = *ppStruct;
  int iLvl;

  assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
  for(iLvl=0;
      p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis;
      iLvl++
  ){
    /* A NULL pnRem argument requests a complete (non-incremental) merge. */
    fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
    assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
    fts5StructurePromote(p, iLvl+1, pStruct);
  }
  *ppStruct = pStruct;
}
4425
/*
** Return the error code currently stored in Fts5Index.rc and reset the
** stored value to SQLITE_OK.
*/
static int fts5IndexReturn(Fts5Index *p){
  const int rcSaved = p->rc;
  p->rc = SQLITE_OK;
  return rcSaved;
}
4431
4432 typedef struct Fts5FlushCtx Fts5FlushCtx;
4433 struct Fts5FlushCtx {
4434 Fts5Index *pIdx;
4435 Fts5SegWriter writer;
4436 };
4437
4438 /*
4439 ** Buffer aBuf[] contains a list of varints, all small enough to fit
4440 ** in a 32-bit integer. Return the size of the largest prefix of this
4441 ** list nMax bytes or less in size.
4442 */
fts5PoslistPrefix(const u8 * aBuf,int nMax)4443 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
4444 int ret;
4445 u32 dummy;
4446 ret = fts5GetVarint32(aBuf, dummy);
4447 if( ret<nMax ){
4448 while( 1 ){
4449 int i = fts5GetVarint32(&aBuf[ret], dummy);
4450 if( (ret + i) > nMax ) break;
4451 ret += i;
4452 }
4453 }
4454 return ret;
4455 }
4456
4457 /*
4458 ** Flush the contents of in-memory hash table iHash to a new level-0
4459 ** segment on disk. Also update the corresponding structure record.
4460 **
4461 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4462 ** already occurred, this function is a no-op.
4463 */
fts5FlushOneHash(Fts5Index * p)4464 static void fts5FlushOneHash(Fts5Index *p){
4465 Fts5Hash *pHash = p->pHash;
4466 Fts5Structure *pStruct;
4467 int iSegid;
4468 int pgnoLast = 0; /* Last leaf page number in segment */
4469
4470 /* Obtain a reference to the index structure and allocate a new segment-id
4471 ** for the new level-0 segment. */
4472 pStruct = fts5StructureRead(p);
4473 iSegid = fts5AllocateSegid(p, pStruct);
4474 fts5StructureInvalidate(p);
4475
4476 if( iSegid ){
4477 const int pgsz = p->pConfig->pgsz;
4478 int eDetail = p->pConfig->eDetail;
4479 Fts5StructureSegment *pSeg; /* New segment within pStruct */
4480 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
4481 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
4482
4483 Fts5SegWriter writer;
4484 fts5WriteInit(p, &writer, iSegid);
4485
4486 pBuf = &writer.writer.buf;
4487 pPgidx = &writer.writer.pgidx;
4488
4489 /* fts5WriteInit() should have initialized the buffers to (most likely)
4490 ** the maximum space required. */
4491 assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
4492 assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
4493
4494 /* Begin scanning through hash table entries. This loop runs once for each
4495 ** term/doclist currently stored within the hash table. */
4496 if( p->rc==SQLITE_OK ){
4497 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
4498 }
4499 while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
4500 const char *zTerm; /* Buffer containing term */
4501 const u8 *pDoclist; /* Pointer to doclist for this term */
4502 int nDoclist; /* Size of doclist in bytes */
4503
4504 /* Write the term for this entry to disk. */
4505 sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
4506 fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
4507
4508 assert( writer.bFirstRowidInPage==0 );
4509 if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
4510 /* The entire doclist will fit on the current leaf. */
4511 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
4512 }else{
4513 i64 iRowid = 0;
4514 i64 iDelta = 0;
4515 int iOff = 0;
4516
4517 /* The entire doclist will not fit on this leaf. The following
4518 ** loop iterates through the poslists that make up the current
4519 ** doclist. */
4520 while( p->rc==SQLITE_OK && iOff<nDoclist ){
4521 iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
4522 iRowid += iDelta;
4523
4524 if( writer.bFirstRowidInPage ){
4525 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */
4526 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
4527 writer.bFirstRowidInPage = 0;
4528 fts5WriteDlidxAppend(p, &writer, iRowid);
4529 }else{
4530 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
4531 }
4532 assert( pBuf->n<=pBuf->nSpace );
4533
4534 if( eDetail==FTS5_DETAIL_NONE ){
4535 if( iOff<nDoclist && pDoclist[iOff]==0 ){
4536 pBuf->p[pBuf->n++] = 0;
4537 iOff++;
4538 if( iOff<nDoclist && pDoclist[iOff]==0 ){
4539 pBuf->p[pBuf->n++] = 0;
4540 iOff++;
4541 }
4542 }
4543 if( (pBuf->n + pPgidx->n)>=pgsz ){
4544 fts5WriteFlushLeaf(p, &writer);
4545 }
4546 }else{
4547 int bDummy;
4548 int nPos;
4549 int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
4550 nCopy += nPos;
4551 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
4552 /* The entire poslist will fit on the current leaf. So copy
4553 ** it in one go. */
4554 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
4555 }else{
4556 /* The entire poslist will not fit on this leaf. So it needs
4557 ** to be broken into sections. The only qualification being
4558 ** that each varint must be stored contiguously. */
4559 const u8 *pPoslist = &pDoclist[iOff];
4560 int iPos = 0;
4561 while( p->rc==SQLITE_OK ){
4562 int nSpace = pgsz - pBuf->n - pPgidx->n;
4563 int n = 0;
4564 if( (nCopy - iPos)<=nSpace ){
4565 n = nCopy - iPos;
4566 }else{
4567 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
4568 }
4569 assert( n>0 );
4570 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
4571 iPos += n;
4572 if( (pBuf->n + pPgidx->n)>=pgsz ){
4573 fts5WriteFlushLeaf(p, &writer);
4574 }
4575 if( iPos>=nCopy ) break;
4576 }
4577 }
4578 iOff += nCopy;
4579 }
4580 }
4581 }
4582
4583 /* TODO2: Doclist terminator written here. */
4584 /* pBuf->p[pBuf->n++] = '\0'; */
4585 assert( pBuf->n<=pBuf->nSpace );
4586 sqlite3Fts5HashScanNext(pHash);
4587 }
4588 sqlite3Fts5HashClear(pHash);
4589 fts5WriteFinish(p, &writer, &pgnoLast);
4590
4591 /* Update the Fts5Structure. It is written back to the database by the
4592 ** fts5StructureRelease() call below. */
4593 if( pStruct->nLevel==0 ){
4594 fts5StructureAddLevel(&p->rc, &pStruct);
4595 }
4596 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
4597 if( p->rc==SQLITE_OK ){
4598 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
4599 pSeg->iSegid = iSegid;
4600 pSeg->pgnoFirst = 1;
4601 pSeg->pgnoLast = pgnoLast;
4602 pStruct->nSegment++;
4603 }
4604 fts5StructurePromote(p, 0, pStruct);
4605 }
4606
4607 fts5IndexAutomerge(p, &pStruct, pgnoLast);
4608 fts5IndexCrisismerge(p, &pStruct);
4609 fts5StructureWrite(p, pStruct);
4610 fts5StructureRelease(pStruct);
4611 }
4612
4613 /*
4614 ** Flush any data stored in the in-memory hash tables to the database.
4615 */
fts5IndexFlush(Fts5Index * p)4616 static void fts5IndexFlush(Fts5Index *p){
4617 /* Unless it is empty, flush the hash table to disk */
4618 if( p->nPendingData ){
4619 assert( p->pHash );
4620 p->nPendingData = 0;
4621 fts5FlushOneHash(p);
4622 }
4623 }
4624
/*
** Build and return a structure object suitable for merging every segment
** of pStruct into one.
**
** A structure does not require optimization if either:
**
**    1. it consists of fewer than two segments, or
**    2. all segments are on the same level, or
**    3. all segments except one are on the same level and are currently
**       inputs to a merge operation.
**
** In case (1), return NULL. In cases (2) and (3), increment the ref-count
** on *pStruct and return a copy of the pointer to it.
**
** Otherwise, return a new structure object with one more level than
** pStruct, in which all segments have been placed on the new topmost
** level, ordered from oldest to newest. If an allocation fails, NULL is
** returned and the error code is left in Fts5Index.rc.
*/
static Fts5Structure *fts5IndexOptimizeStruct(
  Fts5Index *p,
  Fts5Structure *pStruct
){
  const int nSeg = pStruct->nSegment;
  Fts5Structure *pNew;
  Fts5StructureLevel *pTop;       /* New topmost level within pNew */
  int nByte;
  int iLvl;
  int iOut = 0;                   /* Next free slot in pTop->aSeg[] */

  if( nSeg<2 ) return 0;
  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
    Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
    int nThis = pLvl->nSeg;
    if( nThis==nSeg || (nThis==nSeg-1 && pLvl->nMerge==nThis) ){
      fts5StructureRef(pStruct);
      return pStruct;
    }
    assert( pLvl->nMerge<=nThis );
  }

  /* Allocate a structure with room for one extra level. */
  nByte = sizeof(Fts5Structure);
  nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
  pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
  if( pNew==0 ) return 0;

  pNew->nLevel = pStruct->nLevel+1;
  pNew->nRef = 1;
  pNew->nWriteCounter = pStruct->nWriteCounter;

  /* Allocate the segment array for the new topmost level. */
  nByte = nSeg * sizeof(Fts5StructureSegment);
  pTop = &pNew->aLevel[pStruct->nLevel];
  pTop->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
  if( pTop->aSeg==0 ){
    sqlite3_free(pNew);
    return 0;
  }

  /* Iterate through all segments, from oldest to newest. Add them to
  ** the new level so that pTop->aSeg[0] is the oldest segment in the
  ** data structure. */
  for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
    Fts5StructureLevel *pFrom = &pStruct->aLevel[iLvl];
    int iSeg;
    for(iSeg=0; iSeg<pFrom->nSeg; iSeg++){
      pTop->aSeg[iOut++] = pFrom->aSeg[iSeg];
    }
  }
  pNew->nSegment = pTop->nSeg = nSeg;

  return pNew;
}
4686
/*
** Flush pending data, then merge all segments of the index together.
**
** The structure returned by fts5IndexOptimizeStruct() is merged, in steps
** of FTS5_OPT_WORK_UNIT leaf pages, until the lowest populated level of
** that structure is empty or an error occurs. The resulting structure is
** then written back.
**
** Return the error code stored in Fts5Index.rc (SQLITE_OK if there is
** none) and reset it.
*/
int sqlite3Fts5IndexOptimize(Fts5Index *p){
  Fts5Structure *pCur;
  Fts5Structure *pNew = 0;

  assert( p->rc==SQLITE_OK );
  fts5IndexFlush(p);
  pCur = fts5StructureRead(p);
  fts5StructureInvalidate(p);

  if( pCur ){
    pNew = fts5IndexOptimizeStruct(p, pCur);
  }
  fts5StructureRelease(pCur);

  assert( pNew==0 || pNew->nSegment>0 );
  if( pNew ){
    int iLvl = 0;

    /* Find the lowest level that contains any segments. */
    while( pNew->aLevel[iLvl].nSeg==0 ){
      iLvl++;
    }

    /* Merge that level, one work unit at a time, until it is empty. */
    while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
      int nRem = FTS5_OPT_WORK_UNIT;
      fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
    }

    fts5StructureWrite(p, pNew);
    fts5StructureRelease(pNew);
  }

  return fts5IndexReturn(p);
}
4716
4717 /*
4718 ** This is called to implement the special "VALUES('merge', $nMerge)"
4719 ** INSERT command.
4720 */
sqlite3Fts5IndexMerge(Fts5Index * p,int nMerge)4721 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
4722 Fts5Structure *pStruct = fts5StructureRead(p);
4723 if( pStruct ){
4724 int nMin = p->pConfig->nUsermerge;
4725 fts5StructureInvalidate(p);
4726 if( nMerge<0 ){
4727 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
4728 fts5StructureRelease(pStruct);
4729 pStruct = pNew;
4730 nMin = 2;
4731 nMerge = nMerge*-1;
4732 }
4733 if( pStruct && pStruct->nLevel ){
4734 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
4735 fts5StructureWrite(p, pStruct);
4736 }
4737 }
4738 fts5StructureRelease(pStruct);
4739 }
4740 return fts5IndexReturn(p);
4741 }
4742
/*
** Append the rowid delta iDelta to buffer pBuf as a varint. Any error is
** left in p->rc. The iterator argument is not used; it is present so that
** this function has the same signature as fts5AppendPoslist().
*/
static void fts5AppendRowid(
  Fts5Index *p,                   /* FTS5 backend object */
  i64 iDelta,                     /* Rowid delta to append */
  Fts5Iter *pUnused,              /* Not used */
  Fts5Buffer *pBuf                /* Buffer to append to */
){
  int *pRc = &p->rc;
  UNUSED_PARAM(pUnused);
  fts5BufferAppendVarint(pRc, pBuf, iDelta);
}
4752
/*
** Append one doclist entry to buffer pBuf: the rowid delta iDelta, then
** the size of the current output of pMulti multiplied by two, then the
** output data itself. This is a no-op if p->rc is already set; if the
** buffer cannot be grown the error is left in p->rc and nothing is
** appended.
*/
static void fts5AppendPoslist(
  Fts5Index *p,                   /* FTS5 backend object */
  i64 iDelta,                     /* Rowid delta to append */
  Fts5Iter *pMulti,               /* Iterator supplying the position list */
  Fts5Buffer *pBuf                /* Buffer to append to */
){
  const int nPoslist = pMulti->base.nData;
  assert( nPoslist>0 );

  if( p->rc!=SQLITE_OK ) return;

  /* Reserve room for the data plus two varints of up to 9 bytes each so
  ** that the "safe" (non-growing) append routines may be used below. */
  if( fts5BufferGrow(&p->rc, pBuf, nPoslist+9+9) ) return;

  fts5BufferSafeAppendVarint(pBuf, iDelta);
  fts5BufferSafeAppendVarint(pBuf, nPoslist*2);
  fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nPoslist);
}
4767
4768
/*
** Advance doclist iterator pIter to the next entry. If the end of the
** buffer has been reached, pIter->aPoslist is set to NULL. Otherwise
** pIter->iRowid is increased by the rowid delta read from the buffer,
** pIter->nSize is set to the size in bytes of the position-list size
** field, pIter->nPoslist to the value of that field shifted right by one
** bit, and pIter->aPoslist is left pointing at the size field.
*/
static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  u8 *pNext = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
  i64 iDelta;

  assert( pIter->aPoslist );
  if( pNext>=pIter->aEof ){
    pIter->aPoslist = 0;
    return;
  }

  /* Rowid delta */
  pNext += fts5GetVarint(pNext, (u64*)&iDelta);
  pIter->iRowid += iDelta;

  /* Position list size. A first byte with the high bit clear is a
  ** complete single-byte varint and is decoded inline. */
  if( (pNext[0] & 0x80)==0 ){
    pIter->nSize = 1;
    pIter->nPoslist = ((int)(pNext[0])) >> 1;
  }else{
    int nPos;
    pIter->nSize = fts5GetVarint32(pNext, nPos);
    pIter->nPoslist = (nPos>>1);
  }

  pIter->aPoslist = pNext;
}
4794
/*
** Initialize pIter to iterate through the doclist stored in buffer pBuf
** and advance it to the first entry (if any) by calling
** fts5DoclistIterNext().
*/
static void fts5DoclistIterInit(
  Fts5Buffer *pBuf,               /* Buffer containing the doclist */
  Fts5DoclistIter *pIter          /* Iterator to initialize */
){
  u8 *aData = pBuf->p;

  memset(pIter, 0, sizeof(*pIter));
  pIter->aPoslist = aData;
  pIter->aEof = aData + pBuf->n;
  fts5DoclistIterNext(pIter);
}
4804
#if 0
/*
** Append a doclist to buffer pBuf.
**
** This function assumes that space within the buffer has already been
** allocated.
**
** This function is disabled. The macro of the same name that follows the
** #endif performs the same three steps.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif

/*
** Append rowid iRowid to buffer pBuf as a varint delta against
** iLastRowid, then update iLastRowid (which must be an lvalue) to iRowid.
** Space within the buffer must already have been allocated.
**
** The body is wrapped in do{...}while(0) so that the macro expands to a
** single statement and may be used safely in an unbraced if/else. Each
** of iLastRowid and iRowid is evaluated more than once, so arguments
** must not have side effects.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) do {      \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                     \
  fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid));   \
  (iLastRowid) = (iRowid);                                       \
} while( 0 )
4828
4829 /*
4830 ** Swap the contents of buffer *p1 with that of *p2.
4831 */
fts5BufferSwap(Fts5Buffer * p1,Fts5Buffer * p2)4832 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
4833 Fts5Buffer tmp = *p1;
4834 *p1 = *p2;
4835 *p2 = tmp;
4836 }
4837
/*
** Read the next entry from the delta-encoded rowid list in pBuf.
**
** On entry *piOff is the offset of the next varint within the buffer. If
** that offset is at or past the end of the buffer, *piOff is set to -1
** and *piRowid is left unchanged. Otherwise the varint is decoded, its
** value is added to *piRowid and *piOff is advanced past it.
*/
static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
  const int iOff = *piOff;
  u64 iDelta;

  if( iOff>=pBuf->n ){
    *piOff = -1;
    return;
  }
  *piOff = iOff + sqlite3Fts5GetVarint(&pBuf->p[iOff], &iDelta);
  *piRowid += iDelta;
}
4848
4849 /*
4850 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
4851 ** In this case the buffers consist of a delta-encoded list of rowids only.
4852 */
fts5MergeRowidLists(Fts5Index * p,Fts5Buffer * p1,Fts5Buffer * p2)4853 static void fts5MergeRowidLists(
4854 Fts5Index *p, /* FTS5 backend object */
4855 Fts5Buffer *p1, /* First list to merge */
4856 Fts5Buffer *p2 /* Second list to merge */
4857 ){
4858 int i1 = 0;
4859 int i2 = 0;
4860 i64 iRowid1 = 0;
4861 i64 iRowid2 = 0;
4862 i64 iOut = 0;
4863
4864 Fts5Buffer out;
4865 memset(&out, 0, sizeof(out));
4866 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
4867 if( p->rc ) return;
4868
4869 fts5NextRowid(p1, &i1, &iRowid1);
4870 fts5NextRowid(p2, &i2, &iRowid2);
4871 while( i1>=0 || i2>=0 ){
4872 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
4873 assert( iOut==0 || iRowid1>iOut );
4874 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
4875 iOut = iRowid1;
4876 fts5NextRowid(p1, &i1, &iRowid1);
4877 }else{
4878 assert( iOut==0 || iRowid2>iOut );
4879 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
4880 iOut = iRowid2;
4881 if( i1>=0 && iRowid1==iRowid2 ){
4882 fts5NextRowid(p1, &i1, &iRowid1);
4883 }
4884 fts5NextRowid(p2, &i2, &iRowid2);
4885 }
4886 }
4887
4888 fts5BufferSwap(&out, p1);
4889 fts5BufferFree(&out);
4890 }
4891
/*
** Buffers p1 and p2 contain doclists. This function merges the content
** of the two doclists together and sets buffer p1 to the result before
** returning.
**
** Each doclist entry consists of a rowid delta varint, a position-list
** size varint and the position-list data. Entries are visited in
** ascending rowid order. Where both lists have an entry for the same
** rowid, the two position lists are merged into a single list.
**
** If p2 is empty, p1 is left untouched.
**
** If an error occurs, an error code is left in p->rc. If an error has
** already occurred, this function is a no-op.
*/
static void fts5MergePrefixLists(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Buffer *p1,                 /* First list to merge */
  Fts5Buffer *p2                  /* Second list to merge */
){
  if( p2->n ){
    i64 iLastRowid = 0;           /* Last rowid appended to "out" */
    Fts5DoclistIter i1;           /* Iterator over p1 */
    Fts5DoclistIter i2;           /* Iterator over p2 */
    Fts5Buffer out = {0, 0, 0};   /* Merged doclist */
    Fts5Buffer tmp = {0, 0, 0};   /* Scratch space for merged poslists */

    /* Size the output buffer for the combined input up front. The "safe"
    ** append routines used below do not grow the buffer themselves. */
    if( sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n) ) return;
    fts5DoclistIterInit(p1, &i1);
    fts5DoclistIterInit(p2, &i2);

    /* Main loop. Runs until one of the two iterators hits EOF (its
    ** aPoslist pointer becomes NULL) or an error occurs. */
    while( 1 ){
      if( i1.iRowid<i2.iRowid ){
        /* Copy entry from i1 */
        fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
        fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize);
        fts5DoclistIterNext(&i1);
        if( i1.aPoslist==0 ) break;
      }
      else if( i2.iRowid!=i1.iRowid ){
        /* Copy entry from i2 */
        fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
        fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize);
        fts5DoclistIterNext(&i2);
        if( i2.aPoslist==0 ) break;
      }
      else{
        /* Merge the two position lists. */
        i64 iPos1 = 0;            /* Current position from list 1 */
        i64 iPos2 = 0;            /* Current position from list 2 */
        int iOff1 = 0;            /* Read offset within a1 */
        int iOff2 = 0;            /* Read offset within a2 */
        u8 *a1 = &i1.aPoslist[i1.nSize];    /* Poslist data, past size field */
        u8 *a2 = &i2.aPoslist[i2.nSize];    /* Poslist data, past size field */

        i64 iPrev = 0;            /* Last position written to tmp */
        /* NOTE(review): "writer" is zeroed here but is not referenced
        ** again anywhere in this function. */
        Fts5PoslistWriter writer;
        memset(&writer, 0, sizeof(writer));

        fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
        fts5BufferZero(&tmp);
        sqlite3Fts5BufferSize(&p->rc, &tmp, i1.nPoslist + i2.nPoslist);
        if( p->rc ) break;

        /* Load the first position from each list. */
        sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
        sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
        assert( iPos1>=0 && iPos2>=0 );

        /* Write the smaller of the two first positions unconditionally.
        ** No duplicate check is made because nothing has been written to
        ** tmp yet. */
        if( iPos1<iPos2 ){
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
          sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
        }else{
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
          sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
        }

        /* Interleave the remaining positions until one list is exhausted
        ** (signalled by a negative position). A position from list 1
        ** that is equal to the position just written is skipped. */
        if( iPos1>=0 && iPos2>=0 ){
          while( 1 ){
            if( iPos1<iPos2 ){
              if( iPos1!=iPrev ){
                sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
              }
              sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
              if( iPos1<0 ) break;
            }else{
              assert( iPos2!=iPrev );
              sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
              sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
              if( iPos2<0 ) break;
            }
          }
        }

        /* One list is now exhausted. Write the pending position of the
        ** other list, then copy the unread remainder of that list's
        ** bytes verbatim. */
        if( iPos1>=0 ){
          if( iPos1!=iPrev ){
            sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
          }
          fts5BufferSafeAppendBlob(&tmp, &a1[iOff1], i1.nPoslist-iOff1);
        }else{
          assert( iPos2>=0 && iPos2!=iPrev );
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
          fts5BufferSafeAppendBlob(&tmp, &a2[iOff2], i2.nPoslist-iOff2);
        }

        /* WRITEPOSLISTSIZE */
        /* The size is stored multiplied by two; fts5DoclistIterNext()
        ** shifts it right by one bit when reading it back. */
        fts5BufferSafeAppendVarint(&out, tmp.n * 2);
        fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
        fts5DoclistIterNext(&i1);
        fts5DoclistIterNext(&i2);
        if( i1.aPoslist==0 || i2.aPoslist==0 ) break;
      }
    }

    /* At most one iterator still has entries. Append its current rowid as
    ** a delta against the last rowid written, then copy everything from
    ** its current position-list size field to the end of its buffer. */
    if( i1.aPoslist ){
      fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
      fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.aEof - i1.aPoslist);
    }
    else if( i2.aPoslist ){
      fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
      fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.aEof - i2.aPoslist);
    }

    /* Copy the merged doclist into p1 and release the temporaries. */
    fts5BufferSet(&p->rc, p1, out.n, out.p);
    fts5BufferFree(&tmp);
    fts5BufferFree(&out);
  }
}
5012
/*
** Build an iterator for a prefix query that is answered by scanning the
** terms of an index rather than by a single term lookup.
**
** All terms that begin with the nToken bytes at pToken are visited. The
** entries found are accumulated into a single in-memory doclist, which
** is then wrapped in an Fts5Data object and handed to
** fts5MultiIterNew2() to create the iterator returned via *ppIter.
**
** For detail=none tables the accumulated lists contain rowids only
** (fts5AppendRowid / fts5MergeRowidLists); otherwise they also contain
** position lists (fts5AppendPoslist / fts5MergePrefixLists).
**
** Errors are reported via p->rc.
*/
static void fts5SetupPrefixIter(
  Fts5Index *p,                   /* Index to read from */
  int bDesc,                      /* True for "ORDER BY rowid DESC" */
  const u8 *pToken,               /* Buffer containing prefix to match */
  int nToken,                     /* Size of buffer pToken in bytes */
  Fts5Colset *pColset,            /* Restrict matches to these columns */
  Fts5Iter **ppIter               /* OUT: New iterator */
){
  Fts5Structure *pStruct;
  Fts5Buffer *aBuf;               /* Array of nBuf intermediate doclists */
  const int nBuf = 32;            /* Number of entries in aBuf[] */

  /* Select the merge and append routines that match the detail mode. */
  void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*);
  void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*);
  if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
    xMerge = fts5MergeRowidLists;
    xAppend = fts5AppendRowid;
  }else{
    xMerge = fts5MergePrefixLists;
    xAppend = fts5AppendPoslist;
  }

  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p);

  if( aBuf && pStruct ){
    const int flags = FTS5INDEX_QUERY_SCAN
                    | FTS5INDEX_QUERY_SKIPEMPTY
                    | FTS5INDEX_QUERY_NOOUTPUT;
    int i;
    i64 iLastRowid = 0;           /* Last rowid appended to "doclist" */
    Fts5Iter *p1 = 0;             /* Iterator used to gather data from index */
    Fts5Data *pData;
    Fts5Buffer doclist;           /* Doclist currently being accumulated */
    int bNewTerm = 1;             /* Set by fts5MultiIterNext2() */

    memset(&doclist, 0, sizeof(doclist));
    fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
    fts5IterSetOutputCb(&p->rc, p1);
    for( /* no-op */ ;
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext2(p, p1, &bNewTerm)
    ){
      Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
      int nTerm = pSeg->term.n;
      const u8 *pTerm = pSeg->term.p;
      p1->xSetOutputs(p1, pSeg);

      /* Stop at the first term that does not begin with the prefix. The
      ** comparison is only made when the iterator reports a new term. */
      assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
      if( bNewTerm ){
        if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
      }

      /* Skip entries for which the iterator produced no output data. */
      if( p1->base.nData==0 ) continue;

      /* "doclist" can only be extended with ascending rowids. If this
      ** rowid is not larger than the last one appended, move the current
      ** contents of "doclist" into aBuf[] and start a new list. Starting
      ** at slot 0: an empty slot receives the list, which ends the loop;
      ** an occupied slot is merged into the list and cleared, and the
      ** search continues with the next slot. */
      if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
        for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
          assert( i<nBuf );
          if( aBuf[i].n==0 ){
            fts5BufferSwap(&doclist, &aBuf[i]);
            fts5BufferZero(&doclist);
          }else{
            xMerge(p, &doclist, &aBuf[i]);
            fts5BufferZero(&aBuf[i]);
          }
        }
        iLastRowid = 0;
      }

      xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist);
      iLastRowid = p1->base.iRowid;
    }

    /* Fold every intermediate list into "doclist", freeing each slot as
    ** it is processed. Slots are freed even if an error has occurred. */
    for(i=0; i<nBuf; i++){
      if( p->rc==SQLITE_OK ){
        xMerge(p, &doclist, &aBuf[i]);
      }
      fts5BufferFree(&aBuf[i]);
    }
    fts5MultiIterFree(p1);

    /* Copy the finished doclist into an Fts5Data object whose data area
    ** immediately follows the structure in the same allocation, then
    ** create the output iterator over it. */
    pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
    if( pData ){
      pData->p = (u8*)&pData[1];
      pData->nn = pData->szLeaf = doclist.n;
      if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
      fts5MultiIterNew2(p, pData, bDesc, ppIter);
    }
    fts5BufferFree(&doclist);
  }

  fts5StructureRelease(pStruct);
  sqlite3_free(aBuf);
}
5107
5108
5109 /*
5110 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
5111 ** to the document with rowid iRowid.
5112 */
sqlite3Fts5IndexBeginWrite(Fts5Index * p,int bDelete,i64 iRowid)5113 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
5114 assert( p->rc==SQLITE_OK );
5115
5116 /* Allocate the hash table if it has not already been allocated */
5117 if( p->pHash==0 ){
5118 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
5119 }
5120
5121 /* Flush the hash table to disk if required */
5122 if( iRowid<p->iWriteRowid
5123 || (iRowid==p->iWriteRowid && p->bDelete==0)
5124 || (p->nPendingData > p->pConfig->nHashSize)
5125 ){
5126 fts5IndexFlush(p);
5127 }
5128
5129 p->iWriteRowid = iRowid;
5130 p->bDelete = bDelete;
5131 return fts5IndexReturn(p);
5132 }
5133
5134 /*
5135 ** Commit data to disk.
5136 */
sqlite3Fts5IndexSync(Fts5Index * p)5137 int sqlite3Fts5IndexSync(Fts5Index *p){
5138 assert( p->rc==SQLITE_OK );
5139 fts5IndexFlush(p);
5140 fts5CloseReader(p);
5141 return fts5IndexReturn(p);
5142 }
5143
5144 /*
5145 ** Discard any data stored in the in-memory hash tables. Do not write it
5146 ** to the database. Additionally, assume that the contents of the %_data
5147 ** table may have changed on disk. So any in-memory caches of %_data
5148 ** records must be invalidated.
5149 */
sqlite3Fts5IndexRollback(Fts5Index * p)5150 int sqlite3Fts5IndexRollback(Fts5Index *p){
5151 fts5CloseReader(p);
5152 fts5IndexDiscardData(p);
5153 fts5StructureInvalidate(p);
5154 /* assert( p->rc==SQLITE_OK ); */
5155 return SQLITE_OK;
5156 }
5157
5158 /*
5159 ** The %_data table is completely empty when this function is called. This
5160 ** function populates it with the initial structure objects for each index,
5161 ** and the initial version of the "averages" record (a zero-byte blob).
5162 */
sqlite3Fts5IndexReinit(Fts5Index * p)5163 int sqlite3Fts5IndexReinit(Fts5Index *p){
5164 Fts5Structure s;
5165 fts5StructureInvalidate(p);
5166 memset(&s, 0, sizeof(Fts5Structure));
5167 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
5168 fts5StructureWrite(p, &s);
5169 return fts5IndexReturn(p);
5170 }
5171
5172 /*
5173 ** Open a new Fts5Index handle. If the bCreate argument is true, create
5174 ** and initialize the underlying %_data table.
5175 **
5176 ** If successful, set *pp to point to the new object and return SQLITE_OK.
5177 ** Otherwise, set *pp to NULL and return an SQLite error code.
5178 */
sqlite3Fts5IndexOpen(Fts5Config * pConfig,int bCreate,Fts5Index ** pp,char ** pzErr)5179 int sqlite3Fts5IndexOpen(
5180 Fts5Config *pConfig,
5181 int bCreate,
5182 Fts5Index **pp,
5183 char **pzErr
5184 ){
5185 int rc = SQLITE_OK;
5186 Fts5Index *p; /* New object */
5187
5188 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
5189 if( rc==SQLITE_OK ){
5190 p->pConfig = pConfig;
5191 p->nWorkUnit = FTS5_WORK_UNIT;
5192 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
5193 if( p->zDataTbl && bCreate ){
5194 rc = sqlite3Fts5CreateTable(
5195 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
5196 );
5197 if( rc==SQLITE_OK ){
5198 rc = sqlite3Fts5CreateTable(pConfig, "idx",
5199 "segid, term, pgno, PRIMARY KEY(segid, term)",
5200 1, pzErr
5201 );
5202 }
5203 if( rc==SQLITE_OK ){
5204 rc = sqlite3Fts5IndexReinit(p);
5205 }
5206 }
5207 }
5208
5209 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
5210 if( rc ){
5211 sqlite3Fts5IndexClose(p);
5212 *pp = 0;
5213 }
5214 return rc;
5215 }
5216
5217 /*
5218 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
5219 */
sqlite3Fts5IndexClose(Fts5Index * p)5220 int sqlite3Fts5IndexClose(Fts5Index *p){
5221 int rc = SQLITE_OK;
5222 if( p ){
5223 assert( p->pReader==0 );
5224 fts5StructureInvalidate(p);
5225 sqlite3_finalize(p->pWriter);
5226 sqlite3_finalize(p->pDeleter);
5227 sqlite3_finalize(p->pIdxWriter);
5228 sqlite3_finalize(p->pIdxDeleter);
5229 sqlite3_finalize(p->pIdxSelect);
5230 sqlite3_finalize(p->pDataVersion);
5231 sqlite3Fts5HashFree(p->pHash);
5232 sqlite3_free(p->zDataTbl);
5233 sqlite3_free(p);
5234 }
5235 return rc;
5236 }
5237
/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes in the nChar character prefix of
** the buffer, or 0 if there are less than nChar characters in total.
**
** A character is a single byte, or a byte with value 0xC0 or greater
** together with all continuation bytes (10xxxxxx) that directly follow
** it. This is the same rule as used by fts5IndexCharlen().
**
** No byte at or beyond offset nByte is ever read, so the buffer need not
** be nul-terminated and the value returned never exceeds nByte.
*/
int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,                  /* Buffer containing utf-8 text */
  int nByte,                      /* Size of buffer p in bytes */
  int nChar                       /* Number of characters in prefix */
){
  int n = 0;                      /* Bytes consumed so far */
  int i;                          /* Characters consumed so far */
  for(i=0; i<nChar; i++){
    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
    if( (unsigned char)p[n++]>=0xc0 ){
      /* Skip continuation bytes, stopping at the end of the buffer. */
      while( n<nByte && (p[n] & 0xc0)==0x80 ) n++;
    }
  }
  return n;
}
5258
/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string. A byte with value 0xC0 or greater is
** counted together with all continuation bytes (10xxxxxx) that follow
** it; every other byte counts as one character.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nChar;                      /* Characters counted so far */
  int iByte = 0;                  /* Offset of next byte to read */

  for(nChar=0; iByte<nIn; nChar++){
    unsigned char c = (unsigned char)pIn[iByte++];
    if( c>=0xc0 ){
      while( iByte<nIn && (pIn[iByte] & 0xc0)==0x80 ){
        iByte++;
      }
    }
  }
  return nChar;
}
5274
5275 /*
5276 ** Insert or remove data to or from the index. Each time a document is
5277 ** added to or removed from the index, this function is called one or more
5278 ** times.
5279 **
5280 ** For an insert, it must be called once for each token in the new document.
5281 ** If the operation is a delete, it must be called (at least) once for each
5282 ** unique token in the document with an iCol value less than zero. The iPos
5283 ** argument is ignored for a delete.
5284 */
sqlite3Fts5IndexWrite(Fts5Index * p,int iCol,int iPos,const char * pToken,int nToken)5285 int sqlite3Fts5IndexWrite(
5286 Fts5Index *p, /* Index to write to */
5287 int iCol, /* Column token appears in (-ve -> delete) */
5288 int iPos, /* Position of token within column */
5289 const char *pToken, int nToken /* Token to add or remove to or from index */
5290 ){
5291 int i; /* Used to iterate through indexes */
5292 int rc = SQLITE_OK; /* Return code */
5293 Fts5Config *pConfig = p->pConfig;
5294
5295 assert( p->rc==SQLITE_OK );
5296 assert( (iCol<0)==p->bDelete );
5297
5298 /* Add the entry to the main terms index. */
5299 rc = sqlite3Fts5HashWrite(
5300 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
5301 );
5302
5303 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
5304 const int nChar = pConfig->aPrefix[i];
5305 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
5306 if( nByte ){
5307 rc = sqlite3Fts5HashWrite(p->pHash,
5308 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
5309 nByte
5310 );
5311 }
5312 }
5313
5314 return rc;
5315 }
5316
5317 /*
5318 ** Open a new iterator to iterate though all rowid that match the
5319 ** specified token or token prefix.
5320 */
sqlite3Fts5IndexQuery(Fts5Index * p,const char * pToken,int nToken,int flags,Fts5Colset * pColset,Fts5IndexIter ** ppIter)5321 int sqlite3Fts5IndexQuery(
5322 Fts5Index *p, /* FTS index to query */
5323 const char *pToken, int nToken, /* Token (or prefix) to query for */
5324 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
5325 Fts5Colset *pColset, /* Match these columns only */
5326 Fts5IndexIter **ppIter /* OUT: New iterator object */
5327 ){
5328 Fts5Config *pConfig = p->pConfig;
5329 Fts5Iter *pRet = 0;
5330 Fts5Buffer buf = {0, 0, 0};
5331
5332 /* If the QUERY_SCAN flag is set, all other flags must be clear. */
5333 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
5334
5335 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
5336 int iIdx = 0; /* Index to search */
5337 if( nToken ) memcpy(&buf.p[1], pToken, nToken);
5338
5339 /* Figure out which index to search and set iIdx accordingly. If this
5340 ** is a prefix query for which there is no prefix index, set iIdx to
5341 ** greater than pConfig->nPrefix to indicate that the query will be
5342 ** satisfied by scanning multiple terms in the main index.
5343 **
5344 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
5345 ** prefix-query. Instead of using a prefix-index (if one exists),
5346 ** evaluate the prefix query using the main FTS index. This is used
5347 ** for internal sanity checking by the integrity-check in debug
5348 ** mode only. */
5349 #ifdef SQLITE_DEBUG
5350 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
5351 assert( flags & FTS5INDEX_QUERY_PREFIX );
5352 iIdx = 1+pConfig->nPrefix;
5353 }else
5354 #endif
5355 if( flags & FTS5INDEX_QUERY_PREFIX ){
5356 int nChar = fts5IndexCharlen(pToken, nToken);
5357 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
5358 if( pConfig->aPrefix[iIdx-1]==nChar ) break;
5359 }
5360 }
5361
5362 if( iIdx<=pConfig->nPrefix ){
5363 /* Straight index lookup */
5364 Fts5Structure *pStruct = fts5StructureRead(p);
5365 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
5366 if( pStruct ){
5367 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
5368 pColset, buf.p, nToken+1, -1, 0, &pRet
5369 );
5370 fts5StructureRelease(pStruct);
5371 }
5372 }else{
5373 /* Scan multiple terms in the main index */
5374 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
5375 buf.p[0] = FTS5_MAIN_PREFIX;
5376 fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
5377 assert( p->rc!=SQLITE_OK || pRet->pColset==0 );
5378 fts5IterSetOutputCb(&p->rc, pRet);
5379 if( p->rc==SQLITE_OK ){
5380 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
5381 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
5382 }
5383 }
5384
5385 if( p->rc ){
5386 sqlite3Fts5IterClose((Fts5IndexIter*)pRet);
5387 pRet = 0;
5388 fts5CloseReader(p);
5389 }
5390
5391 *ppIter = &pRet->base;
5392 sqlite3Fts5BufferFree(&buf);
5393 }
5394 return fts5IndexReturn(p);
5395 }
5396
5397 /*
5398 ** Return true if the iterator passed as the only argument is at EOF.
5399 */
5400 /*
5401 ** Move to the next matching rowid.
5402 */
sqlite3Fts5IterNext(Fts5IndexIter * pIndexIter)5403 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
5404 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5405 assert( pIter->pIndex->rc==SQLITE_OK );
5406 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
5407 return fts5IndexReturn(pIter->pIndex);
5408 }
5409
5410 /*
5411 ** Move to the next matching term/rowid. Used by the fts5vocab module.
5412 */
sqlite3Fts5IterNextScan(Fts5IndexIter * pIndexIter)5413 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
5414 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5415 Fts5Index *p = pIter->pIndex;
5416
5417 assert( pIter->pIndex->rc==SQLITE_OK );
5418
5419 fts5MultiIterNext(p, pIter, 0, 0);
5420 if( p->rc==SQLITE_OK ){
5421 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
5422 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
5423 fts5DataRelease(pSeg->pLeaf);
5424 pSeg->pLeaf = 0;
5425 pIter->base.bEof = 1;
5426 }
5427 }
5428
5429 return fts5IndexReturn(pIter->pIndex);
5430 }
5431
5432 /*
5433 ** Move to the next matching rowid that occurs at or after iMatch. The
5434 ** definition of "at or after" depends on whether this iterator iterates
5435 ** in ascending or descending rowid order.
5436 */
sqlite3Fts5IterNextFrom(Fts5IndexIter * pIndexIter,i64 iMatch)5437 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
5438 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5439 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
5440 return fts5IndexReturn(pIter->pIndex);
5441 }
5442
5443 /*
5444 ** Return the current term.
5445 */
sqlite3Fts5IterTerm(Fts5IndexIter * pIndexIter,int * pn)5446 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
5447 int n;
5448 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
5449 *pn = n-1;
5450 return &z[1];
5451 }
5452
5453 /*
5454 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
5455 */
sqlite3Fts5IterClose(Fts5IndexIter * pIndexIter)5456 void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
5457 if( pIndexIter ){
5458 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5459 Fts5Index *pIndex = pIter->pIndex;
5460 fts5MultiIterFree(pIter);
5461 fts5CloseReader(pIndex);
5462 }
5463 }
5464
5465 /*
5466 ** Read and decode the "averages" record from the database.
5467 **
5468 ** Parameter anSize must point to an array of size nCol, where nCol is
5469 ** the number of user defined columns in the FTS table.
5470 */
sqlite3Fts5IndexGetAverages(Fts5Index * p,i64 * pnRow,i64 * anSize)5471 int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
5472 int nCol = p->pConfig->nCol;
5473 Fts5Data *pData;
5474
5475 *pnRow = 0;
5476 memset(anSize, 0, sizeof(i64) * nCol);
5477 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
5478 if( p->rc==SQLITE_OK && pData->nn ){
5479 int i = 0;
5480 int iCol;
5481 i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
5482 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
5483 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
5484 }
5485 }
5486
5487 fts5DataRelease(pData);
5488 return fts5IndexReturn(p);
5489 }
5490
5491 /*
5492 ** Replace the current "averages" record with the contents of the buffer
5493 ** supplied as the second argument.
5494 */
sqlite3Fts5IndexSetAverages(Fts5Index * p,const u8 * pData,int nData)5495 int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
5496 assert( p->rc==SQLITE_OK );
5497 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
5498 return fts5IndexReturn(p);
5499 }
5500
5501 /*
5502 ** Return the total number of blocks this module has read from the %_data
5503 ** table since it was created.
5504 */
sqlite3Fts5IndexReads(Fts5Index * p)5505 int sqlite3Fts5IndexReads(Fts5Index *p){
5506 return p->nRead;
5507 }
5508
5509 /*
5510 ** Set the 32-bit cookie value stored at the start of all structure
5511 ** records to the value passed as the second argument.
5512 **
5513 ** Return SQLITE_OK if successful, or an SQLite error code if an error
5514 ** occurs.
5515 */
sqlite3Fts5IndexSetCookie(Fts5Index * p,int iNew)5516 int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
5517 int rc; /* Return code */
5518 Fts5Config *pConfig = p->pConfig; /* Configuration object */
5519 u8 aCookie[4]; /* Binary representation of iNew */
5520 sqlite3_blob *pBlob = 0;
5521
5522 assert( p->rc==SQLITE_OK );
5523 sqlite3Fts5Put32(aCookie, iNew);
5524
5525 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
5526 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
5527 );
5528 if( rc==SQLITE_OK ){
5529 sqlite3_blob_write(pBlob, aCookie, 4, 0);
5530 rc = sqlite3_blob_close(pBlob);
5531 }
5532
5533 return rc;
5534 }
5535
/*
** Read the structure record and release it again straight away. The
** value returned is that produced by fts5IndexReturn().
**
** NOTE(review): presumably the useful effect is something done inside
** fts5StructureRead() as it reads the record - confirm against that
** function.
*/
int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
  fts5StructureRelease(fts5StructureRead(p));
  return fts5IndexReturn(p);
}
5542
5543
5544 /*************************************************************************
5545 **************************************************************************
5546 ** Below this point is the implementation of the integrity-check
5547 ** functionality.
5548 */
5549
5550 /*
5551 ** Return a simple checksum value based on the arguments.
5552 */
sqlite3Fts5IndexEntryCksum(i64 iRowid,int iCol,int iPos,int iIdx,const char * pTerm,int nTerm)5553 u64 sqlite3Fts5IndexEntryCksum(
5554 i64 iRowid,
5555 int iCol,
5556 int iPos,
5557 int iIdx,
5558 const char *pTerm,
5559 int nTerm
5560 ){
5561 int i;
5562 u64 ret = iRowid;
5563 ret += (ret<<3) + iCol;
5564 ret += (ret<<3) + iPos;
5565 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
5566 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
5567 return ret;
5568 }
5569
5570 #ifdef SQLITE_DEBUG
5571 /*
5572 ** This function is purely an internal test. It does not contribute to
5573 ** FTS functionality, or even the integrity-check, in any way.
5574 **
5575 ** Instead, it tests that the same set of pgno/rowid combinations are
5576 ** visited regardless of whether the doclist-index identified by parameters
5577 ** iSegid/iLeaf is iterated in forwards or reverse order.
5578 */
fts5TestDlidxReverse(Fts5Index * p,int iSegid,int iLeaf)5579 static void fts5TestDlidxReverse(
5580 Fts5Index *p,
5581 int iSegid, /* Segment id to load from */
5582 int iLeaf /* Load doclist-index for this leaf */
5583 ){
5584 Fts5DlidxIter *pDlidx = 0;
5585 u64 cksum1 = 13;
5586 u64 cksum2 = 13;
5587
5588 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
5589 fts5DlidxIterEof(p, pDlidx)==0;
5590 fts5DlidxIterNext(p, pDlidx)
5591 ){
5592 i64 iRowid = fts5DlidxIterRowid(pDlidx);
5593 int pgno = fts5DlidxIterPgno(pDlidx);
5594 assert( pgno>iLeaf );
5595 cksum1 += iRowid + ((i64)pgno<<32);
5596 }
5597 fts5DlidxIterFree(pDlidx);
5598 pDlidx = 0;
5599
5600 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
5601 fts5DlidxIterEof(p, pDlidx)==0;
5602 fts5DlidxIterPrev(p, pDlidx)
5603 ){
5604 i64 iRowid = fts5DlidxIterRowid(pDlidx);
5605 int pgno = fts5DlidxIterPgno(pDlidx);
5606 assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
5607 cksum2 += iRowid + ((i64)pgno<<32);
5608 }
5609 fts5DlidxIterFree(pDlidx);
5610 pDlidx = 0;
5611
5612 if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
5613 }
5614
/*
** Run a query for the n byte term z with the given flags and XOR the
** checksum of every entry visited into *pCksum. For detail=none tables
** one checksum per rowid is used (column and offset both zero);
** otherwise one checksum per position-list entry.
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise.
** *pCksum is updated in both cases.
*/
static int fts5QueryCksum(
  Fts5Index *p,                   /* Fts5 index object */
  int iIdx,                       /* Index number passed to the checksum */
  const char *z,                  /* Index key to query for */
  int n,                          /* Size of index key in bytes */
  int flags,                      /* Flags for Fts5IndexQuery */
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  const int bRowidOnly = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
  u64 cksum = *pCksum;
  Fts5IndexIter *pIter = 0;
  int rc;

  for(rc=sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
      rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter);
      rc=sqlite3Fts5IterNext(pIter)
  ){
    i64 rowid = pIter->iRowid;

    if( bRowidOnly ){
      cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
    }else{
      Fts5PoslistReader sReader;
      sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
      while( sReader.bEof==0 ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
        sqlite3Fts5PoslistReaderNext(&sReader);
      }
    }
  }
  sqlite3Fts5IterClose(pIter);

  *pCksum = cksum;
  return rc;
}
5653
5654
5655 /*
5656 ** This function is also purely an internal test. It does not contribute to
5657 ** FTS functionality, or even the integrity-check, in any way.
5658 */
fts5TestTerm(Fts5Index * p,Fts5Buffer * pPrev,const char * z,int n,u64 expected,u64 * pCksum)5659 static void fts5TestTerm(
5660 Fts5Index *p,
5661 Fts5Buffer *pPrev, /* Previous term */
5662 const char *z, int n, /* Possibly new term to test */
5663 u64 expected,
5664 u64 *pCksum
5665 ){
5666 int rc = p->rc;
5667 if( pPrev->n==0 ){
5668 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
5669 }else
5670 if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
5671 u64 cksum3 = *pCksum;
5672 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */
5673 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */
5674 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
5675 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
5676 u64 ck1 = 0;
5677 u64 ck2 = 0;
5678
5679 /* Check that the results returned for ASC and DESC queries are
5680 ** the same. If not, call this corruption. */
5681 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
5682 if( rc==SQLITE_OK ){
5683 int f = flags|FTS5INDEX_QUERY_DESC;
5684 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5685 }
5686 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5687
5688 /* If this is a prefix query, check that the results returned if the
5689 ** the index is disabled are the same. In both ASC and DESC order.
5690 **
5691 ** This check may only be performed if the hash table is empty. This
5692 ** is because the hash table only supports a single scan query at
5693 ** a time, and the multi-iter loop from which this function is called
5694 ** is already performing such a scan. */
5695 if( p->nPendingData==0 ){
5696 if( iIdx>0 && rc==SQLITE_OK ){
5697 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
5698 ck2 = 0;
5699 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5700 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5701 }
5702 if( iIdx>0 && rc==SQLITE_OK ){
5703 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
5704 ck2 = 0;
5705 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5706 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5707 }
5708 }
5709
5710 cksum3 ^= ck1;
5711 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
5712
5713 if( rc==SQLITE_OK && cksum3!=expected ){
5714 rc = FTS5_CORRUPT;
5715 }
5716 *pCksum = cksum3;
5717 }
5718 p->rc = rc;
5719 }
5720
#else
/* Empty replacements for the two internal test routines. They expand to
** nothing, so the integrity-check code below can invoke both names
** unconditionally. */
# define fts5TestDlidxReverse(x,y,z)
# define fts5TestTerm(u,v,w,x,y,z)
#endif
5725
5726 /*
5727 ** Check that:
5728 **
5729 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
5730 ** contain zero terms.
5731 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
5732 ** contain zero rowids.
5733 */
fts5IndexIntegrityCheckEmpty(Fts5Index * p,Fts5StructureSegment * pSeg,int iFirst,int iNoRowid,int iLast)5734 static void fts5IndexIntegrityCheckEmpty(
5735 Fts5Index *p,
5736 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
5737 int iFirst,
5738 int iNoRowid,
5739 int iLast
5740 ){
5741 int i;
5742
5743 /* Now check that the iter.nEmpty leaves following the current leaf
5744 ** (a) exist and (b) contain no terms. */
5745 for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
5746 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
5747 if( pLeaf ){
5748 if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
5749 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
5750 }
5751 fts5DataRelease(pLeaf);
5752 }
5753 }
5754
/*
** Check the page-index of leaf page pLeaf. The page-index is the series of
** varints stored between offsets pLeaf->szLeaf and pLeaf->nn. Each varint
** is added to a running total that gives the offset of the next term on
** the page.
**
** For each entry, this function checks that:
**
**   * the term offset lies before pLeaf->szLeaf,
**   * the term data (and, for all but the first term, the number of
**     prefix bytes kept from the previous term) fits on the page, and
**   * each term compares greater than the one before it, as determined
**     by fts5BufferCompare().
**
** If any check fails, p->rc is set to FTS5_CORRUPT.
**
** All sizes and offsets come from the page itself and so are untrusted.
** The range checks are therefore written as subtractions from szLeaf,
** which cannot overflow, and negative values are rejected explicitly.
*/
static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  int iTermOff = 0;               /* Offset of current term. Always <szLeaf */
  int ii;                         /* Current offset within the page-index */

  Fts5Buffer buf1 = {0,0,0};      /* Current term */
  Fts5Buffer buf2 = {0,0,0};      /* Previous term */

  ii = pLeaf->szLeaf;
  while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
    int nIncr;

    ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);

    if( nIncr<0 || nIncr>=pLeaf->szLeaf-iTermOff ){
      /* The term would start at or after the end of the leaf data. */
      p->rc = FTS5_CORRUPT;
    }else{
      int bFirst = (iTermOff==0); /* True for the first term on the page */
      int iOff;

      iTermOff += nIncr;
      iOff = iTermOff;

      if( bFirst ){
        int nByte;
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
        if( nByte<0 || nByte>pLeaf->szLeaf-iOff ){
          p->rc = FTS5_CORRUPT;
        }else{
          fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
        }
      }else{
        int nKeep, nByte;
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
        if( nKeep<0 || nKeep>buf1.n
         || nByte<0 || nByte>pLeaf->szLeaf-iOff
        ){
          p->rc = FTS5_CORRUPT;
        }else{
          buf1.n = nKeep;
          fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
        }

        if( p->rc==SQLITE_OK ){
          int res = fts5BufferCompare(&buf1, &buf2);
          if( res<=0 ) p->rc = FTS5_CORRUPT;
        }
      }
    }

    /* Remember this term for comparison with the next one. There is no
    ** point in doing so once an error has been recorded, as the loop is
    ** about to exit. */
    if( p->rc==SQLITE_OK ){
      fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
    }
  }

  fts5BufferFree(&buf1);
  fts5BufferFree(&buf2);
}
5804
/*
** Check the internal consistency of segment pSeg. The results of the
** check are reported via p->rc, which is set to FTS5_CORRUPT if a problem
** is found, or to some other error code if an error occurs.
**
** The function iterates through the entries of the %_idx table that
** belong to the segment. For each entry it checks that:
**
**   * the leaf page named by the entry has a page-index and that the first
**     term on it is greater than or equal to the term stored in %_idx,
**   * the page-index of the leaf is well formed,
**   * the leaves between this entry and the previous one contain no
**     terms (and, where required, no rowids), and
**   * if the entry is flagged as having a doclist-index, that each
**     doclist-index entry matches the first rowid of the leaf it refers
**     to, and that the leaves in between carry no rowid.
**
** This is a no-op for a segment with pgnoFirst==0.
*/
static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5Config *pConfig = p->pConfig;
  sqlite3_stmt *pStmt = 0;
  int rc2;
  int iIdxPrevLeaf = pSeg->pgnoFirst-1;
  int iDlidxPrevLeaf = pSeg->pgnoLast;

  if( pSeg->pgnoFirst==0 ) return;

  fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
      "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d",
      pConfig->zDb, pConfig->zName, pSeg->iSegid
  ));

  /* Iterate through the b-tree hierarchy.  */
  while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */

    /* Fetch the pointer first and the size second. This is the order the
    ** SQLite documentation recommends for the column accessors. */
    const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
    int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
    int iIdxLeaf = sqlite3_column_int(pStmt, 2);
    int bIdxDlidx = sqlite3_column_int(pStmt, 3);

    /* If the leaf in question has already been trimmed from the segment,
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iIdxLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
    pLeaf = fts5LeafRead(p, iRow);
    if( pLeaf==0 ) break;

    /* Check that the leaf contains at least one term, and that it is equal
    ** to or larger than the split-key in zIdxTerm.  Also check that if there
    ** is also a rowid pointer within the leaf page header, it points to a
    ** location before the term.  */
    if( pLeaf->nn<=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else{
      int iOff;                   /* Offset of first term on leaf */
      int iRowidOff;              /* Offset of first rowid on leaf */
      int nTerm;                  /* Size of term on leaf in bytes */
      int res;                    /* Comparison of term and split-key */

      iOff = fts5LeafFirstTermOff(pLeaf);
      iRowidOff = fts5LeafFirstRowidOff(pLeaf);
      if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
        /* The term offset is read from the page and so is untrusted. It
        ** must lie within the leaf data before it is dereferenced. */
        p->rc = FTS5_CORRUPT;
      }else{
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
        if( nTerm<0 || nTerm>pLeaf->szLeaf-iOff ){
          /* The term runs off the end of the leaf data. This is the same
          ** rule fts5IntegrityCheckPgidx() applies to the first term. It
          ** is applied here so that memcmp() cannot read past the page. */
          p->rc = FTS5_CORRUPT;
        }else{
          int nCmp = MIN(nTerm, nIdxTerm);
          /* Do not hand memcmp() a NULL pointer, even with a zero size. */
          res = (nCmp>0 && zIdxTerm) ? memcmp(&pLeaf->p[iOff], zIdxTerm, nCmp) : 0;
          if( res==0 ) res = nTerm - nIdxTerm;
          if( res<0 ) p->rc = FTS5_CORRUPT;
        }
      }

      fts5IntegrityCheckPgidx(p, pLeaf);
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;

    /* Now check that the leaves between the previous b-tree entry and
    ** this one (a) exist and (b) contain no terms. */
    fts5IndexIntegrityCheckEmpty(
        p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
    );
    if( p->rc ) break;

    /* If there is a doclist-index, check that it looks right. */
    if( bIdxDlidx ){
      Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
      int iPrevLeaf = iIdxLeaf;
      int iSegid = pSeg->iSegid;
      int iPg = 0;
      i64 iKey;

      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(p, pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf.
        ** Stop as soon as an error is recorded, so that a corrupt page
        ** number cannot cause a long and pointless scan. */
        for(iPg=iPrevLeaf+1;
            p->rc==SQLITE_OK && iPg<fts5DlidxIterPgno(pDlidx);
            iPg++
        ){
          iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = fts5DlidxIterPgno(pDlidx);

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
          ASSERT_SZLEAF_OK(pLeaf);
          if( iRowidOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
          }else{
            fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
          }
          fts5DataRelease(pLeaf);
        }
      }

      iDlidxPrevLeaf = iPg;
      fts5DlidxIterFree(pDlidx);
      fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
    }else{
      iDlidxPrevLeaf = pSeg->pgnoLast;
      /* TODO: Check there is no doclist index */
    }

    iIdxPrevLeaf = iIdxLeaf;
  }

  rc2 = sqlite3_finalize(pStmt);
  if( p->rc==SQLITE_OK ) p->rc = rc2;

  /* TODO: Check that the last leaf visited is the rightmost leaf-page
  ** in the segment (pSeg->pgnoLast). */
}
5937
5938
5939 /*
5940 ** Run internal checks to ensure that the FTS index (a) is internally
5941 ** consistent and (b) contains entries for which the XOR of the checksums
5942 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
5943 **
5944 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
5945 ** checksum does not match. Return SQLITE_OK if all checks pass without
5946 ** error, or some other SQLite error code if another error (e.g. OOM)
5947 ** occurs.
5948 */
sqlite3Fts5IndexIntegrityCheck(Fts5Index * p,u64 cksum)5949 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
5950 int eDetail = p->pConfig->eDetail;
5951 u64 cksum2 = 0; /* Checksum based on contents of indexes */
5952 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
5953 Fts5Iter *pIter; /* Used to iterate through entire index */
5954 Fts5Structure *pStruct; /* Index structure */
5955
5956 #ifdef SQLITE_DEBUG
5957 /* Used by extra internal tests only run if NDEBUG is not defined */
5958 u64 cksum3 = 0; /* Checksum based on contents of indexes */
5959 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
5960 #endif
5961 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
5962
5963 /* Load the FTS index structure */
5964 pStruct = fts5StructureRead(p);
5965
5966 /* Check that the internal nodes of each segment match the leaves */
5967 if( pStruct ){
5968 int iLvl, iSeg;
5969 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
5970 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
5971 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
5972 fts5IndexIntegrityCheckSegment(p, pSeg);
5973 }
5974 }
5975 }
5976
5977 /* The cksum argument passed to this function is a checksum calculated
5978 ** based on all expected entries in the FTS index (including prefix index
5979 ** entries). This block checks that a checksum calculated based on the
5980 ** actual contents of FTS index is identical.
5981 **
5982 ** Two versions of the same checksum are calculated. The first (stack
5983 ** variable cksum2) based on entries extracted from the full-text index
5984 ** while doing a linear scan of each individual index in turn.
5985 **
5986 ** As each term visited by the linear scans, a separate query for the
5987 ** same term is performed. cksum3 is calculated based on the entries
5988 ** extracted by these queries.
5989 */
5990 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
5991 fts5MultiIterEof(p, pIter)==0;
5992 fts5MultiIterNext(p, pIter, 0, 0)
5993 ){
5994 int n; /* Size of term in bytes */
5995 i64 iPos = 0; /* Position read from poslist */
5996 int iOff = 0; /* Offset within poslist */
5997 i64 iRowid = fts5MultiIterRowid(pIter);
5998 char *z = (char*)fts5MultiIterTerm(pIter, &n);
5999
6000 /* If this is a new term, query for it. Update cksum3 with the results. */
6001 fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
6002
6003 if( eDetail==FTS5_DETAIL_NONE ){
6004 if( 0==fts5MultiIterIsEmpty(p, pIter) ){
6005 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
6006 }
6007 }else{
6008 poslist.n = 0;
6009 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
6010 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
6011 int iCol = FTS5_POS2COLUMN(iPos);
6012 int iTokOff = FTS5_POS2OFFSET(iPos);
6013 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
6014 }
6015 }
6016 }
6017 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
6018
6019 fts5MultiIterFree(pIter);
6020 if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
6021
6022 fts5StructureRelease(pStruct);
6023 #ifdef SQLITE_DEBUG
6024 fts5BufferFree(&term);
6025 #endif
6026 fts5BufferFree(&poslist);
6027 return fts5IndexReturn(p);
6028 }
6029
6030 /*************************************************************************
6031 **************************************************************************
6032 ** Below this point is the implementation of the fts5_decode() scalar
6033 ** function only.
6034 */
6035
6036 /*
6037 ** Decode a segment-data rowid from the %_data table. This function is
6038 ** the opposite of macro FTS5_SEGMENT_ROWID().
6039 */
fts5DecodeRowid(i64 iRowid,int * piSegid,int * pbDlidx,int * piHeight,int * piPgno)6040 static void fts5DecodeRowid(
6041 i64 iRowid, /* Rowid from %_data table */
6042 int *piSegid, /* OUT: Segment id */
6043 int *pbDlidx, /* OUT: Dlidx flag */
6044 int *piHeight, /* OUT: Height */
6045 int *piPgno /* OUT: Page number */
6046 ){
6047 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
6048 iRowid >>= FTS5_DATA_PAGE_B;
6049
6050 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
6051 iRowid >>= FTS5_DATA_HEIGHT_B;
6052
6053 *pbDlidx = (int)(iRowid & 0x0001);
6054 iRowid >>= FTS5_DATA_DLI_B;
6055
6056 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
6057 }
6058
/*
** Append a human-readable description of %_data rowid iKey to buffer pBuf.
**
** A rowid with a segment id of zero is described as either "{averages} "
** (if it is FTS5_AVERAGES_ROWID) or "{structure}". Any other rowid is
** described by its segment id, height and page number, prefixed with
** "dlidx " if the doclist-index flag is set.
*/
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int iSegid, iHeight, iPgno, bDlidx;       /* Rowid components */

  fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);

  if( iSegid!=0 ){
    const char *zDlidx = (bDlidx ? "dlidx " : "");
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
        zDlidx, iSegid, iHeight, iPgno
    );
  }else if( iKey==FTS5_AVERAGES_ROWID ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
  }
}
6076
/*
** Append a human-readable description of structure object p to buffer
** pBuf. One "{lvl=...}" group is written per level, each containing one
** "{id=... leaves=first..last}" group per segment on that level.
*/
static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  Fts5Structure *p
){
  int iLvl = 0;                   /* Current level */

  while( iLvl<p->nLevel ){
    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
    int iSeg = 0;                 /* Current segment within pLvl */

    sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
        " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
    );
    while( iSeg<pLvl->nSeg ){
      Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
          pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
      );
      iSeg++;
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
    iLvl++;
  }
}
6098
6099 /*
6100 ** This is part of the fts5_decode() debugging aid.
6101 **
6102 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
6103 ** function appends a human-readable representation of the same object
6104 ** to the buffer passed as the second argument.
6105 */
fts5DecodeStructure(int * pRc,Fts5Buffer * pBuf,const u8 * pBlob,int nBlob)6106 static void fts5DecodeStructure(
6107 int *pRc, /* IN/OUT: error code */
6108 Fts5Buffer *pBuf,
6109 const u8 *pBlob, int nBlob
6110 ){
6111 int rc; /* Return code */
6112 Fts5Structure *p = 0; /* Decoded structure object */
6113
6114 rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
6115 if( rc!=SQLITE_OK ){
6116 *pRc = rc;
6117 return;
6118 }
6119
6120 fts5DebugStructure(pRc, pBuf, p);
6121 fts5StructureRelease(p);
6122 }
6123
6124 /*
6125 ** This is part of the fts5_decode() debugging aid.
6126 **
6127 ** Arguments pBlob/nBlob contain an "averages" record. This function
6128 ** appends a human-readable representation of record to the buffer passed
6129 ** as the second argument.
6130 */
fts5DecodeAverages(int * pRc,Fts5Buffer * pBuf,const u8 * pBlob,int nBlob)6131 static void fts5DecodeAverages(
6132 int *pRc, /* IN/OUT: error code */
6133 Fts5Buffer *pBuf,
6134 const u8 *pBlob, int nBlob
6135 ){
6136 int i = 0;
6137 const char *zSpace = "";
6138
6139 while( i<nBlob ){
6140 u64 iVal;
6141 i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
6142 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
6143 zSpace = " ";
6144 }
6145 }
6146
6147 /*
6148 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read
6149 ** each varint and append its string representation to buffer pBuf. Return
6150 ** after either the input buffer is exhausted or a 0 value is read.
6151 **
6152 ** The return value is the number of bytes read from the input buffer.
6153 */
fts5DecodePoslist(int * pRc,Fts5Buffer * pBuf,const u8 * a,int n)6154 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6155 int iOff = 0;
6156 while( iOff<n ){
6157 int iVal;
6158 iOff += fts5GetVarint32(&a[iOff], iVal);
6159 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
6160 }
6161 return iOff;
6162 }
6163
6164 /*
6165 ** The start of buffer (a/n) contains the start of a doclist. The doclist
6166 ** may or may not finish within the buffer. This function appends a text
6167 ** representation of the part of the doclist that is present to buffer
6168 ** pBuf.
6169 **
6170 ** The return value is the number of bytes read from the input buffer.
6171 */
fts5DecodeDoclist(int * pRc,Fts5Buffer * pBuf,const u8 * a,int n)6172 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6173 i64 iDocid = 0;
6174 int iOff = 0;
6175
6176 if( n>0 ){
6177 iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
6178 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6179 }
6180 while( iOff<n ){
6181 int nPos;
6182 int bDel;
6183 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
6184 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
6185 iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
6186 if( iOff<n ){
6187 i64 iDelta;
6188 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
6189 iDocid += iDelta;
6190 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6191 }
6192 }
6193
6194 return iOff;
6195 }
6196
6197 /*
6198 ** This function is part of the fts5_decode() debugging function. It is
6199 ** only ever used with detail=none tables.
6200 **
6201 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
6202 ** tables. This function appends a human-readable version of that list to
6203 ** buffer pBuf.
6204 **
6205 ** If *pRc is other than SQLITE_OK when this function is called, it is a
6206 ** no-op. If an OOM or other error occurs within this function, *pRc is
6207 ** set to an SQLite error code before returning. The final state of buffer
6208 ** pBuf is undefined in this case.
6209 */
fts5DecodeRowidList(int * pRc,Fts5Buffer * pBuf,const u8 * pData,int nData)6210 static void fts5DecodeRowidList(
6211 int *pRc, /* IN/OUT: Error code */
6212 Fts5Buffer *pBuf, /* Buffer to append text to */
6213 const u8 *pData, int nData /* Data to decode list-of-rowids from */
6214 ){
6215 int i = 0;
6216 i64 iRowid = 0;
6217
6218 while( i<nData ){
6219 const char *zApp = "";
6220 u64 iVal;
6221 i += sqlite3Fts5GetVarint(&pData[i], &iVal);
6222 iRowid += iVal;
6223
6224 if( i<nData && pData[i]==0x00 ){
6225 i++;
6226 if( i<nData && pData[i]==0x00 ){
6227 i++;
6228 zApp = "+";
6229 }else{
6230 zApp = "*";
6231 }
6232 }
6233
6234 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
6235 }
6236 }
6237
6238 /*
6239 ** The implementation of user-defined scalar function fts5_decode().
6240 */
fts5DecodeFunction(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)6241 static void fts5DecodeFunction(
6242 sqlite3_context *pCtx, /* Function call context */
6243 int nArg, /* Number of args (always 2) */
6244 sqlite3_value **apVal /* Function arguments */
6245 ){
6246 i64 iRowid; /* Rowid for record being decoded */
6247 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
6248 const u8 *aBlob; int n; /* Record to decode */
6249 u8 *a = 0;
6250 Fts5Buffer s; /* Build up text to return here */
6251 int rc = SQLITE_OK; /* Return code */
6252 int nSpace = 0;
6253 int eDetailNone = (sqlite3_user_data(pCtx)!=0);
6254
6255 assert( nArg==2 );
6256 UNUSED_PARAM(nArg);
6257 memset(&s, 0, sizeof(Fts5Buffer));
6258 iRowid = sqlite3_value_int64(apVal[0]);
6259
6260 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
6261 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
6262 ** buffer overreads even if the record is corrupt. */
6263 n = sqlite3_value_bytes(apVal[1]);
6264 aBlob = sqlite3_value_blob(apVal[1]);
6265 nSpace = n + FTS5_DATA_ZERO_PADDING;
6266 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
6267 if( a==0 ) goto decode_out;
6268 memcpy(a, aBlob, n);
6269
6270
6271 fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
6272
6273 fts5DebugRowid(&rc, &s, iRowid);
6274 if( bDlidx ){
6275 Fts5Data dlidx;
6276 Fts5DlidxLvl lvl;
6277
6278 dlidx.p = a;
6279 dlidx.nn = n;
6280
6281 memset(&lvl, 0, sizeof(Fts5DlidxLvl));
6282 lvl.pData = &dlidx;
6283 lvl.iLeafPgno = iPgno;
6284
6285 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
6286 sqlite3Fts5BufferAppendPrintf(&rc, &s,
6287 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
6288 );
6289 }
6290 }else if( iSegid==0 ){
6291 if( iRowid==FTS5_AVERAGES_ROWID ){
6292 fts5DecodeAverages(&rc, &s, a, n);
6293 }else{
6294 fts5DecodeStructure(&rc, &s, a, n);
6295 }
6296 }else if( eDetailNone ){
6297 Fts5Buffer term; /* Current term read from page */
6298 int szLeaf;
6299 int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
6300 int iTermOff;
6301 int nKeep = 0;
6302 int iOff;
6303
6304 memset(&term, 0, sizeof(Fts5Buffer));
6305
6306 /* Decode any entries that occur before the first term. */
6307 if( szLeaf<n ){
6308 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
6309 }else{
6310 iTermOff = szLeaf;
6311 }
6312 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
6313
6314 iOff = iTermOff;
6315 while( iOff<szLeaf ){
6316 int nAppend;
6317
6318 /* Read the term data for the next term*/
6319 iOff += fts5GetVarint32(&a[iOff], nAppend);
6320 term.n = nKeep;
6321 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
6322 sqlite3Fts5BufferAppendPrintf(
6323 &rc, &s, " term=%.*s", term.n, (const char*)term.p
6324 );
6325 iOff += nAppend;
6326
6327 /* Figure out where the doclist for this term ends */
6328 if( iPgidxOff<n ){
6329 int nIncr;
6330 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
6331 iTermOff += nIncr;
6332 }else{
6333 iTermOff = szLeaf;
6334 }
6335
6336 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
6337 iOff = iTermOff;
6338 if( iOff<szLeaf ){
6339 iOff += fts5GetVarint32(&a[iOff], nKeep);
6340 }
6341 }
6342
6343 fts5BufferFree(&term);
6344 }else{
6345 Fts5Buffer term; /* Current term read from page */
6346 int szLeaf; /* Offset of pgidx in a[] */
6347 int iPgidxOff;
6348 int iPgidxPrev = 0; /* Previous value read from pgidx */
6349 int iTermOff = 0;
6350 int iRowidOff = 0;
6351 int iOff;
6352 int nDoclist;
6353
6354 memset(&term, 0, sizeof(Fts5Buffer));
6355
6356 if( n<4 ){
6357 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
6358 goto decode_out;
6359 }else{
6360 iRowidOff = fts5GetU16(&a[0]);
6361 iPgidxOff = szLeaf = fts5GetU16(&a[2]);
6362 if( iPgidxOff<n ){
6363 fts5GetVarint32(&a[iPgidxOff], iTermOff);
6364 }
6365 }
6366
6367 /* Decode the position list tail at the start of the page */
6368 if( iRowidOff!=0 ){
6369 iOff = iRowidOff;
6370 }else if( iTermOff!=0 ){
6371 iOff = iTermOff;
6372 }else{
6373 iOff = szLeaf;
6374 }
6375 fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
6376
6377 /* Decode any more doclist data that appears on the page before the
6378 ** first term. */
6379 nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
6380 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
6381
6382 while( iPgidxOff<n ){
6383 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
6384 int nByte; /* Bytes of data */
6385 int iEnd;
6386
6387 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
6388 iPgidxPrev += nByte;
6389 iOff = iPgidxPrev;
6390
6391 if( iPgidxOff<n ){
6392 fts5GetVarint32(&a[iPgidxOff], nByte);
6393 iEnd = iPgidxPrev + nByte;
6394 }else{
6395 iEnd = szLeaf;
6396 }
6397
6398 if( bFirst==0 ){
6399 iOff += fts5GetVarint32(&a[iOff], nByte);
6400 term.n = nByte;
6401 }
6402 iOff += fts5GetVarint32(&a[iOff], nByte);
6403 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
6404 iOff += nByte;
6405
6406 sqlite3Fts5BufferAppendPrintf(
6407 &rc, &s, " term=%.*s", term.n, (const char*)term.p
6408 );
6409 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
6410 }
6411
6412 fts5BufferFree(&term);
6413 }
6414
6415 decode_out:
6416 sqlite3_free(a);
6417 if( rc==SQLITE_OK ){
6418 sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
6419 }else{
6420 sqlite3_result_error_code(pCtx, rc);
6421 }
6422 fts5BufferFree(&s);
6423 }
6424
6425 /*
6426 ** The implementation of user-defined scalar function fts5_rowid().
6427 */
fts5RowidFunction(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)6428 static void fts5RowidFunction(
6429 sqlite3_context *pCtx, /* Function call context */
6430 int nArg, /* Number of args (always 2) */
6431 sqlite3_value **apVal /* Function arguments */
6432 ){
6433 const char *zArg;
6434 if( nArg==0 ){
6435 sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
6436 }else{
6437 zArg = (const char*)sqlite3_value_text(apVal[0]);
6438 if( 0==sqlite3_stricmp(zArg, "segment") ){
6439 i64 iRowid;
6440 int segid, pgno;
6441 if( nArg!=3 ){
6442 sqlite3_result_error(pCtx,
6443 "should be: fts5_rowid('segment', segid, pgno))", -1
6444 );
6445 }else{
6446 segid = sqlite3_value_int(apVal[1]);
6447 pgno = sqlite3_value_int(apVal[2]);
6448 iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
6449 sqlite3_result_int64(pCtx, iRowid);
6450 }
6451 }else{
6452 sqlite3_result_error(pCtx,
6453 "first arg to fts5_rowid() must be 'segment'" , -1
6454 );
6455 }
6456 }
6457 }
6458
6459 /*
6460 ** This is called as part of registering the FTS5 module with database
6461 ** connection db. It registers several user-defined scalar functions useful
6462 ** with FTS5.
6463 **
6464 ** If successful, SQLITE_OK is returned. If an error occurs, some other
6465 ** SQLite error code is returned instead.
6466 */
sqlite3Fts5IndexInit(sqlite3 * db)6467 int sqlite3Fts5IndexInit(sqlite3 *db){
6468 int rc = sqlite3_create_function(
6469 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
6470 );
6471
6472 if( rc==SQLITE_OK ){
6473 rc = sqlite3_create_function(
6474 db, "fts5_decode_none", 2,
6475 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
6476 );
6477 }
6478
6479 if( rc==SQLITE_OK ){
6480 rc = sqlite3_create_function(
6481 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
6482 );
6483 }
6484 return rc;
6485 }
6486
6487
/*
** Compare the value returned by fts5IndexDataVersion() with the version
** recorded in p->iStructVersion. If the two differ, call
** fts5StructureInvalidate() on the index.
**
** The return value is that of fts5IndexReturn().
*/
int sqlite3Fts5IndexReset(Fts5Index *p){
  assert( p->pStruct==0 || p->iStructVersion!=0 );

  if( fts5IndexDataVersion(p)==p->iStructVersion ){
    /* Version unchanged. Nothing to invalidate. */
  }else{
    fts5StructureInvalidate(p);
  }

  return fts5IndexReturn(p);
}
6495