1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 */
13 
14 
15 
16 #include "fts5Int.h"
17 
sqlite3Fts5BufferSize(int * pRc,Fts5Buffer * pBuf,u32 nByte)18 int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){
19   if( (u32)pBuf->nSpace<nByte ){
20     u64 nNew = pBuf->nSpace ? pBuf->nSpace : 64;
21     u8 *pNew;
22     while( nNew<nByte ){
23       nNew = nNew * 2;
24     }
25     pNew = sqlite3_realloc64(pBuf->p, nNew);
26     if( pNew==0 ){
27       *pRc = SQLITE_NOMEM;
28       return 1;
29     }else{
30       pBuf->nSpace = (int)nNew;
31       pBuf->p = pNew;
32     }
33   }
34   return 0;
35 }
36 
37 
38 /*
39 ** Encode value iVal as an SQLite varint and append it to the buffer object
40 ** pBuf. If an OOM error occurs, set the error code in p.
41 */
sqlite3Fts5BufferAppendVarint(int * pRc,Fts5Buffer * pBuf,i64 iVal)42 void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
43   if( fts5BufferGrow(pRc, pBuf, 9) ) return;
44   pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal);
45 }
46 
sqlite3Fts5Put32(u8 * aBuf,int iVal)47 void sqlite3Fts5Put32(u8 *aBuf, int iVal){
48   aBuf[0] = (iVal>>24) & 0x00FF;
49   aBuf[1] = (iVal>>16) & 0x00FF;
50   aBuf[2] = (iVal>> 8) & 0x00FF;
51   aBuf[3] = (iVal>> 0) & 0x00FF;
52 }
53 
sqlite3Fts5Get32(const u8 * aBuf)54 int sqlite3Fts5Get32(const u8 *aBuf){
55   return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]);
56 }
57 
58 /*
59 ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set
60 ** the error code in p. If an error has already occurred when this function
61 ** is called, it is a no-op.
62 */
sqlite3Fts5BufferAppendBlob(int * pRc,Fts5Buffer * pBuf,u32 nData,const u8 * pData)63 void sqlite3Fts5BufferAppendBlob(
64   int *pRc,
65   Fts5Buffer *pBuf,
66   u32 nData,
67   const u8 *pData
68 ){
69   assert_nc( *pRc || nData>=0 );
70   if( nData ){
71     if( fts5BufferGrow(pRc, pBuf, nData) ) return;
72     memcpy(&pBuf->p[pBuf->n], pData, nData);
73     pBuf->n += nData;
74   }
75 }
76 
77 /*
78 ** Append the nul-terminated string zStr to the buffer pBuf. This function
79 ** ensures that the byte following the buffer data is set to 0x00, even
80 ** though this byte is not included in the pBuf->n count.
81 */
sqlite3Fts5BufferAppendString(int * pRc,Fts5Buffer * pBuf,const char * zStr)82 void sqlite3Fts5BufferAppendString(
83   int *pRc,
84   Fts5Buffer *pBuf,
85   const char *zStr
86 ){
87   int nStr = (int)strlen(zStr);
88   sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);
89   pBuf->n--;
90 }
91 
92 /*
93 ** Argument zFmt is a printf() style format string. This function performs
94 ** the printf() style processing, then appends the results to buffer pBuf.
95 **
96 ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte
97 ** following the buffer data is set to 0x00, even though this byte is not
98 ** included in the pBuf->n count.
99 */
sqlite3Fts5BufferAppendPrintf(int * pRc,Fts5Buffer * pBuf,char * zFmt,...)100 void sqlite3Fts5BufferAppendPrintf(
101   int *pRc,
102   Fts5Buffer *pBuf,
103   char *zFmt, ...
104 ){
105   if( *pRc==SQLITE_OK ){
106     char *zTmp;
107     va_list ap;
108     va_start(ap, zFmt);
109     zTmp = sqlite3_vmprintf(zFmt, ap);
110     va_end(ap);
111 
112     if( zTmp==0 ){
113       *pRc = SQLITE_NOMEM;
114     }else{
115       sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp);
116       sqlite3_free(zTmp);
117     }
118   }
119 }
120 
sqlite3Fts5Mprintf(int * pRc,const char * zFmt,...)121 char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
122   char *zRet = 0;
123   if( *pRc==SQLITE_OK ){
124     va_list ap;
125     va_start(ap, zFmt);
126     zRet = sqlite3_vmprintf(zFmt, ap);
127     va_end(ap);
128     if( zRet==0 ){
129       *pRc = SQLITE_NOMEM;
130     }
131   }
132   return zRet;
133 }
134 
135 
136 /*
137 ** Free any buffer allocated by pBuf. Zero the structure before returning.
138 */
sqlite3Fts5BufferFree(Fts5Buffer * pBuf)139 void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
140   sqlite3_free(pBuf->p);
141   memset(pBuf, 0, sizeof(Fts5Buffer));
142 }
143 
144 /*
145 ** Zero the contents of the buffer object. But do not free the associated
146 ** memory allocation.
147 */
sqlite3Fts5BufferZero(Fts5Buffer * pBuf)148 void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
149   pBuf->n = 0;
150 }
151 
152 /*
153 ** Set the buffer to contain nData/pData. If an OOM error occurs, leave an
154 ** the error code in p. If an error has already occurred when this function
155 ** is called, it is a no-op.
156 */
sqlite3Fts5BufferSet(int * pRc,Fts5Buffer * pBuf,int nData,const u8 * pData)157 void sqlite3Fts5BufferSet(
158   int *pRc,
159   Fts5Buffer *pBuf,
160   int nData,
161   const u8 *pData
162 ){
163   pBuf->n = 0;
164   sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
165 }
166 
sqlite3Fts5PoslistNext64(const u8 * a,int n,int * pi,i64 * piOff)167 int sqlite3Fts5PoslistNext64(
168   const u8 *a, int n,             /* Buffer containing poslist */
169   int *pi,                        /* IN/OUT: Offset within a[] */
170   i64 *piOff                      /* IN/OUT: Current offset */
171 ){
172   int i = *pi;
173   if( i>=n ){
174     /* EOF */
175     *piOff = -1;
176     return 1;
177   }else{
178     i64 iOff = *piOff;
179     int iVal;
180     fts5FastGetVarint32(a, i, iVal);
181     if( iVal<=1 ){
182       if( iVal==0 ){
183         *pi = i;
184         return 0;
185       }
186       fts5FastGetVarint32(a, i, iVal);
187       iOff = ((i64)iVal) << 32;
188       fts5FastGetVarint32(a, i, iVal);
189       if( iVal<2 ){
190         /* This is a corrupt record. So stop parsing it here. */
191         *piOff = -1;
192         return 1;
193       }
194     }
195     *piOff = iOff + ((iVal-2) & 0x7FFFFFFF);
196     *pi = i;
197     return 0;
198   }
199 }
200 
201 
202 /*
203 ** Advance the iterator object passed as the only argument. Return true
204 ** if the iterator reaches EOF, or false otherwise.
205 */
sqlite3Fts5PoslistReaderNext(Fts5PoslistReader * pIter)206 int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
207   if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){
208     pIter->bEof = 1;
209   }
210   return pIter->bEof;
211 }
212 
sqlite3Fts5PoslistReaderInit(const u8 * a,int n,Fts5PoslistReader * pIter)213 int sqlite3Fts5PoslistReaderInit(
214   const u8 *a, int n,             /* Poslist buffer to iterate through */
215   Fts5PoslistReader *pIter        /* Iterator object to initialize */
216 ){
217   memset(pIter, 0, sizeof(*pIter));
218   pIter->a = a;
219   pIter->n = n;
220   sqlite3Fts5PoslistReaderNext(pIter);
221   return pIter->bEof;
222 }
223 
224 /*
225 ** Append position iPos to the position list being accumulated in buffer
226 ** pBuf, which must be already be large enough to hold the new data.
227 ** The previous position written to this list is *piPrev. *piPrev is set
228 ** to iPos before returning.
229 */
sqlite3Fts5PoslistSafeAppend(Fts5Buffer * pBuf,i64 * piPrev,i64 iPos)230 void sqlite3Fts5PoslistSafeAppend(
231   Fts5Buffer *pBuf,
232   i64 *piPrev,
233   i64 iPos
234 ){
235   static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
236   if( (iPos & colmask) != (*piPrev & colmask) ){
237     pBuf->p[pBuf->n++] = 1;
238     pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32));
239     *piPrev = (iPos & colmask);
240   }
241   pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2);
242   *piPrev = iPos;
243 }
244 
sqlite3Fts5PoslistWriterAppend(Fts5Buffer * pBuf,Fts5PoslistWriter * pWriter,i64 iPos)245 int sqlite3Fts5PoslistWriterAppend(
246   Fts5Buffer *pBuf,
247   Fts5PoslistWriter *pWriter,
248   i64 iPos
249 ){
250   int rc = 0;   /* Initialized only to suppress erroneous warning from Clang */
251   if( fts5BufferGrow(&rc, pBuf, 5+5+5) ) return rc;
252   sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos);
253   return SQLITE_OK;
254 }
255 
sqlite3Fts5MallocZero(int * pRc,sqlite3_int64 nByte)256 void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){
257   void *pRet = 0;
258   if( *pRc==SQLITE_OK ){
259     pRet = sqlite3_malloc64(nByte);
260     if( pRet==0 ){
261       if( nByte>0 ) *pRc = SQLITE_NOMEM;
262     }else{
263       memset(pRet, 0, (size_t)nByte);
264     }
265   }
266   return pRet;
267 }
268 
269 /*
270 ** Return a nul-terminated copy of the string indicated by pIn. If nIn
271 ** is non-negative, then it is the length of the string in bytes. Otherwise,
272 ** the length of the string is determined using strlen().
273 **
274 ** It is the responsibility of the caller to eventually free the returned
275 ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned.
276 */
sqlite3Fts5Strndup(int * pRc,const char * pIn,int nIn)277 char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
278   char *zRet = 0;
279   if( *pRc==SQLITE_OK ){
280     if( nIn<0 ){
281       nIn = (int)strlen(pIn);
282     }
283     zRet = (char*)sqlite3_malloc(nIn+1);
284     if( zRet ){
285       memcpy(zRet, pIn, nIn);
286       zRet[nIn] = '\0';
287     }else{
288       *pRc = SQLITE_NOMEM;
289     }
290   }
291   return zRet;
292 }
293 
294 
295 /*
296 ** Return true if character 't' may be part of an FTS5 bareword, or false
297 ** otherwise. Characters that may be part of barewords:
298 **
299 **   * All non-ASCII characters,
300 **   * The 52 upper and lower case ASCII characters, and
301 **   * The 10 integer ASCII characters.
302 **   * The underscore character "_" (0x5F).
303 **   * The unicode "subsitute" character (0x1A).
304 */
sqlite3Fts5IsBareword(char t)305 int sqlite3Fts5IsBareword(char t){
306   u8 aBareword[128] = {
307     0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
308     0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 1, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
309     0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
310     1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
311     0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
312     1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
313     0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
314     1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
315   };
316 
317   return (t & 0x80) || aBareword[(int)t];
318 }
319 
320 
321 /*************************************************************************
322 */
323 typedef struct Fts5TermsetEntry Fts5TermsetEntry;
324 struct Fts5TermsetEntry {
325   char *pTerm;
326   int nTerm;
327   int iIdx;                       /* Index (main or aPrefix[] entry) */
328   Fts5TermsetEntry *pNext;
329 };
330 
331 struct Fts5Termset {
332   Fts5TermsetEntry *apHash[512];
333 };
334 
sqlite3Fts5TermsetNew(Fts5Termset ** pp)335 int sqlite3Fts5TermsetNew(Fts5Termset **pp){
336   int rc = SQLITE_OK;
337   *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset));
338   return rc;
339 }
340 
sqlite3Fts5TermsetAdd(Fts5Termset * p,int iIdx,const char * pTerm,int nTerm,int * pbPresent)341 int sqlite3Fts5TermsetAdd(
342   Fts5Termset *p,
343   int iIdx,
344   const char *pTerm, int nTerm,
345   int *pbPresent
346 ){
347   int rc = SQLITE_OK;
348   *pbPresent = 0;
349   if( p ){
350     int i;
351     u32 hash = 13;
352     Fts5TermsetEntry *pEntry;
353 
354     /* Calculate a hash value for this term. This is the same hash checksum
355     ** used by the fts5_hash.c module. This is not important for correct
356     ** operation of the module, but is necessary to ensure that some tests
357     ** designed to produce hash table collisions really do work.  */
358     for(i=nTerm-1; i>=0; i--){
359       hash = (hash << 3) ^ hash ^ pTerm[i];
360     }
361     hash = (hash << 3) ^ hash ^ iIdx;
362     hash = hash % ArraySize(p->apHash);
363 
364     for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
365       if( pEntry->iIdx==iIdx
366           && pEntry->nTerm==nTerm
367           && memcmp(pEntry->pTerm, pTerm, nTerm)==0
368       ){
369         *pbPresent = 1;
370         break;
371       }
372     }
373 
374     if( pEntry==0 ){
375       pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
376       if( pEntry ){
377         pEntry->pTerm = (char*)&pEntry[1];
378         pEntry->nTerm = nTerm;
379         pEntry->iIdx = iIdx;
380         memcpy(pEntry->pTerm, pTerm, nTerm);
381         pEntry->pNext = p->apHash[hash];
382         p->apHash[hash] = pEntry;
383       }
384     }
385   }
386 
387   return rc;
388 }
389 
sqlite3Fts5TermsetFree(Fts5Termset * p)390 void sqlite3Fts5TermsetFree(Fts5Termset *p){
391   if( p ){
392     u32 i;
393     for(i=0; i<ArraySize(p->apHash); i++){
394       Fts5TermsetEntry *pEntry = p->apHash[i];
395       while( pEntry ){
396         Fts5TermsetEntry *pDel = pEntry;
397         pEntry = pEntry->pNext;
398         sqlite3_free(pDel);
399       }
400     }
401     sqlite3_free(p);
402   }
403 }
404