1 /*
2 ** 2015-11-16
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This file implements a virtual table for SQLite3 around the LSM
14 ** storage engine from SQLite4.
15 **
16 ** USAGE
17 **
18 **   CREATE VIRTUAL TABLE demo USING lsm1(filename,key,keytype,value1,...);
19 **
20 ** The filename parameter is the name of the LSM database file, which is
21 ** separate and distinct from the SQLite3 database file.
22 **
23 ** The keytype must be one of: UINT, TEXT, BLOB.  All keys must be of that
24 ** one type.  "UINT" means unsigned integer.  The values may be of any
25 ** SQLite datatype: BLOB, TEXT, INTEGER, FLOAT, or NULL.
26 **
27 ** The virtual table contains read-only hidden columns:
28 **
29 **     lsm1_key	      A BLOB which is the raw LSM key.  If the "keytype"
30 **                    is BLOB or TEXT then this column is exactly the
31 **                    same as the key.  For the UINT keytype, this column
32 **                    will be a variable-length integer encoding of the key.
33 **
34 **     lsm1_value     A BLOB which is the raw LSM value.  All of the value
35 **                    columns are packed into this BLOB using the encoding
36 **                    described below.
37 **
38 ** Attempts to write values into the lsm1_key and lsm1_value columns are
39 ** silently ignored.
40 **
41 ** EXAMPLE
42 **
43 ** The virtual table declared this way:
44 **
45 **    CREATE VIRTUAL TABLE demo2 USING lsm1('x.lsm',id,UINT,a,b,c,d);
46 **
47 ** Results in a new virtual table named "demo2" that acts as if it has
48 ** the following schema:
49 **
50 **    CREATE TABLE demo2(
51 **      id UINT PRIMARY KEY ON CONFLICT REPLACE,
52 **      a ANY,
53 **      b ANY,
54 **      c ANY,
55 **      d ANY,
56 **      lsm1_key BLOB HIDDEN,
57 **      lsm1_value BLOB HIDDEN
58 **    ) WITHOUT ROWID;
59 **
60 **
61 **
62 ** INTERNALS
63 **
64 ** The key encoding for BLOB and TEXT is just a copy of the blob or text.
65 ** UTF-8 is used for text.  The key encoding for UINT is the variable-length
66 ** integer format at https://sqlite.org/src4/doc/trunk/www/varint.wiki.
67 **
68 ** The values are encoded as a single blob (since that is what lsm stores as
69 ** its content).  There is a "type integer" followed by "content" for each
70 ** value, alternating back and forth.  The content might be empty.
71 **
72 **    TYPE1  CONTENT1  TYPE2  CONTENT2  TYPE3  CONTENT3 ....
73 **
74 ** Each "type integer" is encoded as a variable-length integer in the
75 ** format of the link above.  Let the type integer be T.  The actual
76 ** datatype is an integer 0-5 equal to T%6.  Values 1 through 5 correspond
77 ** to SQLITE_INTEGER through SQLITE_NULL.  The size of the content in bytes
78 ** is T/6.  Type value 0 means that the value is an integer whose actual
79 ** values is T/6 and there is no content.  The type-value-0 integer format
80 ** only works for integers in the range of 0 through 40.
81 **
82 ** There is no content for NULL or type-0 integers.  For BLOB and TEXT
83 ** values, the content is the blob data or the UTF-8 text data.  For
84 ** non-negative integers X, the content is a variable-length integer X*2.
85 ** For negative integers Y, the content is varaible-length integer (1-Y)*2+1.
86 ** For FLOAT values, the content is the IEEE754 floating point value in
87 ** native byte-order.  This means that FLOAT values will be corrupted when
88 ** database file is moved between big-endian and little-endian machines.
89 */
90 #include "sqlite3ext.h"
91 SQLITE_EXTENSION_INIT1
92 #include "lsm.h"
93 #include <assert.h>
94 #include <string.h>
95 
96 /* Forward declaration of subclasses of virtual table objects */
97 typedef struct lsm1_vtab lsm1_vtab;
98 typedef struct lsm1_cursor lsm1_cursor;
99 typedef struct lsm1_vblob lsm1_vblob;
100 
101 /* Primitive types */
102 typedef unsigned char u8;
103 typedef unsigned int u32;
104 typedef sqlite3_uint64 u64;
105 
106 /* An open connection to an LSM table */
107 struct lsm1_vtab {
108   sqlite3_vtab base;          /* Base class - must be first */
109   lsm_db *pDb;                /* Open connection to the LSM table */
110   u8 keyType;                 /* SQLITE_BLOB, _TEXT, or _INTEGER */
111   u32 nVal;                   /* Number of value columns */
112 };
113 
114 
115 /* lsm1_cursor is a subclass of sqlite3_vtab_cursor which will
116 ** serve as the underlying representation of a cursor that scans
117 ** over rows of the result
118 */
119 struct lsm1_cursor {
120   sqlite3_vtab_cursor base;  /* Base class - must be first */
121   lsm_cursor *pLsmCur;       /* The LSM cursor */
122   u8 isDesc;                 /* 0: scan forward.  1: scan reverse */
123   u8 atEof;                  /* True if the scan is complete */
124   u8 bUnique;                /* True if no more than one row of output */
125   u8 *zData;                 /* Content of the current row */
126   u32 nData;                 /* Number of bytes in the current row */
127   u8 *aeType;                /* Types for all column values */
128   u32 *aiOfst;               /* Offsets to the various fields */
129   u32 *aiLen;                /* Length of each field */
130   u8 *pKey2;                 /* Loop termination key, or NULL */
131   u32 nKey2;                 /* Length of the loop termination key */
132 };
133 
134 /* An extensible buffer object.
135 **
136 ** Content can be appended.  Space to hold new content is automatically
137 ** allocated.
138 */
139 struct lsm1_vblob {
140   u8 *a;             /* Space to hold content, from sqlite3_malloc64() */
141   u64 n;             /* Bytes of space used */
142   u64 nAlloc;        /* Bytes of space allocated */
143   u8 errNoMem;       /* True if a memory allocation error has been seen */
144 };
145 
146 #if defined(__GNUC__)
147 #  define LSM1_NOINLINE  __attribute__((noinline))
148 #elif defined(_MSC_VER) && _MSC_VER>=1310
149 #  define LSM1_NOINLINE  __declspec(noinline)
150 #else
151 #  define LSM1_NOINLINE
152 #endif
153 
154 
155 /* Increase the available space in the vblob object so that it can hold
156 ** at least N more bytes.  Return the number of errors.
157 */
lsm1VblobEnlarge(lsm1_vblob * p,u32 N)158 static int lsm1VblobEnlarge(lsm1_vblob *p, u32 N){
159   if( p->n+N>p->nAlloc ){
160     if( p->errNoMem ) return 1;
161     p->nAlloc += N + (p->nAlloc ? p->nAlloc : N);
162     p->a = sqlite3_realloc64(p->a, p->nAlloc);
163     if( p->a==0 ){
164       p->n = 0;
165       p->nAlloc = 0;
166       p->errNoMem = 1;
167       return 1;
168     }
169     p->nAlloc = sqlite3_msize(p->a);
170   }
171   return 0;
172 }
173 
174 /* Append N bytes to a vblob after first enlarging it */
lsm1VblobEnlargeAndAppend(lsm1_vblob * p,const u8 * pData,u32 N)175 static LSM1_NOINLINE void lsm1VblobEnlargeAndAppend(
176   lsm1_vblob *p,
177   const u8 *pData,
178   u32 N
179 ){
180   if( p->n+N>p->nAlloc && lsm1VblobEnlarge(p, N) ) return;
181   memcpy(p->a+p->n, pData, N);
182   p->n += N;
183 }
184 
185 /* Append N bytes to a vblob */
lsm1VblobAppend(lsm1_vblob * p,const u8 * pData,u32 N)186 static void lsm1VblobAppend(lsm1_vblob *p, const u8 *pData, u32 N){
187   sqlite3_int64 n = p->n;
188   if( n+N>p->nAlloc ){
189     lsm1VblobEnlargeAndAppend(p, pData, N);
190   }else{
191     p->n += N;
192     memcpy(p->a+n, pData, N);
193   }
194 }
195 
196 /* append text to a vblob */
lsm1VblobAppendText(lsm1_vblob * p,const char * z)197 static void lsm1VblobAppendText(lsm1_vblob *p, const char *z){
198   lsm1VblobAppend(p, (u8*)z, (u32)strlen(z));
199 }
200 
201 /* Dequote the string */
lsm1Dequote(char * z)202 static void lsm1Dequote(char *z){
203   int j;
204   char cQuote = z[0];
205   size_t i, n;
206 
207   if( cQuote!='\'' && cQuote!='"' ) return;
208   n = strlen(z);
209   if( n<2 || z[n-1]!=z[0] ) return;
210   for(i=1, j=0; i<n-1; i++){
211     if( z[i]==cQuote && z[i+1]==cQuote ) i++;
212     z[j++] = z[i];
213   }
214   z[j] = 0;
215 }
216 
217 
218 /*
219 ** The lsm1Connect() method is invoked to create a new
220 ** lsm1_vtab that describes the virtual table.
221 */
lsm1Connect(sqlite3 * db,void * pAux,int argc,const char * const * argv,sqlite3_vtab ** ppVtab,char ** pzErr)222 static int lsm1Connect(
223   sqlite3 *db,
224   void *pAux,
225   int argc, const char *const*argv,
226   sqlite3_vtab **ppVtab,
227   char **pzErr
228 ){
229   lsm1_vtab *pNew;
230   int rc;
231   char *zFilename;
232   u8 keyType = 0;
233   int i;
234   lsm1_vblob sql;
235   static const char *azTypes[] = { "UINT",         "TEXT",     "BLOB" };
236   static const u8 aeTypes[] =    { SQLITE_INTEGER, SQLITE_TEXT, SQLITE_BLOB };
237   static const char *azArgName[] = {"filename", "key", "key type", "value1" };
238 
239   for(i=0; i<sizeof(azArgName)/sizeof(azArgName[0]); i++){
240     if( argc<i+4 || argv[i+3]==0 || argv[i+3][0]==0 ){
241       *pzErr = sqlite3_mprintf("%s (%r) argument missing",
242                                azArgName[i], i+1);
243       return SQLITE_ERROR;
244     }
245   }
246   for(i=0; i<sizeof(azTypes)/sizeof(azTypes[0]); i++){
247     if( sqlite3_stricmp(azTypes[i],argv[5])==0 ){
248       keyType = aeTypes[i];
249       break;
250     }
251   }
252   if( keyType==0 ){
253     *pzErr = sqlite3_mprintf("key type should be INT, TEXT, or BLOB");
254     return SQLITE_ERROR;
255   }
256   *ppVtab = sqlite3_malloc( sizeof(*pNew) );
257   pNew = (lsm1_vtab*)*ppVtab;
258   if( pNew==0 ){
259     return SQLITE_NOMEM;
260   }
261   memset(pNew, 0, sizeof(*pNew));
262   pNew->keyType = keyType;
263   rc = lsm_new(0, &pNew->pDb);
264   if( rc ){
265     *pzErr = sqlite3_mprintf("lsm_new failed with error code %d",  rc);
266     rc = SQLITE_ERROR;
267     goto connect_failed;
268   }
269   zFilename = sqlite3_mprintf("%s", argv[3]);
270   lsm1Dequote(zFilename);
271   rc = lsm_open(pNew->pDb, zFilename);
272   sqlite3_free(zFilename);
273   if( rc ){
274     *pzErr = sqlite3_mprintf("lsm_open failed with %d", rc);
275     rc = SQLITE_ERROR;
276     goto connect_failed;
277   }
278 
279   memset(&sql, 0, sizeof(sql));
280   lsm1VblobAppendText(&sql, "CREATE TABLE x(");
281   lsm1VblobAppendText(&sql, argv[4]);
282   lsm1VblobAppendText(&sql, " ");
283   lsm1VblobAppendText(&sql, argv[5]);
284   lsm1VblobAppendText(&sql, " PRIMARY KEY");
285   for(i=6; i<argc; i++){
286     lsm1VblobAppendText(&sql, ", ");
287     lsm1VblobAppendText(&sql, argv[i]);
288     pNew->nVal++;
289   }
290   lsm1VblobAppendText(&sql,
291       ", lsm1_command HIDDEN"
292       ", lsm1_key HIDDEN"
293       ", lsm1_value HIDDEN) WITHOUT ROWID");
294   lsm1VblobAppend(&sql, (u8*)"", 1);
295   if( sql.errNoMem ){
296     rc = SQLITE_NOMEM;
297     goto connect_failed;
298   }
299   rc = sqlite3_declare_vtab(db, (const char*)sql.a);
300   sqlite3_free(sql.a);
301 
302 connect_failed:
303   if( rc!=SQLITE_OK ){
304     if( pNew ){
305       if( pNew->pDb ) lsm_close(pNew->pDb);
306       sqlite3_free(pNew);
307     }
308     *ppVtab = 0;
309   }
310   return rc;
311 }
312 
313 /*
314 ** This method is the destructor for lsm1_cursor objects.
315 */
lsm1Disconnect(sqlite3_vtab * pVtab)316 static int lsm1Disconnect(sqlite3_vtab *pVtab){
317   lsm1_vtab *p = (lsm1_vtab*)pVtab;
318   lsm_close(p->pDb);
319   sqlite3_free(p);
320   return SQLITE_OK;
321 }
322 
323 /*
324 ** Constructor for a new lsm1_cursor object.
325 */
lsm1Open(sqlite3_vtab * pVtab,sqlite3_vtab_cursor ** ppCursor)326 static int lsm1Open(sqlite3_vtab *pVtab, sqlite3_vtab_cursor **ppCursor){
327   lsm1_vtab *p = (lsm1_vtab*)pVtab;
328   lsm1_cursor *pCur;
329   int rc;
330   pCur = sqlite3_malloc64( sizeof(*pCur)
331                  + p->nVal*(sizeof(pCur->aiOfst)+sizeof(pCur->aiLen)+1) );
332   if( pCur==0 ) return SQLITE_NOMEM;
333   memset(pCur, 0, sizeof(*pCur));
334   pCur->aiOfst = (u32*)&pCur[1];
335   pCur->aiLen = &pCur->aiOfst[p->nVal];
336   pCur->aeType = (u8*)&pCur->aiLen[p->nVal];
337   *ppCursor = &pCur->base;
338   rc = lsm_csr_open(p->pDb, &pCur->pLsmCur);
339   if( rc==LSM_OK ){
340     rc = SQLITE_OK;
341   }else{
342     sqlite3_free(pCur);
343     *ppCursor = 0;
344     rc = SQLITE_ERROR;
345   }
346   return rc;
347 }
348 
349 /*
350 ** Destructor for a lsm1_cursor.
351 */
lsm1Close(sqlite3_vtab_cursor * cur)352 static int lsm1Close(sqlite3_vtab_cursor *cur){
353   lsm1_cursor *pCur = (lsm1_cursor*)cur;
354   sqlite3_free(pCur->pKey2);
355   lsm_csr_close(pCur->pLsmCur);
356   sqlite3_free(pCur);
357   return SQLITE_OK;
358 }
359 
360 
361 /*
362 ** Advance a lsm1_cursor to its next row of output.
363 */
lsm1Next(sqlite3_vtab_cursor * cur)364 static int lsm1Next(sqlite3_vtab_cursor *cur){
365   lsm1_cursor *pCur = (lsm1_cursor*)cur;
366   int rc = LSM_OK;
367   if( pCur->bUnique ){
368     pCur->atEof = 1;
369   }else{
370     if( pCur->isDesc ){
371       rc = lsm_csr_prev(pCur->pLsmCur);
372     }else{
373       rc = lsm_csr_next(pCur->pLsmCur);
374     }
375     if( rc==LSM_OK && lsm_csr_valid(pCur->pLsmCur)==0 ){
376       pCur->atEof = 1;
377     }
378     if( pCur->pKey2 && pCur->atEof==0 ){
379       const u8 *pVal;
380       u32 nVal;
381       assert( pCur->isDesc==0 );
382       rc = lsm_csr_key(pCur->pLsmCur, (const void**)&pVal, (int*)&nVal);
383       if( rc==LSM_OK ){
384         u32 len = pCur->nKey2;
385         int c;
386         if( len>nVal ) len = nVal;
387         c = memcmp(pVal, pCur->pKey2, len);
388         if( c==0 ) c = nVal - pCur->nKey2;
389         if( c>0 ) pCur->atEof = 1;
390       }
391     }
392     pCur->zData = 0;
393   }
394   return rc==LSM_OK ? SQLITE_OK : SQLITE_ERROR;
395 }
396 
397 /*
398 ** Return TRUE if the cursor has been moved off of the last
399 ** row of output.
400 */
lsm1Eof(sqlite3_vtab_cursor * cur)401 static int lsm1Eof(sqlite3_vtab_cursor *cur){
402   lsm1_cursor *pCur = (lsm1_cursor*)cur;
403   return pCur->atEof;
404 }
405 
406 /*
407 ** Rowids are not supported by the underlying virtual table.  So always
408 ** return 0 for the rowid.
409 */
lsm1Rowid(sqlite3_vtab_cursor * cur,sqlite_int64 * pRowid)410 static int lsm1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
411   *pRowid = 0;
412   return SQLITE_OK;
413 }
414 
415 /*
416 ** Type prefixes on LSM keys
417 */
418 #define LSM1_TYPE_NEGATIVE   0
419 #define LSM1_TYPE_POSITIVE   1
420 #define LSM1_TYPE_TEXT       2
421 #define LSM1_TYPE_BLOB       3
422 
423 /*
424 ** Write a 32-bit unsigned integer as 4 big-endian bytes.
425 */
varintWrite32(unsigned char * z,unsigned int y)426 static void varintWrite32(unsigned char *z, unsigned int y){
427   z[0] = (unsigned char)(y>>24);
428   z[1] = (unsigned char)(y>>16);
429   z[2] = (unsigned char)(y>>8);
430   z[3] = (unsigned char)(y);
431 }
432 
433 /*
434 ** Write a varint into z[].  The buffer z[] must be at least 9 characters
435 ** long to accommodate the largest possible varint.  Return the number of
436 ** bytes of z[] used.
437 */
lsm1PutVarint64(unsigned char * z,sqlite3_uint64 x)438 static int lsm1PutVarint64(unsigned char *z, sqlite3_uint64 x){
439   unsigned int w, y;
440   if( x<=240 ){
441     z[0] = (unsigned char)x;
442     return 1;
443   }
444   if( x<=2287 ){
445     y = (unsigned int)(x - 240);
446     z[0] = (unsigned char)(y/256 + 241);
447     z[1] = (unsigned char)(y%256);
448     return 2;
449   }
450   if( x<=67823 ){
451     y = (unsigned int)(x - 2288);
452     z[0] = 249;
453     z[1] = (unsigned char)(y/256);
454     z[2] = (unsigned char)(y%256);
455     return 3;
456   }
457   y = (unsigned int)x;
458   w = (unsigned int)(x>>32);
459   if( w==0 ){
460     if( y<=16777215 ){
461       z[0] = 250;
462       z[1] = (unsigned char)(y>>16);
463       z[2] = (unsigned char)(y>>8);
464       z[3] = (unsigned char)(y);
465       return 4;
466     }
467     z[0] = 251;
468     varintWrite32(z+1, y);
469     return 5;
470   }
471   if( w<=255 ){
472     z[0] = 252;
473     z[1] = (unsigned char)w;
474     varintWrite32(z+2, y);
475     return 6;
476   }
477   if( w<=65535 ){
478     z[0] = 253;
479     z[1] = (unsigned char)(w>>8);
480     z[2] = (unsigned char)w;
481     varintWrite32(z+3, y);
482     return 7;
483   }
484   if( w<=16777215 ){
485     z[0] = 254;
486     z[1] = (unsigned char)(w>>16);
487     z[2] = (unsigned char)(w>>8);
488     z[3] = (unsigned char)w;
489     varintWrite32(z+4, y);
490     return 8;
491   }
492   z[0] = 255;
493   varintWrite32(z+1, w);
494   varintWrite32(z+5, y);
495   return 9;
496 }
497 
498 /* Append non-negative integer x as a variable-length integer.
499 */
lsm1VblobAppendVarint(lsm1_vblob * p,sqlite3_uint64 x)500 static void lsm1VblobAppendVarint(lsm1_vblob *p, sqlite3_uint64 x){
501   sqlite3_int64 n = p->n;
502   if( n+9>p->nAlloc && lsm1VblobEnlarge(p, 9) ) return;
503   p->n += lsm1PutVarint64(p->a+p->n, x);
504 }
505 
506 /*
507 ** Decode the varint in the first n bytes z[].  Write the integer value
508 ** into *pResult and return the number of bytes in the varint.
509 **
510 ** If the decode fails because there are not enough bytes in z[] then
511 ** return 0;
512 */
lsm1GetVarint64(const unsigned char * z,int n,sqlite3_uint64 * pResult)513 static int lsm1GetVarint64(
514   const unsigned char *z,
515   int n,
516   sqlite3_uint64 *pResult
517 ){
518   unsigned int x;
519   if( n<1 ) return 0;
520   if( z[0]<=240 ){
521     *pResult = z[0];
522     return 1;
523   }
524   if( z[0]<=248 ){
525     if( n<2 ) return 0;
526     *pResult = (z[0]-241)*256 + z[1] + 240;
527     return 2;
528   }
529   if( n<z[0]-246 ) return 0;
530   if( z[0]==249 ){
531     *pResult = 2288 + 256*z[1] + z[2];
532     return 3;
533   }
534   if( z[0]==250 ){
535     *pResult = (z[1]<<16) + (z[2]<<8) + z[3];
536     return 4;
537   }
538   x = (z[1]<<24) + (z[2]<<16) + (z[3]<<8) + z[4];
539   if( z[0]==251 ){
540     *pResult = x;
541     return 5;
542   }
543   if( z[0]==252 ){
544     *pResult = (((sqlite3_uint64)x)<<8) + z[5];
545     return 6;
546   }
547   if( z[0]==253 ){
548     *pResult = (((sqlite3_uint64)x)<<16) + (z[5]<<8) + z[6];
549     return 7;
550   }
551   if( z[0]==254 ){
552     *pResult = (((sqlite3_uint64)x)<<24) + (z[5]<<16) + (z[6]<<8) + z[7];
553     return 8;
554   }
555   *pResult = (((sqlite3_uint64)x)<<32) +
556                (0xffffffff & ((z[5]<<24) + (z[6]<<16) + (z[7]<<8) + z[8]));
557   return 9;
558 }
559 
560 /* Encoded a signed integer as a varint.  Numbers close to zero uses fewer
561 ** bytes than numbers far away from zero.  However, the result is not in
562 ** lexicographical order.
563 **
564 ** Encoding:  Non-negative integer X is encoding as an unsigned
565 ** varint X*2.  Negative integer Y is encoding as an unsigned
566 ** varint (1-Y)*2 + 1.
567 */
lsm1PutSignedVarint64(u8 * z,sqlite3_int64 v)568 static int lsm1PutSignedVarint64(u8 *z, sqlite3_int64 v){
569   sqlite3_uint64 u;
570   if( v>=0 ){
571     u = (sqlite3_uint64)v;
572     return lsm1PutVarint64(z, u*2);
573   }else{
574     u = (sqlite3_uint64)(-1-v);
575     return lsm1PutVarint64(z, u*2+1);
576   }
577 }
578 
579 /* Decoded a signed varint. */
lsm1GetSignedVarint64(const unsigned char * z,int n,sqlite3_int64 * pResult)580 static int lsm1GetSignedVarint64(
581   const unsigned char *z,
582   int n,
583   sqlite3_int64 *pResult
584 ){
585   sqlite3_uint64 u = 0;
586   n = lsm1GetVarint64(z, n, &u);
587   if( u&1 ){
588     *pResult = -1 - (sqlite3_int64)(u>>1);
589   }else{
590     *pResult = (sqlite3_int64)(u>>1);
591   }
592   return n;
593 }
594 
595 
596 /*
597 ** Read the value part of the key-value pair and decode it into columns.
598 */
lsm1DecodeValues(lsm1_cursor * pCur)599 static int lsm1DecodeValues(lsm1_cursor *pCur){
600   lsm1_vtab *pTab = (lsm1_vtab*)(pCur->base.pVtab);
601   int i, n;
602   int rc;
603   u8 eType;
604   sqlite3_uint64 v;
605 
606   if( pCur->zData ) return 1;
607   rc = lsm_csr_value(pCur->pLsmCur, (const void**)&pCur->zData,
608                      (int*)&pCur->nData);
609   if( rc ) return 0;
610   for(i=n=0; i<pTab->nVal; i++){
611     v = 0;
612     n += lsm1GetVarint64(pCur->zData+n, pCur->nData-n, &v);
613     pCur->aeType[i] = eType = (u8)(v%6);
614     if( eType==0 ){
615       pCur->aiOfst[i] = (u32)(v/6);
616       pCur->aiLen[i] = 0;
617     }else{
618       pCur->aiOfst[i] = n;
619       n += (pCur->aiLen[i] = (u32)(v/6));
620     }
621     if( n>pCur->nData ) break;
622   }
623   if( i<pTab->nVal ){
624     pCur->zData = 0;
625     return 0;
626   }
627   return 1;
628 }
629 
630 /*
631 ** Return values of columns for the row at which the lsm1_cursor
632 ** is currently pointing.
633 */
lsm1Column(sqlite3_vtab_cursor * cur,sqlite3_context * ctx,int i)634 static int lsm1Column(
635   sqlite3_vtab_cursor *cur,   /* The cursor */
636   sqlite3_context *ctx,       /* First argument to sqlite3_result_...() */
637   int i                       /* Which column to return */
638 ){
639   lsm1_cursor *pCur = (lsm1_cursor*)cur;
640   lsm1_vtab *pTab = (lsm1_vtab*)(cur->pVtab);
641   if( i==0 ){
642     /* The key column */
643     const void *pVal;
644     int nVal;
645     if( lsm_csr_key(pCur->pLsmCur, &pVal, &nVal)==LSM_OK ){
646       if( pTab->keyType==SQLITE_BLOB ){
647         sqlite3_result_blob(ctx, pVal, nVal, SQLITE_TRANSIENT);
648       }else if( pTab->keyType==SQLITE_TEXT ){
649         sqlite3_result_text(ctx,(const char*)pVal, nVal, SQLITE_TRANSIENT);
650       }else{
651         const unsigned char *z = (const unsigned char*)pVal;
652         sqlite3_uint64 v1;
653         lsm1GetVarint64(z, nVal, &v1);
654         sqlite3_result_int64(ctx, (sqlite3_int64)v1);
655       }
656     }
657   }else if( i>pTab->nVal ){
658     if( i==pTab->nVal+2 ){  /* lsm1_key */
659       const void *pVal;
660       int nVal;
661       if( lsm_csr_key(pCur->pLsmCur, &pVal, &nVal)==LSM_OK ){
662         sqlite3_result_blob(ctx, pVal, nVal, SQLITE_TRANSIENT);
663       }
664     }else if( i==pTab->nVal+3 ){  /* lsm1_value */
665       const void *pVal;
666       int nVal;
667       if( lsm_csr_value(pCur->pLsmCur, &pVal, &nVal)==LSM_OK ){
668         sqlite3_result_blob(ctx, pVal, nVal, SQLITE_TRANSIENT);
669       }
670     }
671   }else if( lsm1DecodeValues(pCur) ){
672     /* The i-th value column (where leftmost is 1) */
673     const u8 *zData;
674     u32 nData;
675     i--;
676     zData = pCur->zData + pCur->aiOfst[i];
677     nData = pCur->aiLen[i];
678     switch( pCur->aeType[i] ){
679       case 0: {  /* in-line integer */
680         sqlite3_result_int(ctx, pCur->aiOfst[i]);
681         break;
682       }
683       case SQLITE_INTEGER: {
684         sqlite3_int64 v;
685         lsm1GetSignedVarint64(zData, nData, &v);
686         sqlite3_result_int64(ctx, v);
687         break;
688       }
689       case SQLITE_FLOAT: {
690         double v;
691         if( nData==sizeof(v) ){
692           memcpy(&v, zData, sizeof(v));
693           sqlite3_result_double(ctx, v);
694         }
695         break;
696       }
697       case SQLITE_TEXT: {
698         sqlite3_result_text(ctx, (const char*)zData, nData, SQLITE_TRANSIENT);
699         break;
700       }
701       case SQLITE_BLOB: {
702         sqlite3_result_blob(ctx, zData, nData, SQLITE_TRANSIENT);
703         break;
704       }
705       default: {
706          /* A NULL.  Do nothing */
707       }
708     }
709   }
710   return SQLITE_OK;
711 }
712 
713 /* Parameter "pValue" contains an SQL value that is to be used as
714 ** a key in an LSM table.  The type of the key is determined by
715 ** "keyType".  Extract the raw bytes used for the key in LSM1.
716 */
lsm1KeyFromValue(int keyType,sqlite3_value * pValue,u8 * pBuf,const u8 ** ppKey,int * pnKey)717 static void lsm1KeyFromValue(
718   int keyType,                 /* The key type */
719   sqlite3_value *pValue,       /* The key value */
720   u8 *pBuf,                    /* Storage space for a generated key */
721   const u8 **ppKey,            /* OUT: the bytes of the key */
722   int *pnKey                   /* OUT: size of the key */
723 ){
724   if( keyType==SQLITE_BLOB ){
725     *ppKey = (const u8*)sqlite3_value_blob(pValue);
726     *pnKey = sqlite3_value_bytes(pValue);
727   }else if( keyType==SQLITE_TEXT ){
728     *ppKey = (const u8*)sqlite3_value_text(pValue);
729     *pnKey = sqlite3_value_bytes(pValue);
730   }else{
731     sqlite3_int64 v = sqlite3_value_int64(pValue);
732     if( v<0 ) v = 0;
733     *pnKey = lsm1PutVarint64(pBuf, v);
734     *ppKey = pBuf;
735   }
736 }
737 
738 /* Move to the first row to return.
739 */
lsm1Filter(sqlite3_vtab_cursor * pVtabCursor,int idxNum,const char * idxStr,int argc,sqlite3_value ** argv)740 static int lsm1Filter(
741   sqlite3_vtab_cursor *pVtabCursor,
742   int idxNum, const char *idxStr,
743   int argc, sqlite3_value **argv
744 ){
745   lsm1_cursor *pCur = (lsm1_cursor *)pVtabCursor;
746   lsm1_vtab *pTab = (lsm1_vtab*)(pCur->base.pVtab);
747   int rc = LSM_OK;
748   int seekType = -1;
749   const u8 *pVal = 0;
750   int nVal;
751   u8 keyType = pTab->keyType;
752   u8 aKey1[16];
753 
754   pCur->atEof = 1;
755   sqlite3_free(pCur->pKey2);
756   pCur->pKey2 = 0;
757   if( idxNum<99 ){
758     lsm1KeyFromValue(keyType, argv[0], aKey1, &pVal, &nVal);
759   }
760   switch( idxNum ){
761     case 0: {   /* key==argv[0] */
762       assert( argc==1 );
763       seekType = LSM_SEEK_EQ;
764       pCur->isDesc = 0;
765       pCur->bUnique = 1;
766       break;
767     }
768     case 1: {  /* key>=argv[0] AND key<=argv[1] */
769       u8 aKey[12];
770       seekType = LSM_SEEK_GE;
771       pCur->isDesc = 0;
772       pCur->bUnique = 0;
773       if( keyType==SQLITE_INTEGER ){
774         sqlite3_int64 v = sqlite3_value_int64(argv[1]);
775         if( v<0 ) v = 0;
776         pCur->nKey2 = lsm1PutVarint64(aKey, (sqlite3_uint64)v);
777         pCur->pKey2 = sqlite3_malloc( pCur->nKey2 );
778         if( pCur->pKey2==0 ) return SQLITE_NOMEM;
779         memcpy(pCur->pKey2, aKey, pCur->nKey2);
780       }else{
781         pCur->nKey2 = sqlite3_value_bytes(argv[1]);
782         pCur->pKey2 = sqlite3_malloc( pCur->nKey2 );
783         if( pCur->pKey2==0 ) return SQLITE_NOMEM;
784         if( keyType==SQLITE_BLOB ){
785           memcpy(pCur->pKey2, sqlite3_value_blob(argv[1]), pCur->nKey2);
786         }else{
787           memcpy(pCur->pKey2, sqlite3_value_text(argv[1]), pCur->nKey2);
788         }
789       }
790       break;
791     }
792     case 2: {  /* key>=argv[0] */
793       seekType = LSM_SEEK_GE;
794       pCur->isDesc = 0;
795       pCur->bUnique = 0;
796       break;
797     }
798     case 3: {  /* key<=argv[0] */
799       seekType = LSM_SEEK_LE;
800       pCur->isDesc = 1;
801       pCur->bUnique = 0;
802       break;
803     }
804     default: { /* full table scan */
805       pCur->isDesc = 0;
806       pCur->bUnique = 0;
807       break;
808     }
809   }
810   if( pVal ){
811     rc = lsm_csr_seek(pCur->pLsmCur, pVal, nVal, seekType);
812   }else{
813     rc = lsm_csr_first(pCur->pLsmCur);
814   }
815   if( rc==LSM_OK && lsm_csr_valid(pCur->pLsmCur)!=0 ){
816     pCur->atEof = 0;
817   }
818   return rc==LSM_OK ? SQLITE_OK : SQLITE_ERROR;
819 }
820 
821 /*
822 ** Only comparisons against the key are allowed.  The idxNum defines
823 ** which comparisons are available:
824 **
825 **     0        key==?1
826 **     1        key>=?1 AND key<=?2
827 **     2        key>?1 or key>=?1
828 **     3        key<?1 or key<=?1
829 **    99        Full table scan only
830 */
lsm1BestIndex(sqlite3_vtab * tab,sqlite3_index_info * pIdxInfo)831 static int lsm1BestIndex(
832   sqlite3_vtab *tab,
833   sqlite3_index_info *pIdxInfo
834 ){
835   int i;                 /* Loop over constraints */
836   int idxNum = 99;       /* The query plan */
837   int nArg = 0;          /* Number of arguments to xFilter */
838   int argIdx = -1;       /* Index of the key== constraint, or -1 if none */
839   int iIdx2 = -1;        /* The index of the second key */
840   int omit1 = 0;
841   int omit2 = 0;
842 
843   const struct sqlite3_index_constraint *pConstraint;
844   pConstraint = pIdxInfo->aConstraint;
845   for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
846     if( pConstraint->usable==0 ) continue;
847     if( pConstraint->iColumn!=0 ) continue;
848     switch( pConstraint->op ){
849       case SQLITE_INDEX_CONSTRAINT_EQ: {
850         if( idxNum>0 ){
851           argIdx = i;
852           iIdx2 = -1;
853           idxNum = 0;
854           omit1 = 1;
855         }
856         break;
857       }
858       case SQLITE_INDEX_CONSTRAINT_GE:
859       case SQLITE_INDEX_CONSTRAINT_GT: {
860         if( idxNum==99 ){
861           argIdx = i;
862           idxNum = 2;
863           omit1 = pConstraint->op==SQLITE_INDEX_CONSTRAINT_GE;
864         }else if( idxNum==3 ){
865           iIdx2 = idxNum;
866           omit2 = omit1;
867           argIdx = i;
868           idxNum = 1;
869           omit1 = pConstraint->op==SQLITE_INDEX_CONSTRAINT_GE;
870         }
871         break;
872       }
873       case SQLITE_INDEX_CONSTRAINT_LE:
874       case SQLITE_INDEX_CONSTRAINT_LT: {
875         if( idxNum==99 ){
876           argIdx = i;
877           idxNum = 3;
878           omit1 = pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE;
879         }else if( idxNum==2 ){
880           iIdx2 = i;
881           idxNum = 1;
882           omit1 = pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE;
883         }
884         break;
885       }
886     }
887   }
888   if( argIdx>=0 ){
889     pIdxInfo->aConstraintUsage[argIdx].argvIndex = ++nArg;
890     pIdxInfo->aConstraintUsage[argIdx].omit = omit1;
891   }
892   if( iIdx2>=0 ){
893     pIdxInfo->aConstraintUsage[iIdx2].argvIndex = ++nArg;
894     pIdxInfo->aConstraintUsage[iIdx2].omit = omit2;
895   }
896   if( idxNum==0 ){
897     pIdxInfo->estimatedCost = (double)1;
898     pIdxInfo->estimatedRows = 1;
899     pIdxInfo->orderByConsumed = 1;
900   }else if( idxNum==1 ){
901     pIdxInfo->estimatedCost = (double)100;
902     pIdxInfo->estimatedRows = 100;
903   }else if( idxNum<99 ){
904     pIdxInfo->estimatedCost = (double)5000;
905     pIdxInfo->estimatedRows = 5000;
906   }else{
907     /* Full table scan */
908     pIdxInfo->estimatedCost = (double)2147483647;
909     pIdxInfo->estimatedRows = 2147483647;
910   }
911   pIdxInfo->idxNum = idxNum;
912   return SQLITE_OK;
913 }
914 
915 /*
916 ** The xUpdate method is normally used for INSERT, REPLACE, UPDATE, and
917 ** DELETE.  But this virtual table only supports INSERT and REPLACE.
918 ** DELETE is accomplished by inserting a record with a value of NULL.
919 ** UPDATE is achieved by using REPLACE.
920 */
lsm1Update(sqlite3_vtab * pVTab,int argc,sqlite3_value ** argv,sqlite_int64 * pRowid)921 int lsm1Update(
922   sqlite3_vtab *pVTab,
923   int argc,
924   sqlite3_value **argv,
925   sqlite_int64 *pRowid
926 ){
927   lsm1_vtab *p = (lsm1_vtab*)pVTab;
928   int nKey, nKey2;
929   int i;
930   int rc = LSM_OK;
931   const u8 *pKey, *pKey2;
932   unsigned char aKey[16];
933   unsigned char pSpace[16];
934   lsm1_vblob val;
935 
936   if( argc==1 ){
937     /* DELETE the record whose key is argv[0] */
938     lsm1KeyFromValue(p->keyType, argv[0], aKey, &pKey, &nKey);
939     lsm_delete(p->pDb, pKey, nKey);
940     return SQLITE_OK;
941   }
942 
943   if( sqlite3_value_type(argv[0])!=SQLITE_NULL ){
944     /* An UPDATE */
945     lsm1KeyFromValue(p->keyType, argv[0], aKey, &pKey, &nKey);
946     lsm1KeyFromValue(p->keyType, argv[1], pSpace, &pKey2, &nKey2);
947     if( nKey!=nKey2 || memcmp(pKey, pKey2, nKey)!=0 ){
948       /* The UPDATE changes the PRIMARY KEY value.  DELETE the old key */
949       lsm_delete(p->pDb, pKey, nKey);
950     }
951     /* Fall through into the INSERT case to complete the UPDATE */
952   }
953 
954   /* "INSERT INTO tab(lsm1_command) VALUES('....')" is used to implement
955   ** special commands.
956   */
957   if( sqlite3_value_type(argv[3+p->nVal])!=SQLITE_NULL ){
958     return SQLITE_OK;
959   }
960   lsm1KeyFromValue(p->keyType, argv[2], aKey, &pKey, &nKey);
961   memset(&val, 0, sizeof(val));
962   for(i=0; i<p->nVal; i++){
963     sqlite3_value *pArg = argv[3+i];
964     u8 eType = sqlite3_value_type(pArg);
965     switch( eType ){
966       case SQLITE_NULL: {
967         lsm1VblobAppendVarint(&val, SQLITE_NULL);
968         break;
969       }
970       case SQLITE_INTEGER: {
971         sqlite3_int64 v = sqlite3_value_int64(pArg);
972         if( v>=0 && v<=240/6 ){
973           lsm1VblobAppendVarint(&val, v*6);
974         }else{
975           int n = lsm1PutSignedVarint64(pSpace, v);
976           lsm1VblobAppendVarint(&val, SQLITE_INTEGER + n*6);
977           lsm1VblobAppend(&val, pSpace, n);
978         }
979         break;
980       }
981       case SQLITE_FLOAT: {
982         double r = sqlite3_value_double(pArg);
983         lsm1VblobAppendVarint(&val, SQLITE_FLOAT + 8*6);
984         lsm1VblobAppend(&val, (u8*)&r, sizeof(r));
985         break;
986       }
987       case SQLITE_BLOB: {
988         int n = sqlite3_value_bytes(pArg);
989         lsm1VblobAppendVarint(&val, n*6 + SQLITE_BLOB);
990         lsm1VblobAppend(&val, sqlite3_value_blob(pArg), n);
991         break;
992       }
993       case SQLITE_TEXT: {
994         int n = sqlite3_value_bytes(pArg);
995         lsm1VblobAppendVarint(&val, n*6 + SQLITE_TEXT);
996         lsm1VblobAppend(&val, sqlite3_value_text(pArg), n);
997         break;
998       }
999     }
1000   }
1001   if( val.errNoMem ){
1002     return SQLITE_NOMEM;
1003   }
1004   rc = lsm_insert(p->pDb, pKey, nKey, val.a, val.n);
1005   sqlite3_free(val.a);
1006   return rc==LSM_OK ? SQLITE_OK : SQLITE_ERROR;
1007 }
1008 
1009 /* Begin a transaction
1010 */
lsm1Begin(sqlite3_vtab * pVtab)1011 static int lsm1Begin(sqlite3_vtab *pVtab){
1012   lsm1_vtab *p = (lsm1_vtab*)pVtab;
1013   int rc = lsm_begin(p->pDb, 1);
1014   return rc==LSM_OK ? SQLITE_OK : SQLITE_ERROR;
1015 }
1016 
1017 /* Phase 1 of a transaction commit.
1018 */
lsm1Sync(sqlite3_vtab * pVtab)1019 static int lsm1Sync(sqlite3_vtab *pVtab){
1020   return SQLITE_OK;
1021 }
1022 
1023 /* Commit a transaction
1024 */
lsm1Commit(sqlite3_vtab * pVtab)1025 static int lsm1Commit(sqlite3_vtab *pVtab){
1026   lsm1_vtab *p = (lsm1_vtab*)pVtab;
1027   int rc = lsm_commit(p->pDb, 0);
1028   return rc==LSM_OK ? SQLITE_OK : SQLITE_ERROR;
1029 }
1030 
1031 /* Rollback a transaction
1032 */
lsm1Rollback(sqlite3_vtab * pVtab)1033 static int lsm1Rollback(sqlite3_vtab *pVtab){
1034   lsm1_vtab *p = (lsm1_vtab*)pVtab;
1035   int rc = lsm_rollback(p->pDb, 0);
1036   return rc==LSM_OK ? SQLITE_OK : SQLITE_ERROR;
1037 }
1038 
1039 /*
1040 ** This following structure defines all the methods for the
1041 ** generate_lsm1 virtual table.
1042 */
1043 static sqlite3_module lsm1Module = {
1044   0,                       /* iVersion */
1045   lsm1Connect,             /* xCreate */
1046   lsm1Connect,             /* xConnect */
1047   lsm1BestIndex,           /* xBestIndex */
1048   lsm1Disconnect,          /* xDisconnect */
1049   lsm1Disconnect,          /* xDestroy */
1050   lsm1Open,                /* xOpen - open a cursor */
1051   lsm1Close,               /* xClose - close a cursor */
1052   lsm1Filter,              /* xFilter - configure scan constraints */
1053   lsm1Next,                /* xNext - advance a cursor */
1054   lsm1Eof,                 /* xEof - check for end of scan */
1055   lsm1Column,              /* xColumn - read data */
1056   lsm1Rowid,               /* xRowid - read data */
1057   lsm1Update,              /* xUpdate */
1058   lsm1Begin,               /* xBegin */
1059   lsm1Sync,                /* xSync */
1060   lsm1Commit,              /* xCommit */
1061   lsm1Rollback,            /* xRollback */
1062   0,                       /* xFindMethod */
1063   0,                       /* xRename */
1064 };
1065 
1066 
1067 #ifdef _WIN32
1068 __declspec(dllexport)
1069 #endif
sqlite3_lsm_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)1070 int sqlite3_lsm_init(
1071   sqlite3 *db,
1072   char **pzErrMsg,
1073   const sqlite3_api_routines *pApi
1074 ){
1075   int rc = SQLITE_OK;
1076   SQLITE_EXTENSION_INIT2(pApi);
1077   rc = sqlite3_create_module(db, "lsm1", &lsm1Module, 0);
1078   return rc;
1079 }
1080