1 /*
2 ** 2016-05-28
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains the implementation of an SQLite virtual table for
14 ** reading CSV files.
15 **
16 ** Usage:
17 **
18 ** .load ./csv
19 ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
20 ** SELECT * FROM csv;
21 **
22 ** The columns are named "c1", "c2", "c3", ... by default. Or the
23 ** application can define its own CREATE TABLE statement using the
24 ** schema= parameter, like this:
25 **
26 ** CREATE VIRTUAL TABLE temp.csv2 USING csv(
27 ** filename = "../http.log",
28 ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
29 ** );
30 **
31 ** Instead of specifying a file, the text of the CSV can be loaded using
32 ** the data= parameter.
33 **
34 ** If the columns=N parameter is supplied, then the CSV file is assumed to have
35 ** N columns. If both the columns= and schema= parameters are omitted, then
36 ** the number and names of the columns is determined by the first line of
37 ** the CSV input.
38 **
39 ** Some extra debugging features (used for testing virtual tables) are available
40 ** if this module is compiled with -DSQLITE_TEST.
41 */
42 #include <sqlite3ext.h>
43 SQLITE_EXTENSION_INIT1
44 #include <string.h>
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <stdarg.h>
48 #include <ctype.h>
49 #include <stdio.h>
50
51 #ifndef SQLITE_OMIT_VIRTUALTABLE
52
53 /*
54 ** A macro to hint to the compiler that a function should not be
55 ** inlined.
56 */
57 #if defined(__GNUC__)
58 # define CSV_NOINLINE __attribute__((noinline))
59 #elif defined(_MSC_VER) && _MSC_VER>=1310
60 # define CSV_NOINLINE __declspec(noinline)
61 #else
62 # define CSV_NOINLINE
63 #endif
64
65
66 /* Max size of the error message in a CsvReader */
67 #define CSV_MXERR 200
68
69 /* Size of the CsvReader input buffer */
70 #define CSV_INBUFSZ 1024
71
72 /* A context object used when read a CSV file. */
73 typedef struct CsvReader CsvReader;
74 struct CsvReader {
75 FILE *in; /* Read the CSV text from this input stream */
76 char *z; /* Accumulated text for a field */
77 int n; /* Number of bytes in z */
78 int nAlloc; /* Space allocated for z[] */
79 int nLine; /* Current line number */
80 int bNotFirst; /* True if prior text has been seen */
81 int cTerm; /* Character that terminated the most recent field */
82 size_t iIn; /* Next unread character in the input buffer */
83 size_t nIn; /* Number of characters in the input buffer */
84 char *zIn; /* The input buffer */
85 char zErr[CSV_MXERR]; /* Error message */
86 };
87
88 /* Initialize a CsvReader object */
csv_reader_init(CsvReader * p)89 static void csv_reader_init(CsvReader *p){
90 p->in = 0;
91 p->z = 0;
92 p->n = 0;
93 p->nAlloc = 0;
94 p->nLine = 0;
95 p->bNotFirst = 0;
96 p->nIn = 0;
97 p->zIn = 0;
98 p->zErr[0] = 0;
99 }
100
101 /* Close and reset a CsvReader object */
csv_reader_reset(CsvReader * p)102 static void csv_reader_reset(CsvReader *p){
103 if( p->in ){
104 fclose(p->in);
105 sqlite3_free(p->zIn);
106 }
107 sqlite3_free(p->z);
108 csv_reader_init(p);
109 }
110
111 /* Report an error on a CsvReader */
csv_errmsg(CsvReader * p,const char * zFormat,...)112 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
113 va_list ap;
114 va_start(ap, zFormat);
115 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
116 va_end(ap);
117 }
118
119 /* Open the file associated with a CsvReader
120 ** Return the number of errors.
121 */
csv_reader_open(CsvReader * p,const char * zFilename,const char * zData)122 static int csv_reader_open(
123 CsvReader *p, /* The reader to open */
124 const char *zFilename, /* Read from this filename */
125 const char *zData /* ... or use this data */
126 ){
127 if( zFilename ){
128 p->zIn = sqlite3_malloc( CSV_INBUFSZ );
129 if( p->zIn==0 ){
130 csv_errmsg(p, "out of memory");
131 return 1;
132 }
133 p->in = fopen(zFilename, "rb");
134 if( p->in==0 ){
135 sqlite3_free(p->zIn);
136 csv_reader_reset(p);
137 csv_errmsg(p, "cannot open '%s' for reading", zFilename);
138 return 1;
139 }
140 }else{
141 assert( p->in==0 );
142 p->zIn = (char*)zData;
143 p->nIn = strlen(zData);
144 }
145 return 0;
146 }
147
148 /* The input buffer has overflowed. Refill the input buffer, then
149 ** return the next character
150 */
csv_getc_refill(CsvReader * p)151 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
152 size_t got;
153
154 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */
155 assert( p->in!=0 ); /* Only called if reading froma file */
156
157 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
158 if( got==0 ) return EOF;
159 p->nIn = got;
160 p->iIn = 1;
161 return p->zIn[0];
162 }
163
164 /* Return the next character of input. Return EOF at end of input. */
csv_getc(CsvReader * p)165 static int csv_getc(CsvReader *p){
166 if( p->iIn >= p->nIn ){
167 if( p->in!=0 ) return csv_getc_refill(p);
168 return EOF;
169 }
170 return ((unsigned char*)p->zIn)[p->iIn++];
171 }
172
173 /* Increase the size of p->z and append character c to the end.
174 ** Return 0 on success and non-zero if there is an OOM error */
csv_resize_and_append(CsvReader * p,char c)175 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
176 char *zNew;
177 int nNew = p->nAlloc*2 + 100;
178 zNew = sqlite3_realloc64(p->z, nNew);
179 if( zNew ){
180 p->z = zNew;
181 p->nAlloc = nNew;
182 p->z[p->n++] = c;
183 return 0;
184 }else{
185 csv_errmsg(p, "out of memory");
186 return 1;
187 }
188 }
189
190 /* Append a single character to the CsvReader.z[] array.
191 ** Return 0 on success and non-zero if there is an OOM error */
csv_append(CsvReader * p,char c)192 static int csv_append(CsvReader *p, char c){
193 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
194 p->z[p->n++] = c;
195 return 0;
196 }
197
198 /* Read a single field of CSV text. Compatible with rfc4180 and extended
199 ** with the option of having a separator other than ",".
200 **
201 ** + Input comes from p->in.
202 ** + Store results in p->z of length p->n. Space to hold p->z comes
203 ** from sqlite3_malloc64().
204 ** + Keep track of the line number in p->nLine.
205 ** + Store the character that terminates the field in p->cTerm. Store
206 ** EOF on end-of-file.
207 **
208 ** Return 0 at EOF or on OOM. On EOF, the p->cTerm character will have
209 ** been set to EOF.
210 */
csv_read_one_field(CsvReader * p)211 static char *csv_read_one_field(CsvReader *p){
212 int c;
213 p->n = 0;
214 c = csv_getc(p);
215 if( c==EOF ){
216 p->cTerm = EOF;
217 return 0;
218 }
219 if( c=='"' ){
220 int pc, ppc;
221 int startLine = p->nLine;
222 pc = ppc = 0;
223 while( 1 ){
224 c = csv_getc(p);
225 if( c<='"' || pc=='"' ){
226 if( c=='\n' ) p->nLine++;
227 if( c=='"' ){
228 if( pc=='"' ){
229 pc = 0;
230 continue;
231 }
232 }
233 if( (c==',' && pc=='"')
234 || (c=='\n' && pc=='"')
235 || (c=='\n' && pc=='\r' && ppc=='"')
236 || (c==EOF && pc=='"')
237 ){
238 do{ p->n--; }while( p->z[p->n]!='"' );
239 p->cTerm = (char)c;
240 break;
241 }
242 if( pc=='"' && c!='\r' ){
243 csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
244 break;
245 }
246 if( c==EOF ){
247 csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
248 startLine, '"');
249 p->cTerm = (char)c;
250 break;
251 }
252 }
253 if( csv_append(p, (char)c) ) return 0;
254 ppc = pc;
255 pc = c;
256 }
257 }else{
258 /* If this is the first field being parsed and it begins with the
259 ** UTF-8 BOM (0xEF BB BF) then skip the BOM */
260 if( (c&0xff)==0xef && p->bNotFirst==0 ){
261 csv_append(p, (char)c);
262 c = csv_getc(p);
263 if( (c&0xff)==0xbb ){
264 csv_append(p, (char)c);
265 c = csv_getc(p);
266 if( (c&0xff)==0xbf ){
267 p->bNotFirst = 1;
268 p->n = 0;
269 return csv_read_one_field(p);
270 }
271 }
272 }
273 while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
274 if( csv_append(p, (char)c) ) return 0;
275 c = csv_getc(p);
276 }
277 if( c=='\n' ){
278 p->nLine++;
279 if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
280 }
281 p->cTerm = (char)c;
282 }
283 if( p->z ) p->z[p->n] = 0;
284 p->bNotFirst = 1;
285 return p->z;
286 }
287
288
289 /* Forward references to the various virtual table methods implemented
290 ** in this file. */
291 static int csvtabCreate(sqlite3*, void*, int, const char*const*,
292 sqlite3_vtab**,char**);
293 static int csvtabConnect(sqlite3*, void*, int, const char*const*,
294 sqlite3_vtab**,char**);
295 static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
296 static int csvtabDisconnect(sqlite3_vtab*);
297 static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
298 static int csvtabClose(sqlite3_vtab_cursor*);
299 static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
300 int argc, sqlite3_value **argv);
301 static int csvtabNext(sqlite3_vtab_cursor*);
302 static int csvtabEof(sqlite3_vtab_cursor*);
303 static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
304 static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
305
306 /* An instance of the CSV virtual table */
307 typedef struct CsvTable {
308 sqlite3_vtab base; /* Base class. Must be first */
309 char *zFilename; /* Name of the CSV file */
310 char *zData; /* Raw CSV data in lieu of zFilename */
311 long iStart; /* Offset to start of data in zFilename */
312 int nCol; /* Number of columns in the CSV file */
313 unsigned int tstFlags; /* Bit values used for testing */
314 } CsvTable;
315
316 /* Allowed values for tstFlags */
317 #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/
318
319 /* A cursor for the CSV virtual table */
320 typedef struct CsvCursor {
321 sqlite3_vtab_cursor base; /* Base class. Must be first */
322 CsvReader rdr; /* The CsvReader object */
323 char **azVal; /* Value of the current row */
324 int *aLen; /* Length of each entry */
325 sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
326 } CsvCursor;
327
328 /* Transfer error message text from a reader into a CsvTable */
csv_xfer_error(CsvTable * pTab,CsvReader * pRdr)329 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
330 sqlite3_free(pTab->base.zErrMsg);
331 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
332 }
333
334 /*
335 ** This method is the destructor fo a CsvTable object.
336 */
csvtabDisconnect(sqlite3_vtab * pVtab)337 static int csvtabDisconnect(sqlite3_vtab *pVtab){
338 CsvTable *p = (CsvTable*)pVtab;
339 sqlite3_free(p->zFilename);
340 sqlite3_free(p->zData);
341 sqlite3_free(p);
342 return SQLITE_OK;
343 }
344
345 /* Skip leading whitespace. Return a pointer to the first non-whitespace
346 ** character, or to the zero terminator if the string has only whitespace */
csv_skip_whitespace(const char * z)347 static const char *csv_skip_whitespace(const char *z){
348 while( isspace((unsigned char)z[0]) ) z++;
349 return z;
350 }
351
352 /* Remove trailing whitespace from the end of string z[] */
csv_trim_whitespace(char * z)353 static void csv_trim_whitespace(char *z){
354 size_t n = strlen(z);
355 while( n>0 && isspace((unsigned char)z[n]) ) n--;
356 z[n] = 0;
357 }
358
359 /* Dequote the string */
csv_dequote(char * z)360 static void csv_dequote(char *z){
361 int j;
362 char cQuote = z[0];
363 size_t i, n;
364
365 if( cQuote!='\'' && cQuote!='"' ) return;
366 n = strlen(z);
367 if( n<2 || z[n-1]!=z[0] ) return;
368 for(i=1, j=0; i<n-1; i++){
369 if( z[i]==cQuote && z[i+1]==cQuote ) i++;
370 z[j++] = z[i];
371 }
372 z[j] = 0;
373 }
374
375 /* Check to see if the string is of the form: "TAG = VALUE" with optional
376 ** whitespace before and around tokens. If it is, return a pointer to the
377 ** first character of VALUE. If it is not, return NULL.
378 */
csv_parameter(const char * zTag,int nTag,const char * z)379 static const char *csv_parameter(const char *zTag, int nTag, const char *z){
380 z = csv_skip_whitespace(z);
381 if( strncmp(zTag, z, nTag)!=0 ) return 0;
382 z = csv_skip_whitespace(z+nTag);
383 if( z[0]!='=' ) return 0;
384 return csv_skip_whitespace(z+1);
385 }
386
387 /* Decode a parameter that requires a dequoted string.
388 **
389 ** Return 1 if the parameter is seen, or 0 if not. 1 is returned
390 ** even if there is an error. If an error occurs, then an error message
391 ** is left in p->zErr. If there are no errors, p->zErr[0]==0.
392 */
csv_string_parameter(CsvReader * p,const char * zParam,const char * zArg,char ** pzVal)393 static int csv_string_parameter(
394 CsvReader *p, /* Leave the error message here, if there is one */
395 const char *zParam, /* Parameter we are checking for */
396 const char *zArg, /* Raw text of the virtual table argment */
397 char **pzVal /* Write the dequoted string value here */
398 ){
399 const char *zValue;
400 zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
401 if( zValue==0 ) return 0;
402 p->zErr[0] = 0;
403 if( *pzVal ){
404 csv_errmsg(p, "more than one '%s' parameter", zParam);
405 return 1;
406 }
407 *pzVal = sqlite3_mprintf("%s", zValue);
408 if( *pzVal==0 ){
409 csv_errmsg(p, "out of memory");
410 return 1;
411 }
412 csv_trim_whitespace(*pzVal);
413 csv_dequote(*pzVal);
414 return 1;
415 }
416
417
418 /* Return 0 if the argument is false and 1 if it is true. Return -1 if
419 ** we cannot really tell.
420 */
csv_boolean(const char * z)421 static int csv_boolean(const char *z){
422 if( sqlite3_stricmp("yes",z)==0
423 || sqlite3_stricmp("on",z)==0
424 || sqlite3_stricmp("true",z)==0
425 || (z[0]=='1' && z[1]==0)
426 ){
427 return 1;
428 }
429 if( sqlite3_stricmp("no",z)==0
430 || sqlite3_stricmp("off",z)==0
431 || sqlite3_stricmp("false",z)==0
432 || (z[0]=='0' && z[1]==0)
433 ){
434 return 0;
435 }
436 return -1;
437 }
438
439 /* Check to see if the string is of the form: "TAG = BOOLEAN" or just "TAG".
440 ** If it is, set *pValue to be the value of the boolean ("true" if there is
441 ** not "= BOOLEAN" component) and return non-zero. If the input string
442 ** does not begin with TAG, return zero.
443 */
csv_boolean_parameter(const char * zTag,int nTag,const char * z,int * pValue)444 static int csv_boolean_parameter(
445 const char *zTag, /* Tag we are looking for */
446 int nTag, /* Size of the tag in bytes */
447 const char *z, /* Input parameter */
448 int *pValue /* Write boolean value here */
449 ){
450 int b;
451 z = csv_skip_whitespace(z);
452 if( strncmp(zTag, z, nTag)!=0 ) return 0;
453 z = csv_skip_whitespace(z + nTag);
454 if( z[0]==0 ){
455 *pValue = 1;
456 return 1;
457 }
458 if( z[0]!='=' ) return 0;
459 z = csv_skip_whitespace(z+1);
460 b = csv_boolean(z);
461 if( b>=0 ){
462 *pValue = b;
463 return 1;
464 }
465 return 0;
466 }
467
468 /*
469 ** Parameters:
470 ** filename=FILENAME Name of file containing CSV content
471 ** data=TEXT Direct CSV content.
472 ** schema=SCHEMA Alternative CSV schema.
473 ** header=YES|NO First row of CSV defines the names of
474 ** columns if "yes". Default "no".
475 ** columns=N Assume the CSV file contains N columns.
476 **
477 ** Only available if compiled with SQLITE_TEST:
478 **
479 ** testflags=N Bitmask of test flags. Optional
480 **
481 ** If schema= is omitted, then the columns are named "c0", "c1", "c2",
482 ** and so forth. If columns=N is omitted, then the file is opened and
483 ** the number of columns in the first row is counted to determine the
484 ** column count. If header=YES, then the first row is skipped.
485 */
csvtabConnect(sqlite3 * db,void * pAux,int argc,const char * const * argv,sqlite3_vtab ** ppVtab,char ** pzErr)486 static int csvtabConnect(
487 sqlite3 *db,
488 void *pAux,
489 int argc, const char *const*argv,
490 sqlite3_vtab **ppVtab,
491 char **pzErr
492 ){
493 CsvTable *pNew = 0; /* The CsvTable object to construct */
494 int bHeader = -1; /* header= flags. -1 means not seen yet */
495 int rc = SQLITE_OK; /* Result code from this routine */
496 int i, j; /* Loop counters */
497 #ifdef SQLITE_TEST
498 int tstFlags = 0; /* Value for testflags=N parameter */
499 #endif
500 int b; /* Value of a boolean parameter */
501 int nCol = -99; /* Value of the columns= parameter */
502 CsvReader sRdr; /* A CSV file reader used to store an error
503 ** message and/or to count the number of columns */
504 static const char *azParam[] = {
505 "filename", "data", "schema",
506 };
507 char *azPValue[3]; /* Parameter values */
508 # define CSV_FILENAME (azPValue[0])
509 # define CSV_DATA (azPValue[1])
510 # define CSV_SCHEMA (azPValue[2])
511
512
513 assert( sizeof(azPValue)==sizeof(azParam) );
514 memset(&sRdr, 0, sizeof(sRdr));
515 memset(azPValue, 0, sizeof(azPValue));
516 for(i=3; i<argc; i++){
517 const char *z = argv[i];
518 const char *zValue;
519 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
520 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
521 }
522 if( j<sizeof(azParam)/sizeof(azParam[0]) ){
523 if( sRdr.zErr[0] ) goto csvtab_connect_error;
524 }else
525 if( csv_boolean_parameter("header",6,z,&b) ){
526 if( bHeader>=0 ){
527 csv_errmsg(&sRdr, "more than one 'header' parameter");
528 goto csvtab_connect_error;
529 }
530 bHeader = b;
531 }else
532 #ifdef SQLITE_TEST
533 if( (zValue = csv_parameter("testflags",9,z))!=0 ){
534 tstFlags = (unsigned int)atoi(zValue);
535 }else
536 #endif
537 if( (zValue = csv_parameter("columns",7,z))!=0 ){
538 if( nCol>0 ){
539 csv_errmsg(&sRdr, "more than one 'columns' parameter");
540 goto csvtab_connect_error;
541 }
542 nCol = atoi(zValue);
543 if( nCol<=0 ){
544 csv_errmsg(&sRdr, "column= value must be positive");
545 goto csvtab_connect_error;
546 }
547 }else
548 {
549 csv_errmsg(&sRdr, "bad parameter: '%s'", z);
550 goto csvtab_connect_error;
551 }
552 }
553 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
554 csv_errmsg(&sRdr, "must specify either filename= or data= but not both");
555 goto csvtab_connect_error;
556 }
557
558 if( (nCol<=0 || bHeader==1)
559 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA)
560 ){
561 goto csvtab_connect_error;
562 }
563 pNew = sqlite3_malloc( sizeof(*pNew) );
564 *ppVtab = (sqlite3_vtab*)pNew;
565 if( pNew==0 ) goto csvtab_connect_oom;
566 memset(pNew, 0, sizeof(*pNew));
567 if( CSV_SCHEMA==0 ){
568 sqlite3_str *pStr = sqlite3_str_new(0);
569 char *zSep = "";
570 int iCol = 0;
571 sqlite3_str_appendf(pStr, "CREATE TABLE x(");
572 if( nCol<0 && bHeader<1 ){
573 nCol = 0;
574 do{
575 csv_read_one_field(&sRdr);
576 nCol++;
577 }while( sRdr.cTerm==',' );
578 }
579 if( nCol>0 && bHeader<1 ){
580 for(iCol=0; iCol<nCol; iCol++){
581 sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol);
582 zSep = ",";
583 }
584 }else{
585 do{
586 char *z = csv_read_one_field(&sRdr);
587 if( (nCol>0 && iCol<nCol) || (nCol<0 && bHeader) ){
588 sqlite3_str_appendf(pStr,"%s\"%w\" TEXT", zSep, z);
589 zSep = ",";
590 iCol++;
591 }
592 }while( sRdr.cTerm==',' );
593 if( nCol<0 ){
594 nCol = iCol;
595 }else{
596 while( iCol<nCol ){
597 sqlite3_str_appendf(pStr,"%sc%d TEXT", zSep, ++iCol);
598 zSep = ",";
599 }
600 }
601 }
602 pNew->nCol = nCol;
603 sqlite3_str_appendf(pStr, ")");
604 CSV_SCHEMA = sqlite3_str_finish(pStr);
605 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
606 }else if( nCol<0 ){
607 do{
608 csv_read_one_field(&sRdr);
609 pNew->nCol++;
610 }while( sRdr.cTerm==',' );
611 }else{
612 pNew->nCol = nCol;
613 }
614 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
615 pNew->zData = CSV_DATA; CSV_DATA = 0;
616 #ifdef SQLITE_TEST
617 pNew->tstFlags = tstFlags;
618 #endif
619 if( bHeader!=1 ){
620 pNew->iStart = 0;
621 }else if( pNew->zData ){
622 pNew->iStart = (int)sRdr.iIn;
623 }else{
624 pNew->iStart = (int)(ftell(sRdr.in) - sRdr.nIn + sRdr.iIn);
625 }
626 csv_reader_reset(&sRdr);
627 rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
628 if( rc ){
629 csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db));
630 goto csvtab_connect_error;
631 }
632 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
633 sqlite3_free(azPValue[i]);
634 }
635 /* Rationale for DIRECTONLY:
636 ** An attacker who controls a database schema could use this vtab
637 ** to exfiltrate sensitive data from other files in the filesystem.
638 ** And, recommended practice is to put all CSV virtual tables in the
639 ** TEMP namespace, so they should still be usable from within TEMP
640 ** views, so there shouldn't be a serious loss of functionality by
641 ** prohibiting the use of this vtab from persistent triggers and views.
642 */
643 sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
644 return SQLITE_OK;
645
646 csvtab_connect_oom:
647 rc = SQLITE_NOMEM;
648 csv_errmsg(&sRdr, "out of memory");
649
650 csvtab_connect_error:
651 if( pNew ) csvtabDisconnect(&pNew->base);
652 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
653 sqlite3_free(azPValue[i]);
654 }
655 if( sRdr.zErr[0] ){
656 sqlite3_free(*pzErr);
657 *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
658 }
659 csv_reader_reset(&sRdr);
660 if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
661 return rc;
662 }
663
664 /*
665 ** Reset the current row content held by a CsvCursor.
666 */
csvtabCursorRowReset(CsvCursor * pCur)667 static void csvtabCursorRowReset(CsvCursor *pCur){
668 CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
669 int i;
670 for(i=0; i<pTab->nCol; i++){
671 sqlite3_free(pCur->azVal[i]);
672 pCur->azVal[i] = 0;
673 pCur->aLen[i] = 0;
674 }
675 }
676
677 /*
678 ** The xConnect and xCreate methods do the same thing, but they must be
679 ** different so that the virtual table is not an eponymous virtual table.
680 */
csvtabCreate(sqlite3 * db,void * pAux,int argc,const char * const * argv,sqlite3_vtab ** ppVtab,char ** pzErr)681 static int csvtabCreate(
682 sqlite3 *db,
683 void *pAux,
684 int argc, const char *const*argv,
685 sqlite3_vtab **ppVtab,
686 char **pzErr
687 ){
688 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
689 }
690
691 /*
692 ** Destructor for a CsvCursor.
693 */
csvtabClose(sqlite3_vtab_cursor * cur)694 static int csvtabClose(sqlite3_vtab_cursor *cur){
695 CsvCursor *pCur = (CsvCursor*)cur;
696 csvtabCursorRowReset(pCur);
697 csv_reader_reset(&pCur->rdr);
698 sqlite3_free(cur);
699 return SQLITE_OK;
700 }
701
702 /*
703 ** Constructor for a new CsvTable cursor object.
704 */
csvtabOpen(sqlite3_vtab * p,sqlite3_vtab_cursor ** ppCursor)705 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
706 CsvTable *pTab = (CsvTable*)p;
707 CsvCursor *pCur;
708 size_t nByte;
709 nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
710 pCur = sqlite3_malloc64( nByte );
711 if( pCur==0 ) return SQLITE_NOMEM;
712 memset(pCur, 0, nByte);
713 pCur->azVal = (char**)&pCur[1];
714 pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
715 *ppCursor = &pCur->base;
716 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
717 csv_xfer_error(pTab, &pCur->rdr);
718 return SQLITE_ERROR;
719 }
720 return SQLITE_OK;
721 }
722
723
724 /*
725 ** Advance a CsvCursor to its next row of input.
726 ** Set the EOF marker if we reach the end of input.
727 */
csvtabNext(sqlite3_vtab_cursor * cur)728 static int csvtabNext(sqlite3_vtab_cursor *cur){
729 CsvCursor *pCur = (CsvCursor*)cur;
730 CsvTable *pTab = (CsvTable*)cur->pVtab;
731 int i = 0;
732 char *z;
733 do{
734 z = csv_read_one_field(&pCur->rdr);
735 if( z==0 ){
736 break;
737 }
738 if( i<pTab->nCol ){
739 if( pCur->aLen[i] < pCur->rdr.n+1 ){
740 char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
741 if( zNew==0 ){
742 csv_errmsg(&pCur->rdr, "out of memory");
743 csv_xfer_error(pTab, &pCur->rdr);
744 break;
745 }
746 pCur->azVal[i] = zNew;
747 pCur->aLen[i] = pCur->rdr.n+1;
748 }
749 memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
750 i++;
751 }
752 }while( pCur->rdr.cTerm==',' );
753 if( z==0 || (pCur->rdr.cTerm==EOF && i<pTab->nCol) ){
754 pCur->iRowid = -1;
755 }else{
756 pCur->iRowid++;
757 while( i<pTab->nCol ){
758 sqlite3_free(pCur->azVal[i]);
759 pCur->azVal[i] = 0;
760 pCur->aLen[i] = 0;
761 i++;
762 }
763 }
764 return SQLITE_OK;
765 }
766
767 /*
768 ** Return values of columns for the row at which the CsvCursor
769 ** is currently pointing.
770 */
csvtabColumn(sqlite3_vtab_cursor * cur,sqlite3_context * ctx,int i)771 static int csvtabColumn(
772 sqlite3_vtab_cursor *cur, /* The cursor */
773 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
774 int i /* Which column to return */
775 ){
776 CsvCursor *pCur = (CsvCursor*)cur;
777 CsvTable *pTab = (CsvTable*)cur->pVtab;
778 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
779 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT);
780 }
781 return SQLITE_OK;
782 }
783
784 /*
785 ** Return the rowid for the current row.
786 */
csvtabRowid(sqlite3_vtab_cursor * cur,sqlite_int64 * pRowid)787 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
788 CsvCursor *pCur = (CsvCursor*)cur;
789 *pRowid = pCur->iRowid;
790 return SQLITE_OK;
791 }
792
793 /*
794 ** Return TRUE if the cursor has been moved off of the last
795 ** row of output.
796 */
csvtabEof(sqlite3_vtab_cursor * cur)797 static int csvtabEof(sqlite3_vtab_cursor *cur){
798 CsvCursor *pCur = (CsvCursor*)cur;
799 return pCur->iRowid<0;
800 }
801
802 /*
803 ** Only a full table scan is supported. So xFilter simply rewinds to
804 ** the beginning.
805 */
csvtabFilter(sqlite3_vtab_cursor * pVtabCursor,int idxNum,const char * idxStr,int argc,sqlite3_value ** argv)806 static int csvtabFilter(
807 sqlite3_vtab_cursor *pVtabCursor,
808 int idxNum, const char *idxStr,
809 int argc, sqlite3_value **argv
810 ){
811 CsvCursor *pCur = (CsvCursor*)pVtabCursor;
812 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
813 pCur->iRowid = 0;
814 if( pCur->rdr.in==0 ){
815 assert( pCur->rdr.zIn==pTab->zData );
816 assert( pTab->iStart>=0 );
817 assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
818 pCur->rdr.iIn = pTab->iStart;
819 }else{
820 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
821 pCur->rdr.iIn = 0;
822 pCur->rdr.nIn = 0;
823 }
824 return csvtabNext(pVtabCursor);
825 }
826
827 /*
828 ** Only a forward full table scan is supported. xBestIndex is mostly
829 ** a no-op. If CSVTEST_FIDX is set, then the presence of equality
830 ** constraints lowers the estimated cost, which is fiction, but is useful
831 ** for testing certain kinds of virtual table behavior.
832 */
csvtabBestIndex(sqlite3_vtab * tab,sqlite3_index_info * pIdxInfo)833 static int csvtabBestIndex(
834 sqlite3_vtab *tab,
835 sqlite3_index_info *pIdxInfo
836 ){
837 pIdxInfo->estimatedCost = 1000000;
838 #ifdef SQLITE_TEST
839 if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
840 /* The usual (and sensible) case is to always do a full table scan.
841 ** The code in this branch only runs when testflags=1. This code
842 ** generates an artifical and unrealistic plan which is useful
843 ** for testing virtual table logic but is not helpful to real applications.
844 **
845 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
846 ** table (even though it is not) and the cost of running the virtual table
847 ** is reduced from 1 million to just 10. The constraints are *not* marked
848 ** as omittable, however, so the query planner should still generate a
849 ** plan that gives a correct answer, even if they plan is not optimal.
850 */
851 int i;
852 int nConst = 0;
853 for(i=0; i<pIdxInfo->nConstraint; i++){
854 unsigned char op;
855 if( pIdxInfo->aConstraint[i].usable==0 ) continue;
856 op = pIdxInfo->aConstraint[i].op;
857 if( op==SQLITE_INDEX_CONSTRAINT_EQ
858 || op==SQLITE_INDEX_CONSTRAINT_LIKE
859 || op==SQLITE_INDEX_CONSTRAINT_GLOB
860 ){
861 pIdxInfo->estimatedCost = 10;
862 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
863 nConst++;
864 }
865 }
866 }
867 #endif
868 return SQLITE_OK;
869 }
870
871
872 static sqlite3_module CsvModule = {
873 0, /* iVersion */
874 csvtabCreate, /* xCreate */
875 csvtabConnect, /* xConnect */
876 csvtabBestIndex, /* xBestIndex */
877 csvtabDisconnect, /* xDisconnect */
878 csvtabDisconnect, /* xDestroy */
879 csvtabOpen, /* xOpen - open a cursor */
880 csvtabClose, /* xClose - close a cursor */
881 csvtabFilter, /* xFilter - configure scan constraints */
882 csvtabNext, /* xNext - advance a cursor */
883 csvtabEof, /* xEof - check for end of scan */
884 csvtabColumn, /* xColumn - read data */
885 csvtabRowid, /* xRowid - read data */
886 0, /* xUpdate */
887 0, /* xBegin */
888 0, /* xSync */
889 0, /* xCommit */
890 0, /* xRollback */
891 0, /* xFindMethod */
892 0, /* xRename */
893 };
894
895 #ifdef SQLITE_TEST
896 /*
897 ** For virtual table testing, make a version of the CSV virtual table
898 ** available that has an xUpdate function. But the xUpdate always returns
899 ** SQLITE_READONLY since the CSV file is not really writable.
900 */
csvtabUpdate(sqlite3_vtab * p,int n,sqlite3_value ** v,sqlite3_int64 * x)901 static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
902 return SQLITE_READONLY;
903 }
904 static sqlite3_module CsvModuleFauxWrite = {
905 0, /* iVersion */
906 csvtabCreate, /* xCreate */
907 csvtabConnect, /* xConnect */
908 csvtabBestIndex, /* xBestIndex */
909 csvtabDisconnect, /* xDisconnect */
910 csvtabDisconnect, /* xDestroy */
911 csvtabOpen, /* xOpen - open a cursor */
912 csvtabClose, /* xClose - close a cursor */
913 csvtabFilter, /* xFilter - configure scan constraints */
914 csvtabNext, /* xNext - advance a cursor */
915 csvtabEof, /* xEof - check for end of scan */
916 csvtabColumn, /* xColumn - read data */
917 csvtabRowid, /* xRowid - read data */
918 csvtabUpdate, /* xUpdate */
919 0, /* xBegin */
920 0, /* xSync */
921 0, /* xCommit */
922 0, /* xRollback */
923 0, /* xFindMethod */
924 0, /* xRename */
925 };
926 #endif /* SQLITE_TEST */
927
928 #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
929
930
931 #ifdef _WIN32
932 __declspec(dllexport)
933 #endif
934 /*
935 ** This routine is called when the extension is loaded. The new
936 ** CSV virtual table module is registered with the calling database
937 ** connection.
938 */
sqlite3_csv_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)939 int sqlite3_csv_init(
940 sqlite3 *db,
941 char **pzErrMsg,
942 const sqlite3_api_routines *pApi
943 ){
944 #ifndef SQLITE_OMIT_VIRTUALTABLE
945 int rc;
946 SQLITE_EXTENSION_INIT2(pApi);
947 rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
948 #ifdef SQLITE_TEST
949 if( rc==SQLITE_OK ){
950 rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
951 }
952 #endif
953 return rc;
954 #else
955 return SQLITE_OK;
956 #endif
957 }
958