1 /*
2 ** 2013-10-14
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This SQLite extension implements functions tointeger(X) and toreal(X).
14 **
15 ** If X is an integer, real, or string value that can be
16 ** losslessly represented as an integer, then tointeger(X)
17 ** returns the corresponding integer value.
18 ** If X is an 8-byte BLOB then that blob is interpreted as
** a signed twos-complement little-endian encoding of an integer
20 ** and tointeger(X) returns the corresponding integer value.
** Otherwise tointeger(X) returns NULL.
22 **
23 ** If X is an integer, real, or string value that can be
** converted into a real number, preserving at least 15 digits
25 ** of precision, then toreal(X) returns the corresponding real value.
26 ** If X is an 8-byte BLOB then that blob is interpreted as
27 ** a 64-bit IEEE754 big-endian floating point value
28 ** and toreal(X) returns the corresponding real value.
** Otherwise toreal(X) returns NULL.
30 **
31 ** Note that tointeger(X) of an 8-byte BLOB assumes a little-endian
32 ** encoding whereas toreal(X) of an 8-byte BLOB assumes a big-endian
33 ** encoding.
34 */
35 #include "sqlite3ext.h"
36 SQLITE_EXTENSION_INIT1
37 #include <assert.h>
38 #include <string.h>
39 
40 /*
41 ** Determine if this is running on a big-endian or little-endian
42 ** processor
43 */
#if defined(i386) || defined(__i386__) || defined(_M_IX86)\
                             || defined(__x86_64) || defined(__x86_64__)
/* x86 and x86-64 are always little-endian, so the answer is a
** compile-time constant on those targets. */
# define TOTYPE_BIGENDIAN    0
# define TOTYPE_LITTLEENDIAN 1
#else
  /* On every other target, probe at run time by inspecting the first
  ** byte of an int whose value is 1.  The probe object is "static" so
  ** that it has internal linkage and cannot collide with a symbol of
  ** the same name in another translation unit. */
  static const int totype_one = 1;
# define TOTYPE_BIGENDIAN    (*(const char *)(&totype_one)==0)
# define TOTYPE_LITTLEENDIAN (*(const char *)(&totype_one)==1)
#endif
53 
54 /*
55 ** Constants for the largest and smallest possible 64-bit signed integers.
56 ** These macros are designed to work correctly on both 32-bit and 64-bit
57 ** compilers.
58 */
/* LARGEST_INT64 is 0x7fffffffffffffff, assembled from two 32-bit halves. */
#ifndef LARGEST_INT64
# define LARGEST_INT64   ((((sqlite3_int64)0x7fffffff)<<32)|0xffffffff)
#endif

/* SMALLEST_INT64 is one less than the negation of LARGEST_INT64. */
#ifndef SMALLEST_INT64
# define SMALLEST_INT64  (-LARGEST_INT64 - (sqlite3_int64)1)
#endif
66 
67 /*
68 ** Return TRUE if character c is a whitespace character
69 */
static int totypeIsspace(unsigned char c){
  /* The six whitespace bytes recognized are: space, horizontal tab,
  ** newline, vertical tab, form feed and carriage return. */
  switch( c ){
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
73 
74 /*
75 ** Return TRUE if character c is a digit
76 */
static int totypeIsdigit(unsigned char c){
  /* Only the ten ASCII decimal digits qualify. */
  if( c<'0' ){
    return 0;
  }
  return c<='9';
}
80 
81 /*
82 ** Compare the 19-character string zNum against the text representation
83 ** value 2^63:  9223372036854775808.  Return negative, zero, or positive
84 ** if zNum is less than, equal to, or greater than the string.
85 ** Note that zNum must contain exactly 19 characters.
86 **
87 ** Unlike memcmp() this routine is guaranteed to return the difference
88 ** in the values of the last digit if the only difference is in the
89 ** last digit.  So, for example,
90 **
91 **      totypeCompare2pow63("9223372036854775800")
92 **
93 ** will return -8.
94 */
static int totypeCompare2pow63(const char *zNum){
  /* The first 18 digits of 2^63; the 19th digit ('8') is handled apart */
  static const char zLead[] = "922337203685477580";
  int k;

  /* A mismatch in any of the first 18 positions decides the result.
  ** The difference is scaled by ten so that it always outweighs any
  ** possible difference in the final digit. */
  for(k=0; k<18; k++){
    int diff = (zNum[k] - zLead[k])*10;
    if( diff!=0 ){
      return diff;
    }
  }

  /* Only the last digit can differ: return the raw digit difference */
  return zNum[18] - '8';
}
108 
109 /*
110 ** Convert zNum to a 64-bit signed integer.
111 **
112 ** If the zNum value is representable as a 64-bit twos-complement
113 ** integer, then write that value into *pNum and return 0.
114 **
** If zNum is exactly 9223372036854775808, return 2.  This special
** case is broken out because while 9223372036854775808 cannot be a
** signed 64-bit integer, its negative -9223372036854775808 can be.
**
** If zNum is too big for a 64-bit integer and is not
** 9223372036854775808, or if zNum contains any non-numeric text,
** then return 1.
122 **
123 ** The string is not necessarily zero-terminated.
124 */
static int totypeAtoi64(const char *zNum, sqlite3_int64 *pNum, int length){
  const char *zLimit = zNum + length;  /* One past the last input byte */
  const char *zAfterSign;              /* Position just past any sign */
  sqlite3_uint64 uMag = 0;             /* Magnitude built from the digits */
  int isNeg = 0;                       /* True if a leading '-' was seen */
  int nDigit = 0;                      /* Digits consumed after leading zeros */
  int ch = 0;                          /* Last byte examined by the digit loop */
  int cmp;                             /* Result of comparison against 2^63 */

  /* Skip leading whitespace, then at most one sign character */
  while( zNum<zLimit && totypeIsspace(*zNum) ){
    zNum++;
  }
  if( zNum<zLimit && (*zNum=='-' || *zNum=='+') ){
    isNeg = (*zNum=='-');
    zNum++;
  }
  zAfterSign = zNum;

  /* Leading zeros carry no value and do not count toward the digit total */
  while( zNum<zLimit && *zNum=='0' ){
    zNum++;
  }

  /* Accumulate decimal digits.  ch is left holding the byte that stopped
  ** the scan, or the final digit if the scan ran to the end of input. */
  while( &zNum[nDigit]<zLimit ){
    ch = zNum[nDigit];
    if( ch<'0' || ch>'9' ) break;
    uMag = uMag*10 + (sqlite3_uint64)(ch - '0');
    nDigit++;
  }

  /* Always write something to *pNum, even when an error is returned */
  if( uMag>LARGEST_INT64 ){
    *pNum = SMALLEST_INT64;
  }else{
    *pNum = isNeg ? -(sqlite3_int64)uMag : (sqlite3_int64)uMag;
  }

  /* Unconsumed non-NUL text follows the digits */
  if( ch!=0 && &zNum[nDigit]<zLimit ) return 1;
  /* No digits at all, not even a leading zero */
  if( nDigit==0 && zAfterSign==zNum ) return 1;
  /* More than 19 significant digits is certain to overflow */
  if( nDigit>19 ) return 1;

  if( nDigit<19 ){
    /* Fewer than 19 digits always fits in 64 bits */
    assert( uMag<=LARGEST_INT64 );
    return 0;
  }

  /* Exactly 19 digits: compare against 9223372036854775808 */
  cmp = totypeCompare2pow63(zNum);
  if( cmp<0 ){
    /* Below 2^63, so it fits */
    assert( uMag<=LARGEST_INT64 );
    return 0;
  }
  if( cmp>0 ){
    /* Above 2^63, so it overflows */
    return 1;
  }

  /* Exactly 2^63.  This fits only when negated; the positive form is
  ** reported with the special return code 2. */
  assert( uMag-1==LARGEST_INT64 );
  assert( (*pNum)==SMALLEST_INT64 );
  return isNeg ? 0 : 2;
}
182 
183 /*
** The string z[] is a text representation of a real number.
185 ** Convert this string to a double and write it into *pResult.
186 **
187 ** The string is not necessarily zero-terminated.
188 **
189 ** Return TRUE if the result is a valid real number (or integer) and FALSE
190 ** if the string is empty or contains extraneous text.  Valid numbers
191 ** are in one of these formats:
192 **
193 **    [+-]digits[E[+-]digits]
194 **    [+-]digits.[digits][E[+-]digits]
195 **    [+-].digits[E[+-]digits]
196 **
197 ** Leading and trailing whitespace is ignored for the purpose of determining
198 ** validity.
199 **
200 ** If some prefix of the input string is a valid number, this routine
201 ** returns FALSE but it still converts the prefix and writes the result
202 ** into *pResult.
203 */
totypeAtoF(const char * z,double * pResult,int length)204 static int totypeAtoF(const char *z, double *pResult, int length){
205   const char *zEnd = z + length;
206   /* sign * significand * (10 ^ (esign * exponent)) */
207   int sign = 1;    /* sign of significand */
208   sqlite3_int64 s = 0;       /* significand */
209   int d = 0;       /* adjust exponent for shifting decimal point */
210   int esign = 1;   /* sign of exponent */
211   int e = 0;       /* exponent */
212   int eValid = 1;  /* True exponent is either not used or is well-formed */
213   double result;
214   int nDigits = 0;
215   int nonNum = 0;
216 
217   *pResult = 0.0;   /* Default return value, in case of an error */
218 
219   /* skip leading spaces */
220   while( z<zEnd && totypeIsspace(*z) ) z++;
221   if( z>=zEnd ) return 0;
222 
223   /* get sign of significand */
224   if( *z=='-' ){
225     sign = -1;
226     z++;
227   }else if( *z=='+' ){
228     z++;
229   }
230 
231   /* skip leading zeroes */
232   while( z<zEnd && z[0]=='0' ) z++, nDigits++;
233 
234   /* copy max significant digits to significand */
235   while( z<zEnd && totypeIsdigit(*z) && s<((LARGEST_INT64-9)/10) ){
236     s = s*10 + (*z - '0');
237     z++, nDigits++;
238   }
239 
240   /* skip non-significant significand digits
241   ** (increase exponent by d to shift decimal left) */
242   while( z<zEnd && totypeIsdigit(*z) ) z++, nDigits++, d++;
243   if( z>=zEnd ) goto totype_atof_calc;
244 
245   /* if decimal point is present */
246   if( *z=='.' ){
247     z++;
248     /* copy digits from after decimal to significand
249     ** (decrease exponent by d to shift decimal right) */
250     while( z<zEnd && totypeIsdigit(*z) && s<((LARGEST_INT64-9)/10) ){
251       s = s*10 + (*z - '0');
252       z++, nDigits++, d--;
253     }
254     /* skip non-significant digits */
255     while( z<zEnd && totypeIsdigit(*z) ) z++, nDigits++;
256   }
257   if( z>=zEnd ) goto totype_atof_calc;
258 
259   /* if exponent is present */
260   if( *z=='e' || *z=='E' ){
261     z++;
262     eValid = 0;
263     if( z>=zEnd ) goto totype_atof_calc;
264     /* get sign of exponent */
265     if( *z=='-' ){
266       esign = -1;
267       z++;
268     }else if( *z=='+' ){
269       z++;
270     }
271     /* copy digits to exponent */
272     while( z<zEnd && totypeIsdigit(*z) ){
273       e = e<10000 ? (e*10 + (*z - '0')) : 10000;
274       z++;
275       eValid = 1;
276     }
277   }
278 
279   /* skip trailing spaces */
280   if( nDigits && eValid ){
281     while( z<zEnd && totypeIsspace(*z) ) z++;
282   }
283 
284 totype_atof_calc:
285   /* adjust exponent by d, and update sign */
286   e = (e*esign) + d;
287   if( e<0 ) {
288     esign = -1;
289     e *= -1;
290   } else {
291     esign = 1;
292   }
293 
294   /* if 0 significand */
295   if( !s ) {
296     /* In the IEEE 754 standard, zero is signed.
297     ** Add the sign if we've seen at least one digit */
298     result = (sign<0 && nDigits) ? -(double)0 : (double)0;
299   } else {
300     /* attempt to reduce exponent */
301     if( esign>0 ){
302       while( s<(LARGEST_INT64/10) && e>0 ) e--,s*=10;
303     }else{
304       while( !(s%10) && e>0 ) e--,s/=10;
305     }
306 
307     /* adjust the sign of significand */
308     s = sign<0 ? -s : s;
309 
310     /* if exponent, scale significand as appropriate
311     ** and store in result. */
312     if( e ){
313       double scale = 1.0;
314       /* attempt to handle extremely small/large numbers better */
315       if( e>307 && e<342 ){
316         while( e%308 ) { scale *= 1.0e+1; e -= 1; }
317         if( esign<0 ){
318           result = s / scale;
319           result /= 1.0e+308;
320         }else{
321           result = s * scale;
322           result *= 1.0e+308;
323         }
324       }else if( e>=342 ){
325         if( esign<0 ){
326           result = 0.0*s;
327         }else{
328           result = 1e308*1e308*s;  /* Infinity */
329         }
330       }else{
331         /* 1.0e+22 is the largest power of 10 than can be
332         ** represented exactly. */
333         while( e%22 ) { scale *= 1.0e+1; e -= 1; }
334         while( e>0 ) { scale *= 1.0e+22; e -= 22; }
335         if( esign<0 ){
336           result = s / scale;
337         }else{
338           result = s * scale;
339         }
340       }
341     } else {
342       result = (double)s;
343     }
344   }
345 
346   /* store the result */
347   *pResult = result;
348 
349   /* return true if number and no extra non-whitespace chracters after */
350   return z>=zEnd && nDigits>0 && eValid && nonNum==0;
351 }
352 
353 /*
354 ** tointeger(X):  If X is any value (integer, double, blob, or string) that
355 ** can be losslessly converted into an integer, then make the conversion and
356 ** return the result.  Otherwise, return NULL.
357 */
/*
** Implementation of the tointeger(X) SQL function.
**
** The result is set according to the datatype of argv[0]:
**
**   FLOAT    The integer value, if converting to a 64-bit integer and
**            back reproduces the original double exactly.
**   INTEGER  The value itself.
**   BLOB     If exactly 8 bytes long, the bytes interpreted as a
**            little-endian 64-bit integer.
**   TEXT     The parsed integer, if the text is non-empty, does not
**            begin with whitespace, and totypeAtoi64() returns 0.
**
** In every other case no result is set.
*/
static void tointegerFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  assert( argc==1 );
  (void)argc;
  switch( sqlite3_value_type(argv[0]) ){
    case SQLITE_FLOAT: {
      double rVal = sqlite3_value_double(argv[0]);
      /* Converting a double to a 64-bit integer is undefined behavior in
      ** C unless the value lies in [-2^63, 2^63).  Check the range before
      ** casting.  NaN fails both comparisons and so is rejected here too. */
      if( rVal>=-9223372036854775808.0 && rVal<9223372036854775808.0 ){
        sqlite3_int64 iVal = (sqlite3_int64)rVal;
        if( rVal==(double)iVal ){
          sqlite3_result_int64(context, iVal);
        }
      }
      break;
    }
    case SQLITE_INTEGER: {
      sqlite3_result_int64(context, sqlite3_value_int64(argv[0]));
      break;
    }
    case SQLITE_BLOB: {
      const unsigned char *zBlob = sqlite3_value_blob(argv[0]);
      if( zBlob ){
        int nBlob = sqlite3_value_bytes(argv[0]);
        if( nBlob==(int)sizeof(sqlite3_int64) ){
          sqlite3_int64 iVal;
          if( TOTYPE_BIGENDIAN ){
            /* The blob is little-endian, so reverse the byte order
            ** when running on a big-endian host */
            unsigned int i;
            unsigned char zBlobRev[sizeof(sqlite3_int64)];
            for(i=0; i<sizeof(sqlite3_int64); i++){
              zBlobRev[i] = zBlob[sizeof(sqlite3_int64)-1-i];
            }
            memcpy(&iVal, zBlobRev, sizeof(sqlite3_int64));
          }else{
            memcpy(&iVal, zBlob, sizeof(sqlite3_int64));
          }
          sqlite3_result_int64(context, iVal);
        }
      }
      break;
    }
    case SQLITE_TEXT: {
      const unsigned char *zStr = sqlite3_value_text(argv[0]);
      if( zStr ){
        int nStr = sqlite3_value_bytes(argv[0]);
        /* Text that starts with whitespace is rejected up front */
        if( nStr && !totypeIsspace(zStr[0]) ){
          sqlite3_int64 iVal;
          if( !totypeAtoi64((const char*)zStr, &iVal, nStr) ){
            sqlite3_result_int64(context, iVal);
          }
        }
      }
      break;
    }
    default: {
      assert( sqlite3_value_type(argv[0])==SQLITE_NULL );
      break;
    }
  }
}
418 
419 /*
420 ** toreal(X): If X is any value (integer, double, blob, or string) that can
421 ** be losslessly converted into a real number, then do so and return that
422 ** real number.  Otherwise return NULL.
423 */
424 #if defined(_MSC_VER)
425 #pragma warning(disable: 4748)
426 #pragma optimize("", off)
427 #endif
/*
** Implementation of the toreal(X) SQL function.
**
** The result is set according to the datatype of argv[0]:
**
**   FLOAT    The value itself.
**   INTEGER  The value as a double, if converting to double and back
**            reproduces the original integer exactly.
**   BLOB     If exactly 8 bytes long, the bytes interpreted as a
**            big-endian double.
**   TEXT     The parsed value, if the text is non-empty, neither begins
**            nor ends with whitespace, and totypeAtoF() returns true.
**
** In every other case no result is set.
*/
static void torealFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  assert( argc==1 );
  (void)argc;
  switch( sqlite3_value_type(argv[0]) ){
    case SQLITE_FLOAT: {
      sqlite3_result_double(context, sqlite3_value_double(argv[0]));
      break;
    }
    case SQLITE_INTEGER: {
      sqlite3_int64 iVal = sqlite3_value_int64(argv[0]);
      double rVal = (double)iVal;
      /* Integers close to the largest 64-bit value round up to exactly
      ** 2^63 when converted to double.  Casting 2^63 back to a 64-bit
      ** integer is undefined behavior in C, so exclude that value before
      ** attempting the round-trip comparison.  The lower bound needs no
      ** check because -2^63 is exactly representable as a double. */
      if( rVal<9223372036854775808.0 && iVal==(sqlite3_int64)rVal ){
        sqlite3_result_double(context, rVal);
      }
      break;
    }
    case SQLITE_BLOB: {
      const unsigned char *zBlob = sqlite3_value_blob(argv[0]);
      if( zBlob ){
        int nBlob = sqlite3_value_bytes(argv[0]);
        if( nBlob==(int)sizeof(double) ){
          double rVal;
          if( TOTYPE_LITTLEENDIAN ){
            /* The blob is big-endian, so reverse the byte order when
            ** running on a little-endian host */
            unsigned int i;
            unsigned char zBlobRev[sizeof(double)];
            for(i=0; i<sizeof(double); i++){
              zBlobRev[i] = zBlob[sizeof(double)-1-i];
            }
            memcpy(&rVal, zBlobRev, sizeof(double));
          }else{
            memcpy(&rVal, zBlob, sizeof(double));
          }
          sqlite3_result_double(context, rVal);
        }
      }
      break;
    }
    case SQLITE_TEXT: {
      const unsigned char *zStr = sqlite3_value_text(argv[0]);
      if( zStr ){
        int nStr = sqlite3_value_bytes(argv[0]);
        /* Text with leading or trailing whitespace is rejected up front */
        if( nStr && !totypeIsspace(zStr[0]) && !totypeIsspace(zStr[nStr-1]) ){
          double rVal;
          if( totypeAtoF((const char*)zStr, &rVal, nStr) ){
            sqlite3_result_double(context, rVal);
          }
        }
      }
      break;
    }
    default: {
      assert( sqlite3_value_type(argv[0])==SQLITE_NULL );
      break;
    }
  }
}
489 #if defined(_MSC_VER)
490 #pragma optimize("", on)
491 #pragma warning(default: 4748)
492 #endif
493 
494 #ifdef _WIN32
495 __declspec(dllexport)
496 #endif
sqlite3_totype_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)497 int sqlite3_totype_init(
498   sqlite3 *db,
499   char **pzErrMsg,
500   const sqlite3_api_routines *pApi
501 ){
502   int rc = SQLITE_OK;
503   SQLITE_EXTENSION_INIT2(pApi);
504   (void)pzErrMsg;  /* Unused parameter */
505   rc = sqlite3_create_function(db, "tointeger", 1,
506         SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, 0,
507         tointegerFunc, 0, 0);
508   if( rc==SQLITE_OK ){
509     rc = sqlite3_create_function(db, "toreal", 1,
510         SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, 0,
511         torealFunc, 0, 0);
512   }
513   return rc;
514 }
515