1 /* Copyright 2019 Google Inc. All Rights Reserved.
2 ** Licensed under the Apache License, Version 2.0 (the "License");
3 **
4 ** Derived from the public domain SQLite (https://sqlite.org) sources.
5 */
6 
7 #include <fuzzer/FuzzedDataProvider.h>
8 #include <stddef.h>
9 #include <stdint.h>
10 #include <string.h>
11 #include <cstdlib>
12 #include <iomanip>
13 #include <ios>
14 #include <iostream>
15 #include <sstream>
16 #include <string>
17 
18 #include "third_party/sqlite/sqlite3.h"
19 
20 sqlite3 *g_database = 0; /* The database connection */
21 
/*
** Kind of SQL literal GetValueByType() generates for a column.
** kMin/kMax alias the first and last real enumerators.
*/
enum class ColumnType {
  kShortNumber = 0, /* decimal literal built from one input byte */
  kBlob,            /* x'..' hex literal built from input bytes */
  kString,          /* not generated: GetValueByType() yields 'NOT SUPPORTED' */
  kAny,             /* not generated: GetValueByType() yields 'NOT SUPPORTED' */

  kMin = kShortNumber,
  kMax = kAny,
};
30 
/*
** Table an operation is aimed at.  Values 0-4 name the shadow tables of the
** FTS3 table "f"; kFuzzTable is the virtual table itself.
*/
enum class TableOperator : unsigned long {
  kSegdir = 0,
  kContent,
  kDocsize,
  kSegments,
  kStat,
  // 5-7: reserved for further use
  kFuzzTable = 8,

  /* Range markers (aliases of the enumerators above). */
  kMin = kSegdir,
  kShadowTableMin = kSegdir,
  kShadowTableMax = kStat,
  kMax = kFuzzTable,
};
44 
/*
** One 16-bit operation descriptor, filled from raw fuzzer input by
** GetOperator16().
*/
struct opdata_16 {
  uint8_t op_type : 4;           /* reduced modulo kMax+1 to a TableOperator */
  uint8_t column_op : 4;         /* picks the column inside the chosen table */
  uint8_t op_sql_operation : 4;  /* reduced modulo 6 to pick the SQL verb */
  uint8_t select_operator_1 : 1; /* column "a" (1) or "b" (0) in WHERE clauses */
  uint8_t select_operator_2 : 3; /* comparison kind / FTS command selector */
};
52 
/*
** Row callback for sqlite3_exec().
**
** pCnt points at an int budget of rows.  Every call decrements the budget;
** the return value is non-zero when the budget was already zero or negative
** before the decrement, and zero otherwise.  The remaining arguments (column
** count, values and names) are ignored.
*/
static int ExecHandler(void *pCnt, int argc, char **argv, char **namev) {
  (void)argc;
  (void)argv;
  (void)namev;

  int *budget = static_cast<int *>(pCnt);
  const int before = *budget;
  *budget = before - 1;
  return (before <= 0) ? 1 : 0;
}
59 
GetOperator16(FuzzedDataProvider * data_provider,opdata_16 * op16)60 size_t GetOperator16(FuzzedDataProvider *data_provider, opdata_16 *op16) {
61   if (data_provider->remaining_bytes() < sizeof(uint16_t))
62     return 0;
63 
64   uint16_t operator_data = data_provider->ConsumeIntegral<uint16_t>();
65   memcpy(op16, &operator_data, sizeof(uint16_t));
66   return sizeof(uint16_t);
67 }
68 
GetValueByType(FuzzedDataProvider * data_provider,ColumnType type)69 std::string GetValueByType(FuzzedDataProvider *data_provider, ColumnType type) {
70   std::string out;
71   std::ostringstream ss;
72   switch (type) {
73   case ColumnType::kShortNumber: {
74     uint8_t number = data_provider->ConsumeIntegral<uint8_t>();
75     ss << static_cast<uint16_t>(number);
76   } break;
77   case ColumnType::kBlob: {
78     uint16_t length = data_provider->ConsumeIntegral<uint16_t>();
79     uint16_t value = 0;
80     if (length) {
81       ss << "x'" << std::hex;
82       for (uint16_t i = 0; i < length; i++) {
83         value = data_provider->ConsumeIntegral<uint8_t>();
84 
85         if (data_provider->remaining_bytes() == 0)
86           break;
87 
88         ss << std::setfill('0') << std::setw(2) << value;
89       }
90       ss << "'";
91     } else
92       return "x'00'";
93   } break;
94   default:
95     return "'NOT SUPPORTED'";
96   }
97   out = ss.str();
98   return out;
99 }
100 
RunSqlQuery(std::string & query,int * exec_count)101 void RunSqlQuery(std::string &query, int *exec_count) {
102   static bool should_print = ::getenv("DUMP_NATIVE_INPUT");
103   if (should_print)
104     std::cout << query << std::endl;
105   char *zErrMsg = 0; /* Error message returned by sqlite_exec() */
106   sqlite3_exec(g_database, query.c_str(), ExecHandler,
107                static_cast<void *>(exec_count), &zErrMsg);
108   sqlite3_free(zErrMsg);
109 }
110 
InitializeDB(FuzzedDataProvider * data_provider,int * exec_count)111 int InitializeDB(FuzzedDataProvider *data_provider, int *exec_count) {
112   int rc; /* Return code from various interfaces */
113   const char *icu_list[219] = {
114       "af_NA",      "af_ZA",       "ar_AE",      "ar_BH",      "ar_DZ",
115       "ar_EG",      "ar_IQ",       "ar_JO",      "ar_KW",      "ar_LB",
116       "ar_LY",      "ar_MA",       "ar_OM",      "ar_QA",      "ar_SA",
117       "ar_SD",      "ar_SY",       "ar_TN",      "ar_YE",      "as_IN",
118       "az_Latn",    "az_Latn_AZ",  "be_BY",      "bg_BG",      "bn_BD",
119       "bn_IN",      "bs_BA",       "ca_ES",      "chr",        "chr_US",
120       "cs_CZ",      "cy_GB",       "da_DK",      "de_AT",      "de_BE",
121       "de_CH",      "de_DE",       "de_LI",      "de_LU",      "el_CY",
122       "el_GR",      "en",          "en_AS",      "en_AU",      "en_BE",
123       "en_BW",      "en_BZ",       "en_CA",      "en_GB",      "en_GU",
124       "en_HK",      "en_IE",       "en_IN",      "en_JM",      "en_MH",
125       "en_MP",      "en_MT",       "en_MU",      "en_NA",      "en_NZ",
126       "en_PH",      "en_PK",       "en_SG",      "en_TT",      "en_UM",
127       "en_US",      "en_US_POSIX", "en_VI",      "en_ZA",      "en_ZW",
128       "es_419",     "es_AR",       "es_BO",      "es_CL",      "es_CO",
129       "es_CR",      "es_DO",       "es_EC",      "es_ES",      "es_GQ",
130       "es_GT",      "es_HN",       "es_MX",      "es_NI",      "es_PA",
131       "es_PE",      "es_PR",       "es_PY",      "es_SV",      "es_US",
132       "es_UY",      "es_VE",       "et_EE",      "fa_IR",      "fi_FI",
133       "fil_PH",     "fo_FO",       "fr_BE",      "fr_BF",      "fr_BI",
134       "fr_BJ",      "fr_BL",       "fr_CA",      "fr_CD",      "fr_CF",
135       "fr_CG",      "fr_CH",       "fr_CI",      "fr_CM",      "fr_DJ",
136       "fr_FR",      "fr_GA",       "fr_GN",      "fr_GP",      "fr_GQ",
137       "fr_KM",      "fr_LU",       "fr_MC",      "fr_MF",      "fr_MG",
138       "fr_ML",      "fr_MQ",       "fr_NE",      "fr_RE",      "fr_RW",
139       "fr_SN",      "fr_TD",       "fr_TG",      "ga",         "ga_IE",
140       "gu_IN",      "ha_Latn",     "ha_Latn_GH", "ha_Latn_NE", "ha_Latn_NG",
141       "he_IL",      "hi_IN",       "hr_HR",      "hu_HU",      "hy_AM",
142       "id",         "id_ID",       "ig_NG",      "is_IS",      "it",
143       "it_CH",      "it_IT",       "ja_JP",      "ka",         "ka_GE",
144       "kk_KZ",      "kl_GL",       "kn_IN",      "ko_KR",      "kok_IN",
145       "lt_LT",      "lv_LV",       "mk_MK",      "ml_IN",      "mr_IN",
146       "ms",         "ms_BN",       "ms_MY",      "mt_MT",      "nb_NO",
147       "nlnl_BE",    "nl_NL",       "nn_NO",      "om_ET",      "om_KE",
148       "or_IN",      "pa_Arab",     "pa_Arab_PK", "pa_Guru",    "pa_Guru_IN",
149       "pl_PL",      "ps_AF",       "pt",         "pt_BR",      "pt_PT",
150       "ro_MD",      "ro_RO",       "ru_MD",      "ru_RU",      "ru_UA",
151       "si_LK",      "sk_SK",       "sl_SI",      "sq_AL",      "sr_Cyrl",
152       "sr_Cyrl_BA", "sr_Cyrl_ME",  "sr_Cyrl_RS", "sr_Latn_BA", "sr_Latn_ME",
153       "sr_Latn_RS", "sv_FI",       "sv_SE",      "sw",         "sw_KE",
154       "sw_TZ",      "ta_IN",       "ta_LK",      "te_IN",      "th_TH",
155       "tr_TR",      "uk_UA",       "ur_IN",      "ur_PK",      "vi_VN",
156       "yo_NG",      "zh_Hans",     "zh_Hans_CN", "zh_Hans_SG", "zh_Hant_HK",
157       "zh_Hant_MO", "zh_Hant_TW",  "zu",         "zu_ZA"};
158 
159   rc = sqlite3_initialize();
160   if (rc)
161     return rc;
162 
163   /* Open the database connection.  Only use an in-memory database. */
164   rc = sqlite3_open_v2(
165       "fuzz.db", &g_database,
166       SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_MEMORY, 0);
167   if (rc)
168     return rc;
169 
170   /* enables foreign key constraints */
171   sqlite3_db_config(g_database, SQLITE_DBCONFIG_ENABLE_FKEY, 1, &rc);
172 
173   /* disable defence-in-depth to simplify the fuzzing on shadow tables */
174   sqlite3_db_config(g_database, SQLITE_DBCONFIG_DEFENSIVE, 0, &rc);
175   if (rc)
176     return rc;
177 
178   /* determine a limit on the number of output rows */
179   *exec_count = 0x3f;
180 
181   /* Some initial queries */
182   std::string init_drop_db("DROP TABLE IF EXISTS f;");
183   std::string init_drop_docsize("DROP TABLE IF EXISTS 'f_docsize';");
184   std::string init_drop_stat("DROP TABLE IF EXISTS 'f_stat';");
185   std::string init_create_fts3 = "CREATE VIRTUAL TABLE f USING fts3(a,b";
186   switch (data_provider->ConsumeIntegralInRange<uint8_t>(0, 3)) {
187   case 1:
188     init_create_fts3 += ",tokenize=porter";
189     break;
190   case 2:
191     init_create_fts3 += ",tokenize=icu ";
192     init_create_fts3 +=
193         icu_list[(data_provider->ConsumeIntegralInRange<uint8_t>(
194             0, ((sizeof(icu_list) / sizeof(const char *) % 0x100) - 1)))];
195     break;
196   case 3:
197     init_create_fts3 += ",tokenize=icu";
198     break;
199   default:
200   case 0:
201     /*if we don't set tokenizer to anything, it will goes default simple
202      * tokenizer*/
203     break;
204   }
205   init_create_fts3 += ");";
206   std::string create_fake_docsize(
207       "CREATE TABLE 'f_docsize'(docid INTEGER PRIMARY KEY, size BLOB);");
208   std::string create_fake_stat(
209       "CREATE TABLE 'f_stat'(id INTEGER PRIMARY KEY, value BLOB);");
210   std::string init_set_initial_data("INSERT INTO f VALUES (1, '1234');");
211 
212   RunSqlQuery(init_drop_db, exec_count);
213   RunSqlQuery(init_drop_docsize, exec_count);
214   RunSqlQuery(init_drop_stat, exec_count);
215   RunSqlQuery(init_create_fts3, exec_count);
216   RunSqlQuery(create_fake_docsize, exec_count);
217   RunSqlQuery(create_fake_stat, exec_count);
218   RunSqlQuery(init_set_initial_data, exec_count);
219 
220   return rc;
221 }
222 /*
223 ** Main entry point.  The fuzzer invokes this function with each
224 ** fuzzed input.
225 */
226 
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)227 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
228   if (size < 3 || size > 0x1000000)
229     return 0; /* Early out if unsufficient or too much data */
230 
231   FuzzedDataProvider data_provider(data, size);
232   int exec_count = 0; /* Abort row callback when count reaches zero */
233 
234   if (InitializeDB(&data_provider, &exec_count))
235     return 0;
236 
237   while (1) {
238     std::string op, target, target_column;
239     ColumnType target_column_type = ColumnType::kMin;
240 
241     opdata_16 op_16;
242     size_t ret = GetOperator16(&data_provider, &op_16);
243 
244     if (ret == 0)
245       break;
246     unsigned long table_operator_border =
247         static_cast<unsigned long>(TableOperator::kMax) + 1;
248     TableOperator op_type =
249         static_cast<TableOperator>(op_16.op_type % table_operator_border);
250 
251     /*
252         1. choose a table and a column to fuzz.
253     */
254     bool op_on_shadow = (op_type <= TableOperator::kShadowTableMax);
255     if (op_on_shadow) {
256       switch (op_type) {
257       case TableOperator::kSegdir:
258         target = "f_segdir";
259         switch (op_16.column_op % 5) {
260         case 0x00:
261           target_column = " level ";
262           target_column_type = ColumnType::kShortNumber;
263           break;
264         case 0x01:
265           target_column = " start_block ";
266           target_column_type = ColumnType::kShortNumber;
267           break;
268         case 0x02:
269           target_column = " end_block ";
270           target_column_type = ColumnType::kShortNumber;
271           break;
272         case 0x03:
273           target_column = " leaves_end_block ";
274           target_column_type = ColumnType::kShortNumber;
275           break;
276         case 0x04:
277           target_column = " root ";
278           target_column_type = ColumnType::kBlob;
279           break;
280         }
281         break;
282 
283       case TableOperator::kContent:
284         target = "f_content";
285         switch (op_16.column_op % 3) {
286         case 0x00:
287           target_column = " docid ";
288           target_column_type = ColumnType::kShortNumber;
289           break;
290         case 0x01:
291           target_column = " 'c0a' ";
292           target_column_type = ColumnType::kShortNumber;
293           break;
294         case 0x02:
295           target_column = " 'c1b' ";
296           target_column_type = ColumnType::kBlob;
297           break;
298         }
299         break;
300 
301       case TableOperator::kDocsize:
302         target = "f_docsize";
303         switch (op_16.column_op % 2) {
304         case 0x00:
305           target_column = " docid ";
306           target_column_type = ColumnType::kShortNumber;
307           break;
308         case 0x01:
309           target_column = " size ";
310           target_column_type = ColumnType::kBlob;
311           break;
312         }
313         break;
314 
315       case TableOperator::kSegments:
316         target = "f_segments";
317         switch (op_16.column_op % 2) {
318         case 0x00:
319           target_column = " blockid ";
320           target_column_type = ColumnType::kShortNumber;
321           break;
322         case 0x01:
323           target_column = " block ";
324           target_column_type = ColumnType::kBlob;
325           break;
326         }
327         break;
328 
329       case TableOperator::kStat:
330       default:
331         target = "f_stat";
332         switch (op_16.column_op % 2) {
333         case 0x00:
334           target_column = " id ";
335           target_column_type = ColumnType::kShortNumber;
336           break;
337         case 0x01:
338           target_column = " value ";
339           target_column_type = ColumnType::kBlob;
340           break;
341         }
342         break;
343       }
344     } else {
345       target = "f";
346       switch (op_16.column_op % 2) {
347       case 0x00:
348         target_column = " a ";
349         target_column_type = ColumnType::kShortNumber;
350         break;
351       case 0x01:
352         target_column = " b ";
353         target_column_type = ColumnType::kBlob;
354         break;
355       }
356       op_type = TableOperator::kFuzzTable;
357     }
358 
359     /*
360         2. choose a verb and generate some data if needed
361     */
362     switch (op_16.op_sql_operation % 6) {
363     case 0x01:
364       op = "UPDATE " + target + " SET " + target_column + " = ";
365       op += GetValueByType(&data_provider, target_column_type);
366       op += " WHERE " + target_column + " IN (SELECT " + target_column;
367       op += " FROM " + target + " LIMIT 1 OFFSET ";
368       op += GetValueByType(&data_provider, ColumnType::kShortNumber);
369       op += ");";
370       break;
371 
372     case 0x00:
373     case 0x02: {
374       std::ostringstream ss;
375       switch (op_type) {
376       case TableOperator::kSegdir:
377         ss << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
378            << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
379            << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
380            << GetValueByType(&data_provider, ColumnType::kShortNumber) << ",'"
381            << GetValueByType(&data_provider, ColumnType::kShortNumber) << " "
382            << GetValueByType(&data_provider, ColumnType::kShortNumber) << "',"
383            << GetValueByType(&data_provider, ColumnType::kBlob);
384         break;
385       case TableOperator::kContent:
386         ss << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
387            << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
388            << GetValueByType(&data_provider, ColumnType::kBlob);
389         break;
390       default:
391         /*
392             All other tables have the same type, so to simplify, use this
393            default instead.
394             BLOB is almost the same as STRING and it avoids the annoying
395            encoding problem, so choose BLOB instead when some columns
396            need STRING
397         */
398         ss << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
399            << GetValueByType(&data_provider, ColumnType::kBlob);
400         break;
401       }
402       op = "INSERT INTO " + target + " VALUES (" + ss.str() + ");";
403     } break;
404 
405     case 0x03:
406       op = "DELETE FROM f WHERE ";
407       op += (op_16.select_operator_1 ? "a" : "b");
408       op += "=";
409       op += GetValueByType(&data_provider, op_16.select_operator_1
410                                                ? ColumnType::kShortNumber
411                                                : ColumnType::kBlob);
412       op += ";";
413       break;
414 
415     case 0x04: {
416       uint8_t selector_indicator =
417           data_provider.ConsumeIntegralInRange<uint8_t>(0, 4);
418       std::string selector_string;
419       switch (selector_indicator) {
420       case 0x00: {
421         selector_string = " matchinfo( f , '";
422         uint8_t matchinfo_argdata = data_provider.ConsumeIntegral<uint8_t>();
423         if (matchinfo_argdata == 0) {
424           selector_string += "pcx"; // default value
425         }
426         /* to simplify I removed y, because it is almost the same as 'x'
427          * =>https://www.sqlite.org/fts3.html#matchinfo */
428         const char matchinfo_args[8] = {'p', 'c', 'n', 'a', 'l', 's', 'x', 'b'};
429         size_t matchinfo_counter = 0;
430         while (matchinfo_argdata > 0 &&
431                matchinfo_counter < sizeof(matchinfo_args)) {
432           if (matchinfo_argdata % 2) {
433             selector_string += matchinfo_args[matchinfo_counter];
434           }
435           matchinfo_argdata >>= 1;
436           matchinfo_counter++;
437         }
438 
439         selector_string += "') ";
440         break;
441       }
442       case 0x01:
443         selector_string += " snippet(f) ";
444         break;
445       case 0x02:
446         selector_string += " snippet(f, x'";
447         selector_string += GetValueByType(&data_provider, ColumnType::kBlob);
448         selector_string += "', x'";
449         selector_string += GetValueByType(&data_provider, ColumnType::kBlob);
450         selector_string += "', x'";
451         selector_string += GetValueByType(&data_provider, ColumnType::kBlob);
452         selector_string += "') ";
453         break;
454       case 0x03:
455         selector_string += " offsets(f) ";
456         break;
457       case 0x04:
458         selector_string += " * ";
459         break;
460       }
461 
462       op = "SELECT " + selector_string + " FROM f WHERE ";
463       op += (op_16.select_operator_1 ? "a" : "b");
464       uint16_t processor_byte =
465           0; /* don't read a byte when it compares with = (eq)*/
466       uint8_t current_bits = 0; /* 4 bits of data, maximum = 15*/
467       switch (op_16.select_operator_2 % 4) {
468       case 0: {
469         bool need_string = true;
470         uint8_t match_length = data_provider.ConsumeIntegralInRange<uint8_t>(
471             1, 10); /* parts involved in match */
472         op += " MATCH ";
473         while (match_length && data_provider.remaining_bytes()) {
474           if (need_string) {
475             /* asterisk cases like MATCH 'a*' will be automatically covered in
476              * this BLOB */
477             op += GetValueByType(&data_provider, ColumnType::kBlob);
478             need_string = false;
479           } else {
480             /*
481                  ignoring brackets to simplify the logic since they can be
482                converted into the equal forms.
483                  eg: a AND (b OR c) == b OR c AND a
484             */
485             switch (data_provider.ConsumeIntegralInRange<uint8_t>(0, 3)) {
486             case 0:
487               op += " AND ";
488               break;
489             case 1:
490               op += " OR ";
491               break;
492             case 2:
493               op += " NEAR ";
494               break;
495             default:
496             case 3:
497               op += " NEAR/";
498               op += GetValueByType(&data_provider, ColumnType::kShortNumber);
499               op += " ";
500               break;
501             }
502             need_string = true;
503           }
504           match_length--;
505         }
506         if (need_string) {
507           /* asterisk cases like MATCH 'a*' will be automatically covered in
508            * this BLOB */
509           op += GetValueByType(&data_provider, ColumnType::kBlob);
510           need_string = false;
511         }
512         op += ";";
513       } break;
514       case 1:
515         processor_byte = data_provider.ConsumeIntegral<uint16_t>();
516         op += " LIKE ";
517         while (processor_byte) {
518           current_bits = processor_byte % (1 << 4);
519           processor_byte >>= 4;
520           switch (current_bits) {
521           case 0:
522             op += "%";
523             break;
524           case 1:
525             op += "_";
526             break;
527           case 2:
528             op += " ";
529             break;
530           default:
531             op += ('a' + current_bits);
532             break;
533           }
534         }
535         op += "';";
536         break;
537       case 2:
538         op += " = 'a b';";
539         break;
540       case 3:
541         processor_byte = data_provider.ConsumeIntegral<uint16_t>();
542         op += " GLOB '";
543         while (processor_byte) {
544           current_bits = processor_byte % (1 << 4);
545           processor_byte >>= 4;
546           switch (current_bits) {
547           case 0:
548             op += "*";
549             break;
550           case 1:
551             op += "?";
552             break;
553           case 2:
554             op += "[AB]";
555             break;
556           case 3:
557             op += "[0-9]";
558             break;
559           case 4:
560             op += "[!A]";
561             break;
562           case 5:
563             op += "[!3-5]";
564             break;
565           case 6:
566             op += "\\";
567             break;
568           case 7:
569             op += "/";
570             break;
571           default: // alphabets
572             op += ('a' + current_bits);
573             break;
574           }
575         }
576         op += "';";
577         break;
578       } /* end of switch (op_16.select_operator_2 % 4) */
579       break;
580     }
581     case 0x05: {
582       std::string command;
583       std::ostringstream ss;
584       uint8_t command_operator =
585           (op_16.select_operator_2 << 1) + op_16.select_operator_1;
586       switch (command_operator % 5) {
587       case 0x00:
588         command = "optimize";
589         break;
590       case 0x01:
591         command = "rebuild";
592         break;
593       case 0x02:
594         command = "integrity-check";
595         break;
596       case 0x03:
597         ss << GetValueByType(&data_provider, ColumnType::kShortNumber) << ','
598            << GetValueByType(&data_provider, ColumnType::kShortNumber);
599         command = "merge=";
600         command += ss.str();
601         break;
602       case 0x04:
603         ss << GetValueByType(&data_provider, ColumnType::kShortNumber);
604         command = "automerge=";
605         command += ss.str();
606         break;
607       }
608       op = "INSERT INTO f(f) VALUES ('" + command + "');";
609     } break;
610     }
611 
612     RunSqlQuery(op, &exec_count);
613   }
614   /* Cleanup and return */
615 
616   sqlite3_close(g_database);
617 
618   return 0;
619 }
620 
621