1 /* Copyright 2019 Google Inc. All Rights Reserved.
2 ** Licensed under the Apache License, Version 2.0 (the "License");
3 **
4 ** Derived from the public domain SQLite (https://sqlite.org) sources.
5 */
6
7 #include <fuzzer/FuzzedDataProvider.h>
8 #include <stddef.h>
9 #include <stdint.h>
10 #include <string.h>
11 #include <cstdlib>
12 #include <iomanip>
13 #include <ios>
14 #include <iostream>
15 #include <sstream>
16 #include <string>
17
18 #include "third_party/sqlite/sqlite3.h"
19
20 sqlite3 *g_database = 0; /* The database connection */
21
/*
** Kind of SQL literal generated for a column value.
** kMin/kMax alias the first and last real enumerators.
*/
enum class ColumnType {
  kShortNumber = 0, /* small decimal integer */
  kBlob = 1,        /* x'..' hex literal */
  kString = 2,
  kAny = 3,

  kMin = kShortNumber,
  kMax = kAny,
};
30
/*
** Table an operation is aimed at. Values 0-4 name the FTS3 shadow tables,
** 8 names the virtual table itself; kMin/kMax and kShadowTableMin/Max are
** range aliases.
*/
enum class TableOperator : unsigned long {
  kSegdir = 0,
  kContent = 1,
  kDocsize = 2,
  kSegments = 3,
  kStat = 4,
  // 5-7: reserved for future use
  kFuzzTable = 8,

  kShadowTableMin = kSegdir,
  kShadowTableMax = kStat,
  kMin = kSegdir,
  kMax = kFuzzTable,
};
44
/*
** One fuzz operation, decoded from 16 bits of input.
** Field order and widths must not change: GetOperator16() fills this
** struct with a raw memcpy of a uint16_t.
*/
struct opdata_16 {
  uint8_t op_type : 4;           /* selects the target table */
  uint8_t column_op : 4;         /* selects the target column */
  uint8_t op_sql_operation : 4;  /* selects the SQL verb */
  uint8_t select_operator_1 : 1; /* column choice for DELETE/SELECT */
  uint8_t select_operator_2 : 3; /* comparison kind for SELECT */
};
52
/*
** Row callback handed to sqlite3_exec().
**
** pCnt points at an int holding the remaining row budget. Each call
** decrements the budget; the return value is 1 when the budget was already
** zero or negative before the decrement, 0 otherwise. The row data itself
** (argc/argv/namev) is ignored.
*/
static int ExecHandler(void *pCnt, int argc, char **argv, char **namev) {
  int *budget = static_cast<int *>(pCnt);
  const int before = *budget;
  *budget = before - 1;
  return before <= 0 ? 1 : 0;
}
59
GetOperator16(FuzzedDataProvider * data_provider,opdata_16 * op16)60 size_t GetOperator16(FuzzedDataProvider *data_provider, opdata_16 *op16) {
61 if (data_provider->remaining_bytes() < sizeof(uint16_t))
62 return 0;
63
64 uint16_t operator_data = data_provider->ConsumeIntegral<uint16_t>();
65 memcpy(op16, &operator_data, sizeof(uint16_t));
66 return sizeof(uint16_t);
67 }
68
GetValueByType(FuzzedDataProvider * data_provider,ColumnType type)69 std::string GetValueByType(FuzzedDataProvider *data_provider, ColumnType type) {
70 std::string out;
71 std::ostringstream ss;
72 switch (type) {
73 case ColumnType::kShortNumber: {
74 uint8_t number = data_provider->ConsumeIntegral<uint8_t>();
75 ss << static_cast<uint16_t>(number);
76 } break;
77 case ColumnType::kBlob: {
78 uint16_t length = data_provider->ConsumeIntegral<uint16_t>();
79 uint16_t value = 0;
80 if (length) {
81 ss << "x'" << std::hex;
82 for (uint16_t i = 0; i < length; i++) {
83 value = data_provider->ConsumeIntegral<uint8_t>();
84
85 if (data_provider->remaining_bytes() == 0)
86 break;
87
88 ss << std::setfill('0') << std::setw(2) << value;
89 }
90 ss << "'";
91 } else
92 return "x'00'";
93 } break;
94 default:
95 return "'NOT SUPPORTED'";
96 }
97 out = ss.str();
98 return out;
99 }
100
RunSqlQuery(std::string & query,int * exec_count)101 void RunSqlQuery(std::string &query, int *exec_count) {
102 static bool should_print = ::getenv("DUMP_NATIVE_INPUT");
103 if (should_print)
104 std::cout << query << std::endl;
105 char *zErrMsg = 0; /* Error message returned by sqlite_exec() */
106 sqlite3_exec(g_database, query.c_str(), ExecHandler,
107 static_cast<void *>(exec_count), &zErrMsg);
108 sqlite3_free(zErrMsg);
109 }
110
InitializeDB(FuzzedDataProvider * data_provider,int * exec_count)111 int InitializeDB(FuzzedDataProvider *data_provider, int *exec_count) {
112 int rc; /* Return code from various interfaces */
113 const char *icu_list[219] = {
114 "af_NA", "af_ZA", "ar_AE", "ar_BH", "ar_DZ",
115 "ar_EG", "ar_IQ", "ar_JO", "ar_KW", "ar_LB",
116 "ar_LY", "ar_MA", "ar_OM", "ar_QA", "ar_SA",
117 "ar_SD", "ar_SY", "ar_TN", "ar_YE", "as_IN",
118 "az_Latn", "az_Latn_AZ", "be_BY", "bg_BG", "bn_BD",
119 "bn_IN", "bs_BA", "ca_ES", "chr", "chr_US",
120 "cs_CZ", "cy_GB", "da_DK", "de_AT", "de_BE",
121 "de_CH", "de_DE", "de_LI", "de_LU", "el_CY",
122 "el_GR", "en", "en_AS", "en_AU", "en_BE",
123 "en_BW", "en_BZ", "en_CA", "en_GB", "en_GU",
124 "en_HK", "en_IE", "en_IN", "en_JM", "en_MH",
125 "en_MP", "en_MT", "en_MU", "en_NA", "en_NZ",
126 "en_PH", "en_PK", "en_SG", "en_TT", "en_UM",
127 "en_US", "en_US_POSIX", "en_VI", "en_ZA", "en_ZW",
128 "es_419", "es_AR", "es_BO", "es_CL", "es_CO",
129 "es_CR", "es_DO", "es_EC", "es_ES", "es_GQ",
130 "es_GT", "es_HN", "es_MX", "es_NI", "es_PA",
131 "es_PE", "es_PR", "es_PY", "es_SV", "es_US",
132 "es_UY", "es_VE", "et_EE", "fa_IR", "fi_FI",
133 "fil_PH", "fo_FO", "fr_BE", "fr_BF", "fr_BI",
134 "fr_BJ", "fr_BL", "fr_CA", "fr_CD", "fr_CF",
135 "fr_CG", "fr_CH", "fr_CI", "fr_CM", "fr_DJ",
136 "fr_FR", "fr_GA", "fr_GN", "fr_GP", "fr_GQ",
137 "fr_KM", "fr_LU", "fr_MC", "fr_MF", "fr_MG",
138 "fr_ML", "fr_MQ", "fr_NE", "fr_RE", "fr_RW",
139 "fr_SN", "fr_TD", "fr_TG", "ga", "ga_IE",
140 "gu_IN", "ha_Latn", "ha_Latn_GH", "ha_Latn_NE", "ha_Latn_NG",
141 "he_IL", "hi_IN", "hr_HR", "hu_HU", "hy_AM",
142 "id", "id_ID", "ig_NG", "is_IS", "it",
143 "it_CH", "it_IT", "ja_JP", "ka", "ka_GE",
144 "kk_KZ", "kl_GL", "kn_IN", "ko_KR", "kok_IN",
145 "lt_LT", "lv_LV", "mk_MK", "ml_IN", "mr_IN",
146 "ms", "ms_BN", "ms_MY", "mt_MT", "nb_NO",
147 "nlnl_BE", "nl_NL", "nn_NO", "om_ET", "om_KE",
148 "or_IN", "pa_Arab", "pa_Arab_PK", "pa_Guru", "pa_Guru_IN",
149 "pl_PL", "ps_AF", "pt", "pt_BR", "pt_PT",
150 "ro_MD", "ro_RO", "ru_MD", "ru_RU", "ru_UA",
151 "si_LK", "sk_SK", "sl_SI", "sq_AL", "sr_Cyrl",
152 "sr_Cyrl_BA", "sr_Cyrl_ME", "sr_Cyrl_RS", "sr_Latn_BA", "sr_Latn_ME",
153 "sr_Latn_RS", "sv_FI", "sv_SE", "sw", "sw_KE",
154 "sw_TZ", "ta_IN", "ta_LK", "te_IN", "th_TH",
155 "tr_TR", "uk_UA", "ur_IN", "ur_PK", "vi_VN",
156 "yo_NG", "zh_Hans", "zh_Hans_CN", "zh_Hans_SG", "zh_Hant_HK",
157 "zh_Hant_MO", "zh_Hant_TW", "zu", "zu_ZA"};
158
159 rc = sqlite3_initialize();
160 if (rc)
161 return rc;
162
163 /* Open the database connection. Only use an in-memory database. */
164 rc = sqlite3_open_v2(
165 "fuzz.db", &g_database,
166 SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_MEMORY, 0);
167 if (rc)
168 return rc;
169
170 /* enables foreign key constraints */
171 sqlite3_db_config(g_database, SQLITE_DBCONFIG_ENABLE_FKEY, 1, &rc);
172
173 /* disable defence-in-depth to simplify the fuzzing on shadow tables */
174 sqlite3_db_config(g_database, SQLITE_DBCONFIG_DEFENSIVE, 0, &rc);
175 if (rc)
176 return rc;
177
178 /* determine a limit on the number of output rows */
179 *exec_count = 0x3f;
180
181 /* Some initial queries */
182 std::string init_drop_db("DROP TABLE IF EXISTS f;");
183 std::string init_drop_docsize("DROP TABLE IF EXISTS 'f_docsize';");
184 std::string init_drop_stat("DROP TABLE IF EXISTS 'f_stat';");
185 std::string init_create_fts3 = "CREATE VIRTUAL TABLE f USING fts3(a,b";
186 switch (data_provider->ConsumeIntegralInRange<uint8_t>(0, 3)) {
187 case 1:
188 init_create_fts3 += ",tokenize=porter";
189 break;
190 case 2:
191 init_create_fts3 += ",tokenize=icu ";
192 init_create_fts3 +=
193 icu_list[(data_provider->ConsumeIntegralInRange<uint8_t>(
194 0, ((sizeof(icu_list) / sizeof(const char *) % 0x100) - 1)))];
195 break;
196 case 3:
197 init_create_fts3 += ",tokenize=icu";
198 break;
199 default:
200 case 0:
201 /*if we don't set tokenizer to anything, it will goes default simple
202 * tokenizer*/
203 break;
204 }
205 init_create_fts3 += ");";
206 std::string create_fake_docsize(
207 "CREATE TABLE 'f_docsize'(docid INTEGER PRIMARY KEY, size BLOB);");
208 std::string create_fake_stat(
209 "CREATE TABLE 'f_stat'(id INTEGER PRIMARY KEY, value BLOB);");
210 std::string init_set_initial_data("INSERT INTO f VALUES (1, '1234');");
211
212 RunSqlQuery(init_drop_db, exec_count);
213 RunSqlQuery(init_drop_docsize, exec_count);
214 RunSqlQuery(init_drop_stat, exec_count);
215 RunSqlQuery(init_create_fts3, exec_count);
216 RunSqlQuery(create_fake_docsize, exec_count);
217 RunSqlQuery(create_fake_stat, exec_count);
218 RunSqlQuery(init_set_initial_data, exec_count);
219
220 return rc;
221 }
222 /*
223 ** Main entry point. The fuzzer invokes this function with each
224 ** fuzzed input.
225 */
226
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)227 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
228 if (size < 3 || size > 0x1000000)
229 return 0; /* Early out if unsufficient or too much data */
230
231 FuzzedDataProvider data_provider(data, size);
232 int exec_count = 0; /* Abort row callback when count reaches zero */
233
234 if (InitializeDB(&data_provider, &exec_count))
235 return 0;
236
237 while (1) {
238 std::string op, target, target_column;
239 ColumnType target_column_type = ColumnType::kMin;
240
241 opdata_16 op_16;
242 size_t ret = GetOperator16(&data_provider, &op_16);
243
244 if (ret == 0)
245 break;
246 unsigned long table_operator_border =
247 static_cast<unsigned long>(TableOperator::kMax) + 1;
248 TableOperator op_type =
249 static_cast<TableOperator>(op_16.op_type % table_operator_border);
250
251 /*
252 1. choose a table and a column to fuzz.
253 */
254 bool op_on_shadow = (op_type <= TableOperator::kShadowTableMax);
255 if (op_on_shadow) {
256 switch (op_type) {
257 case TableOperator::kSegdir:
258 target = "f_segdir";
259 switch (op_16.column_op % 5) {
260 case 0x00:
261 target_column = " level ";
262 target_column_type = ColumnType::kShortNumber;
263 break;
264 case 0x01:
265 target_column = " start_block ";
266 target_column_type = ColumnType::kShortNumber;
267 break;
268 case 0x02:
269 target_column = " end_block ";
270 target_column_type = ColumnType::kShortNumber;
271 break;
272 case 0x03:
273 target_column = " leaves_end_block ";
274 target_column_type = ColumnType::kShortNumber;
275 break;
276 case 0x04:
277 target_column = " root ";
278 target_column_type = ColumnType::kBlob;
279 break;
280 }
281 break;
282
283 case TableOperator::kContent:
284 target = "f_content";
285 switch (op_16.column_op % 3) {
286 case 0x00:
287 target_column = " docid ";
288 target_column_type = ColumnType::kShortNumber;
289 break;
290 case 0x01:
291 target_column = " 'c0a' ";
292 target_column_type = ColumnType::kShortNumber;
293 break;
294 case 0x02:
295 target_column = " 'c1b' ";
296 target_column_type = ColumnType::kBlob;
297 break;
298 }
299 break;
300
301 case TableOperator::kDocsize:
302 target = "f_docsize";
303 switch (op_16.column_op % 2) {
304 case 0x00:
305 target_column = " docid ";
306 target_column_type = ColumnType::kShortNumber;
307 break;
308 case 0x01:
309 target_column = " size ";
310 target_column_type = ColumnType::kBlob;
311 break;
312 }
313 break;
314
315 case TableOperator::kSegments:
316 target = "f_segments";
317 switch (op_16.column_op % 2) {
318 case 0x00:
319 target_column = " blockid ";
320 target_column_type = ColumnType::kShortNumber;
321 break;
322 case 0x01:
323 target_column = " block ";
324 target_column_type = ColumnType::kBlob;
325 break;
326 }
327 break;
328
329 case TableOperator::kStat:
330 default:
331 target = "f_stat";
332 switch (op_16.column_op % 2) {
333 case 0x00:
334 target_column = " id ";
335 target_column_type = ColumnType::kShortNumber;
336 break;
337 case 0x01:
338 target_column = " value ";
339 target_column_type = ColumnType::kBlob;
340 break;
341 }
342 break;
343 }
344 } else {
345 target = "f";
346 switch (op_16.column_op % 2) {
347 case 0x00:
348 target_column = " a ";
349 target_column_type = ColumnType::kShortNumber;
350 break;
351 case 0x01:
352 target_column = " b ";
353 target_column_type = ColumnType::kBlob;
354 break;
355 }
356 op_type = TableOperator::kFuzzTable;
357 }
358
359 /*
360 2. choose a verb and generate some data if needed
361 */
362 switch (op_16.op_sql_operation % 6) {
363 case 0x01:
364 op = "UPDATE " + target + " SET " + target_column + " = ";
365 op += GetValueByType(&data_provider, target_column_type);
366 op += " WHERE " + target_column + " IN (SELECT " + target_column;
367 op += " FROM " + target + " LIMIT 1 OFFSET ";
368 op += GetValueByType(&data_provider, ColumnType::kShortNumber);
369 op += ");";
370 break;
371
372 case 0x00:
373 case 0x02: {
374 std::ostringstream ss;
375 switch (op_type) {
376 case TableOperator::kSegdir:
377 ss << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
378 << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
379 << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
380 << GetValueByType(&data_provider, ColumnType::kShortNumber) << ",'"
381 << GetValueByType(&data_provider, ColumnType::kShortNumber) << " "
382 << GetValueByType(&data_provider, ColumnType::kShortNumber) << "',"
383 << GetValueByType(&data_provider, ColumnType::kBlob);
384 break;
385 case TableOperator::kContent:
386 ss << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
387 << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
388 << GetValueByType(&data_provider, ColumnType::kBlob);
389 break;
390 default:
391 /*
392 All other tables have the same type, so to simplify, use this
393 default instead.
394 BLOB is almost the same as STRING and it avoids the annoying
395 encoding problem, so choose BLOB instead when some columns
396 need STRING
397 */
398 ss << GetValueByType(&data_provider, ColumnType::kShortNumber) << ","
399 << GetValueByType(&data_provider, ColumnType::kBlob);
400 break;
401 }
402 op = "INSERT INTO " + target + " VALUES (" + ss.str() + ");";
403 } break;
404
405 case 0x03:
406 op = "DELETE FROM f WHERE ";
407 op += (op_16.select_operator_1 ? "a" : "b");
408 op += "=";
409 op += GetValueByType(&data_provider, op_16.select_operator_1
410 ? ColumnType::kShortNumber
411 : ColumnType::kBlob);
412 op += ";";
413 break;
414
415 case 0x04: {
416 uint8_t selector_indicator =
417 data_provider.ConsumeIntegralInRange<uint8_t>(0, 4);
418 std::string selector_string;
419 switch (selector_indicator) {
420 case 0x00: {
421 selector_string = " matchinfo( f , '";
422 uint8_t matchinfo_argdata = data_provider.ConsumeIntegral<uint8_t>();
423 if (matchinfo_argdata == 0) {
424 selector_string += "pcx"; // default value
425 }
426 /* to simplify I removed y, because it is almost the same as 'x'
427 * =>https://www.sqlite.org/fts3.html#matchinfo */
428 const char matchinfo_args[8] = {'p', 'c', 'n', 'a', 'l', 's', 'x', 'b'};
429 size_t matchinfo_counter = 0;
430 while (matchinfo_argdata > 0 &&
431 matchinfo_counter < sizeof(matchinfo_args)) {
432 if (matchinfo_argdata % 2) {
433 selector_string += matchinfo_args[matchinfo_counter];
434 }
435 matchinfo_argdata >>= 1;
436 matchinfo_counter++;
437 }
438
439 selector_string += "') ";
440 break;
441 }
442 case 0x01:
443 selector_string += " snippet(f) ";
444 break;
445 case 0x02:
446 selector_string += " snippet(f, x'";
447 selector_string += GetValueByType(&data_provider, ColumnType::kBlob);
448 selector_string += "', x'";
449 selector_string += GetValueByType(&data_provider, ColumnType::kBlob);
450 selector_string += "', x'";
451 selector_string += GetValueByType(&data_provider, ColumnType::kBlob);
452 selector_string += "') ";
453 break;
454 case 0x03:
455 selector_string += " offsets(f) ";
456 break;
457 case 0x04:
458 selector_string += " * ";
459 break;
460 }
461
462 op = "SELECT " + selector_string + " FROM f WHERE ";
463 op += (op_16.select_operator_1 ? "a" : "b");
464 uint16_t processor_byte =
465 0; /* don't read a byte when it compares with = (eq)*/
466 uint8_t current_bits = 0; /* 4 bits of data, maximum = 15*/
467 switch (op_16.select_operator_2 % 4) {
468 case 0: {
469 bool need_string = true;
470 uint8_t match_length = data_provider.ConsumeIntegralInRange<uint8_t>(
471 1, 10); /* parts involved in match */
472 op += " MATCH ";
473 while (match_length && data_provider.remaining_bytes()) {
474 if (need_string) {
475 /* asterisk cases like MATCH 'a*' will be automatically covered in
476 * this BLOB */
477 op += GetValueByType(&data_provider, ColumnType::kBlob);
478 need_string = false;
479 } else {
480 /*
481 ignoring brackets to simplify the logic since they can be
482 converted into the equal forms.
483 eg: a AND (b OR c) == b OR c AND a
484 */
485 switch (data_provider.ConsumeIntegralInRange<uint8_t>(0, 3)) {
486 case 0:
487 op += " AND ";
488 break;
489 case 1:
490 op += " OR ";
491 break;
492 case 2:
493 op += " NEAR ";
494 break;
495 default:
496 case 3:
497 op += " NEAR/";
498 op += GetValueByType(&data_provider, ColumnType::kShortNumber);
499 op += " ";
500 break;
501 }
502 need_string = true;
503 }
504 match_length--;
505 }
506 if (need_string) {
507 /* asterisk cases like MATCH 'a*' will be automatically covered in
508 * this BLOB */
509 op += GetValueByType(&data_provider, ColumnType::kBlob);
510 need_string = false;
511 }
512 op += ";";
513 } break;
514 case 1:
515 processor_byte = data_provider.ConsumeIntegral<uint16_t>();
516 op += " LIKE ";
517 while (processor_byte) {
518 current_bits = processor_byte % (1 << 4);
519 processor_byte >>= 4;
520 switch (current_bits) {
521 case 0:
522 op += "%";
523 break;
524 case 1:
525 op += "_";
526 break;
527 case 2:
528 op += " ";
529 break;
530 default:
531 op += ('a' + current_bits);
532 break;
533 }
534 }
535 op += "';";
536 break;
537 case 2:
538 op += " = 'a b';";
539 break;
540 case 3:
541 processor_byte = data_provider.ConsumeIntegral<uint16_t>();
542 op += " GLOB '";
543 while (processor_byte) {
544 current_bits = processor_byte % (1 << 4);
545 processor_byte >>= 4;
546 switch (current_bits) {
547 case 0:
548 op += "*";
549 break;
550 case 1:
551 op += "?";
552 break;
553 case 2:
554 op += "[AB]";
555 break;
556 case 3:
557 op += "[0-9]";
558 break;
559 case 4:
560 op += "[!A]";
561 break;
562 case 5:
563 op += "[!3-5]";
564 break;
565 case 6:
566 op += "\\";
567 break;
568 case 7:
569 op += "/";
570 break;
571 default: // alphabets
572 op += ('a' + current_bits);
573 break;
574 }
575 }
576 op += "';";
577 break;
578 } /* end of switch (op_16.select_operator_2 % 4) */
579 break;
580 }
581 case 0x05: {
582 std::string command;
583 std::ostringstream ss;
584 uint8_t command_operator =
585 (op_16.select_operator_2 << 1) + op_16.select_operator_1;
586 switch (command_operator % 5) {
587 case 0x00:
588 command = "optimize";
589 break;
590 case 0x01:
591 command = "rebuild";
592 break;
593 case 0x02:
594 command = "integrity-check";
595 break;
596 case 0x03:
597 ss << GetValueByType(&data_provider, ColumnType::kShortNumber) << ','
598 << GetValueByType(&data_provider, ColumnType::kShortNumber);
599 command = "merge=";
600 command += ss.str();
601 break;
602 case 0x04:
603 ss << GetValueByType(&data_provider, ColumnType::kShortNumber);
604 command = "automerge=";
605 command += ss.str();
606 break;
607 }
608 op = "INSERT INTO f(f) VALUES ('" + command + "');";
609 } break;
610 }
611
612 RunSqlQuery(op, &exec_count);
613 }
614 /* Cleanup and return */
615
616 sqlite3_close(g_database);
617
618 return 0;
619 }
620
621