1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of TokuDB
6
7
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 Copyright (c) 2020, MariaDB Corporation.
10
TokuDB is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License, version 2,
13 as published by the Free Software Foundation.
14
15 TokuDB is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with TokuDB. If not, see <http://www.gnu.org/licenses/>.
22
23 ======= */
24
25 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
26
27 #include "hatoku_cmp.h"
28
29 #ifdef WORDS_BIGENDIAN
30 #error "WORDS_BIGENDIAN not supported"
31 #endif
32
33 // returns true if the field is a valid field to be used
34 // in a TokuDB table. The non-valid fields are those
35 // that have been deprecated since before 5.1, and can
36 // only exist through upgrades of old versions of MySQL
field_valid_for_tokudb_table(Field * field)37 static bool field_valid_for_tokudb_table(Field* field) {
38 bool ret_val = false;
39 enum_field_types mysql_type = field->real_type();
40 switch (mysql_type) {
41 case MYSQL_TYPE_LONG:
42 case MYSQL_TYPE_LONGLONG:
43 case MYSQL_TYPE_TINY:
44 case MYSQL_TYPE_SHORT:
45 case MYSQL_TYPE_INT24:
46 case MYSQL_TYPE_DATE:
47 case MYSQL_TYPE_YEAR:
48 case MYSQL_TYPE_NEWDATE:
49 case MYSQL_TYPE_ENUM:
50 case MYSQL_TYPE_SET:
51 case MYSQL_TYPE_TIME:
52 case MYSQL_TYPE_DATETIME:
53 case MYSQL_TYPE_TIMESTAMP:
54 case MYSQL_TYPE_DOUBLE:
55 case MYSQL_TYPE_FLOAT:
56 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
57 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
58 (100000 <= MYSQL_VERSION_ID)
59 case MYSQL_TYPE_DATETIME2:
60 case MYSQL_TYPE_TIMESTAMP2:
61 case MYSQL_TYPE_TIME2:
62 #endif
63 case MYSQL_TYPE_NEWDECIMAL:
64 case MYSQL_TYPE_BIT:
65 case MYSQL_TYPE_STRING:
66 case MYSQL_TYPE_VARCHAR:
67 case MYSQL_TYPE_TINY_BLOB:
68 case MYSQL_TYPE_MEDIUM_BLOB:
69 case MYSQL_TYPE_BLOB:
70 case MYSQL_TYPE_LONG_BLOB:
71 ret_val = true;
72 goto exit;
73 //
74 // I believe these are old types that are no longer
75 // in any 5.1 tables, so tokudb does not need
76 // to worry about them
77 // Putting in this assert in case I am wrong.
78 // Do not support geometry yet.
79 //
80 case MYSQL_TYPE_GEOMETRY:
81 case MYSQL_TYPE_DECIMAL:
82 case MYSQL_TYPE_VAR_STRING:
83 case MYSQL_TYPE_NULL:
84 case MYSQL_TYPE_VARCHAR_COMPRESSED:
85 case MYSQL_TYPE_BLOB_COMPRESSED:
86 ret_val = false;
87 }
88 exit:
89 return ret_val;
90 }
91
get_var_field_info(uint32_t * field_len,uint32_t * start_offset,uint32_t var_field_index,const uchar * var_field_offset_ptr,uint32_t num_offset_bytes)92 static void get_var_field_info(
93 uint32_t* field_len, // output: length of field
94 uint32_t* start_offset, // output, length of offset where data starts
95 uint32_t var_field_index, //input, index of var field we want info on
96 const uchar* var_field_offset_ptr, //input, pointer to where offset information for all var fields begins
97 uint32_t num_offset_bytes //input, number of bytes used to store offsets starting at var_field_offset_ptr
98 )
99 {
100 uint32_t data_start_offset = 0;
101 uint32_t data_end_offset = 0;
102 switch (num_offset_bytes) {
103 case (1):
104 data_end_offset = (var_field_offset_ptr + var_field_index)[0];
105 break;
106 case (2):
107 data_end_offset = uint2korr(var_field_offset_ptr + 2*var_field_index);
108 break;
109 default:
110 assert_unreachable();
111 }
112
113 if (var_field_index) {
114 switch (num_offset_bytes) {
115 case (1):
116 data_start_offset = (var_field_offset_ptr + var_field_index - 1)[0];
117 break;
118 case (2):
119 data_start_offset = uint2korr(var_field_offset_ptr + 2*(var_field_index-1));
120 break;
121 default:
122 assert_unreachable();
123 }
124 }
125 else {
126 data_start_offset = 0;
127 }
128
129 *start_offset = data_start_offset;
130 assert_always(data_end_offset >= data_start_offset);
131 *field_len = data_end_offset - data_start_offset;
132 }
133
get_blob_field_info(uint32_t * start_offset,uint32_t len_of_offsets,const uchar * var_field_data_ptr,uint32_t num_offset_bytes)134 static void get_blob_field_info(
135 uint32_t* start_offset,
136 uint32_t len_of_offsets,
137 const uchar* var_field_data_ptr,
138 uint32_t num_offset_bytes
139 )
140 {
141 uint32_t data_end_offset;
142 //
143 // need to set var_field_data_ptr to point to beginning of blobs, which
144 // is at the end of the var stuff (if they exist), if var stuff does not exist
145 // then the bottom variable will be 0, and var_field_data_ptr is already
146 // set correctly
147 //
148 if (len_of_offsets) {
149 switch (num_offset_bytes) {
150 case (1):
151 data_end_offset = (var_field_data_ptr - 1)[0];
152 break;
153 case (2):
154 data_end_offset = uint2korr(var_field_data_ptr - 2);
155 break;
156 default:
157 assert_unreachable();
158 }
159 }
160 else {
161 data_end_offset = 0;
162 }
163 *start_offset = data_end_offset;
164 }
165
166
167 // this function is pattern matched from
168 // InnoDB's get_innobase_type_from_mysql_type
mysql_to_toku_type(Field * field)169 static TOKU_TYPE mysql_to_toku_type (Field* field) {
170 TOKU_TYPE ret_val = toku_type_unknown;
171 enum_field_types mysql_type = field->real_type();
172 switch (mysql_type) {
173 case MYSQL_TYPE_LONG:
174 case MYSQL_TYPE_LONGLONG:
175 case MYSQL_TYPE_TINY:
176 case MYSQL_TYPE_SHORT:
177 case MYSQL_TYPE_INT24:
178 case MYSQL_TYPE_DATE:
179 case MYSQL_TYPE_YEAR:
180 case MYSQL_TYPE_NEWDATE:
181 case MYSQL_TYPE_ENUM:
182 case MYSQL_TYPE_SET:
183 ret_val = toku_type_int;
184 goto exit;
185 case MYSQL_TYPE_TIME:
186 case MYSQL_TYPE_DATETIME:
187 case MYSQL_TYPE_TIMESTAMP:
188 #ifdef MARIADB_BASE_VERSION
189 // case to handle fractional seconds in MariaDB
190 //
191 if (field->key_type() == HA_KEYTYPE_BINARY) {
192 ret_val = toku_type_fixbinary;
193 goto exit;
194 }
195 #endif
196 ret_val = toku_type_int;
197 goto exit;
198 case MYSQL_TYPE_DOUBLE:
199 ret_val = toku_type_double;
200 goto exit;
201 case MYSQL_TYPE_FLOAT:
202 ret_val = toku_type_float;
203 goto exit;
204 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
205 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
206 (100000 <= MYSQL_VERSION_ID)
207 case MYSQL_TYPE_DATETIME2:
208 case MYSQL_TYPE_TIMESTAMP2:
209 case MYSQL_TYPE_TIME2:
210 #endif
211 case MYSQL_TYPE_NEWDECIMAL:
212 case MYSQL_TYPE_BIT:
213 ret_val = toku_type_fixbinary;
214 goto exit;
215 case MYSQL_TYPE_STRING:
216 if (field->binary()) {
217 ret_val = toku_type_fixbinary;
218 }
219 else {
220 ret_val = toku_type_fixstring;
221 }
222 goto exit;
223 case MYSQL_TYPE_VARCHAR:
224 if (field->binary()) {
225 ret_val = toku_type_varbinary;
226 }
227 else {
228 ret_val = toku_type_varstring;
229 }
230 goto exit;
231 case MYSQL_TYPE_TINY_BLOB:
232 case MYSQL_TYPE_MEDIUM_BLOB:
233 case MYSQL_TYPE_BLOB:
234 case MYSQL_TYPE_LONG_BLOB:
235 ret_val = toku_type_blob;
236 goto exit;
237 //
238 // I believe these are old types that are no longer
239 // in any 5.1 tables, so tokudb does not need
240 // to worry about them
241 // Putting in this assert in case I am wrong.
242 // Do not support geometry yet.
243 //
244 case MYSQL_TYPE_GEOMETRY:
245 case MYSQL_TYPE_DECIMAL:
246 case MYSQL_TYPE_VAR_STRING:
247 case MYSQL_TYPE_NULL:
248 case MYSQL_TYPE_VARCHAR_COMPRESSED:
249 case MYSQL_TYPE_BLOB_COMPRESSED:
250 assert_unreachable();
251 }
252 exit:
253 return ret_val;
254 }
255
256
get_charset_from_num(uint32_t charset_number)257 static inline CHARSET_INFO* get_charset_from_num (uint32_t charset_number) {
258 //
259 // patternmatched off of InnoDB, due to MySQL bug 42649
260 //
261 if (charset_number == default_charset_info->number) {
262 return default_charset_info;
263 }
264 else if (charset_number == my_charset_latin1.number) {
265 return &my_charset_latin1;
266 }
267 else {
268 return get_charset(charset_number, MYF(MY_WME));
269 }
270 }
271
272
273
274 //
275 // used to read the length of a variable sized field in a tokudb key (buf).
276 //
get_length_from_var_tokudata(uchar * buf,uint32_t length_bytes)277 static inline uint32_t get_length_from_var_tokudata (uchar* buf, uint32_t length_bytes) {
278 uint32_t length = (uint32_t)(buf[0]);
279 if (length_bytes == 2) {
280 uint32_t rest_of_length = (uint32_t)buf[1];
281 length += rest_of_length<<8;
282 }
283 return length;
284 }
285
286 //
287 // used to deduce the number of bytes used to store the length of a varstring/varbinary
288 // in a key field stored in tokudb
289 //
static inline uint32_t get_length_bytes_from_max(uint32_t max_num_bytes) {
    // lengths up to 255 fit in a single byte, anything larger uses two
    if (max_num_bytes <= 255) {
        return 1;
    }
    return 2;
}
293
294
295
296 //
297 // assuming MySQL in little endian, and we are storing in little endian
298 //
pack_toku_int(uchar * to_tokudb,uchar * from_mysql,uint32_t num_bytes)299 static inline uchar* pack_toku_int (uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
300 switch (num_bytes) {
301 case (1):
302 memcpy(to_tokudb, from_mysql, 1);
303 break;
304 case (2):
305 memcpy(to_tokudb, from_mysql, 2);
306 break;
307 case (3):
308 memcpy(to_tokudb, from_mysql, 3);
309 break;
310 case (4):
311 memcpy(to_tokudb, from_mysql, 4);
312 break;
313 case (8):
314 memcpy(to_tokudb, from_mysql, 8);
315 break;
316 default:
317 assert_unreachable();
318 }
319 return to_tokudb+num_bytes;
320 }
321
322 //
323 // assuming MySQL in little endian, and we are unpacking to little endian
324 //
unpack_toku_int(uchar * to_mysql,uchar * from_tokudb,uint32_t num_bytes)325 static inline uchar* unpack_toku_int(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
326 switch (num_bytes) {
327 case (1):
328 memcpy(to_mysql, from_tokudb, 1);
329 break;
330 case (2):
331 memcpy(to_mysql, from_tokudb, 2);
332 break;
333 case (3):
334 memcpy(to_mysql, from_tokudb, 3);
335 break;
336 case (4):
337 memcpy(to_mysql, from_tokudb, 4);
338 break;
339 case (8):
340 memcpy(to_mysql, from_tokudb, 8);
341 break;
342 default:
343 assert_unreachable();
344 }
345 return from_tokudb+num_bytes;
346 }
347
cmp_toku_int(uchar * a_buf,uchar * b_buf,bool is_unsigned,uint32_t num_bytes)348 static inline int cmp_toku_int (uchar* a_buf, uchar* b_buf, bool is_unsigned, uint32_t num_bytes) {
349 int ret_val = 0;
350 //
351 // case for unsigned integers
352 //
353 if (is_unsigned) {
354 uint32_t a_num, b_num = 0;
355 uint64_t a_big_num, b_big_num = 0;
356 switch (num_bytes) {
357 case (1):
358 a_num = *a_buf;
359 b_num = *b_buf;
360 ret_val = a_num-b_num;
361 goto exit;
362 case (2):
363 a_num = uint2korr(a_buf);
364 b_num = uint2korr(b_buf);
365 ret_val = a_num-b_num;
366 goto exit;
367 case (3):
368 a_num = tokudb_uint3korr(a_buf);
369 b_num = tokudb_uint3korr(b_buf);
370 ret_val = a_num-b_num;
371 goto exit;
372 case (4):
373 a_num = uint4korr(a_buf);
374 b_num = uint4korr(b_buf);
375 if (a_num < b_num) {
376 ret_val = -1; goto exit;
377 }
378 if (a_num > b_num) {
379 ret_val = 1; goto exit;
380 }
381 ret_val = 0;
382 goto exit;
383 case (8):
384 a_big_num = uint8korr(a_buf);
385 b_big_num = uint8korr(b_buf);
386 if (a_big_num < b_big_num) {
387 ret_val = -1; goto exit;
388 }
389 else if (a_big_num > b_big_num) {
390 ret_val = 1; goto exit;
391 }
392 ret_val = 0;
393 goto exit;
394 default:
395 assert_unreachable();
396 }
397 }
398 //
399 // case for signed integers
400 //
401 else {
402 int32_t a_num, b_num = 0;
403 int64_t a_big_num, b_big_num = 0;
404 switch (num_bytes) {
405 case (1):
406 a_num = *(signed char *)a_buf;
407 b_num = *(signed char *)b_buf;
408 ret_val = a_num-b_num;
409 goto exit;
410 case (2):
411 a_num = sint2korr(a_buf);
412 b_num = sint2korr(b_buf);
413 ret_val = a_num-b_num;
414 goto exit;
415 case (3):
416 a_num = sint3korr(a_buf);
417 b_num = sint3korr(b_buf);
418 ret_val = a_num - b_num;
419 goto exit;
420 case (4):
421 a_num = sint4korr(a_buf);
422 b_num = sint4korr(b_buf);
423 if (a_num < b_num) {
424 ret_val = -1; goto exit;
425 }
426 if (a_num > b_num) {
427 ret_val = 1; goto exit;
428 }
429 ret_val = 0;
430 goto exit;
431 case (8):
432 a_big_num = sint8korr(a_buf);
433 b_big_num = sint8korr(b_buf);
434 if (a_big_num < b_big_num) {
435 ret_val = -1; goto exit;
436 }
437 else if (a_big_num > b_big_num) {
438 ret_val = 1; goto exit;
439 }
440 ret_val = 0;
441 goto exit;
442 default:
443 assert_unreachable();
444 }
445 }
446 //
447 // if this is hit, indicates bug in writing of this function
448 //
449 assert_unreachable();
450 exit:
451 return ret_val;
452 }
453
pack_toku_double(uchar * to_tokudb,uchar * from_mysql)454 static inline uchar* pack_toku_double (uchar* to_tokudb, uchar* from_mysql) {
455 memcpy(to_tokudb, from_mysql, sizeof(double));
456 return to_tokudb + sizeof(double);
457 }
458
459
unpack_toku_double(uchar * to_mysql,uchar * from_tokudb)460 static inline uchar* unpack_toku_double(uchar* to_mysql, uchar* from_tokudb) {
461 memcpy(to_mysql, from_tokudb, sizeof(double));
462 return from_tokudb + sizeof(double);
463 }
464
cmp_toku_double(uchar * a_buf,uchar * b_buf)465 static inline int cmp_toku_double(uchar* a_buf, uchar* b_buf) {
466 int ret_val;
467 double a_num;
468 double b_num;
469 doubleget(a_num, a_buf);
470 doubleget(b_num, b_buf);
471 if (a_num < b_num) {
472 ret_val = -1;
473 goto exit;
474 }
475 else if (a_num > b_num) {
476 ret_val = 1;
477 goto exit;
478 }
479 ret_val = 0;
480 exit:
481 return ret_val;
482 }
483
484
pack_toku_float(uchar * to_tokudb,uchar * from_mysql)485 static inline uchar* pack_toku_float (uchar* to_tokudb, uchar* from_mysql) {
486 memcpy(to_tokudb, from_mysql, sizeof(float));
487 return to_tokudb + sizeof(float);
488 }
489
490
unpack_toku_float(uchar * to_mysql,uchar * from_tokudb)491 static inline uchar* unpack_toku_float(uchar* to_mysql, uchar* from_tokudb) {
492 memcpy(to_mysql, from_tokudb, sizeof(float));
493 return from_tokudb + sizeof(float);
494 }
495
cmp_toku_float(uchar * a_buf,uchar * b_buf)496 static inline int cmp_toku_float(uchar* a_buf, uchar* b_buf) {
497 int ret_val;
498 float a_num;
499 float b_num;
500 //
501 // This is the way Field_float::cmp gets the floats from the buffers
502 //
503 memcpy(&a_num, a_buf, sizeof(float));
504 memcpy(&b_num, b_buf, sizeof(float));
505 if (a_num < b_num) {
506 ret_val = -1;
507 goto exit;
508 }
509 else if (a_num > b_num) {
510 ret_val = 1;
511 goto exit;
512 }
513 ret_val = 0;
514 exit:
515 return ret_val;
516 }
517
518
pack_toku_binary(uchar * to_tokudb,uchar * from_mysql,uint32_t num_bytes)519 static inline uchar* pack_toku_binary(uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
520 memcpy(to_tokudb, from_mysql, num_bytes);
521 return to_tokudb + num_bytes;
522 }
523
unpack_toku_binary(uchar * to_mysql,uchar * from_tokudb,uint32_t num_bytes)524 static inline uchar* unpack_toku_binary(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
525 memcpy(to_mysql, from_tokudb, num_bytes);
526 return from_tokudb + num_bytes;
527 }
528
529
cmp_toku_binary(uchar * a_buf,uint32_t a_num_bytes,uchar * b_buf,uint32_t b_num_bytes)530 static inline int cmp_toku_binary(
531 uchar* a_buf,
532 uint32_t a_num_bytes,
533 uchar* b_buf,
534 uint32_t b_num_bytes
535 )
536 {
537 int ret_val = 0;
538 uint32_t num_bytes_to_cmp = (a_num_bytes < b_num_bytes) ? a_num_bytes : b_num_bytes;
539 ret_val = memcmp(a_buf, b_buf, num_bytes_to_cmp);
540 if ((ret_val != 0) || (a_num_bytes == b_num_bytes)) {
541 goto exit;
542 }
543 if (a_num_bytes < b_num_bytes) {
544 ret_val = -1;
545 goto exit;
546 }
547 else {
548 ret_val = 1;
549 goto exit;
550 }
551 exit:
552 return ret_val;
553 }
554
555 //
556 // partially copied from below
557 //
pack_toku_varbinary_from_desc(uchar * to_tokudb,const uchar * from_desc,uint32_t key_part_length,uint32_t field_length)558 static uchar* pack_toku_varbinary_from_desc(
559 uchar* to_tokudb,
560 const uchar* from_desc,
561 uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
562 uint32_t field_length //length of field
563 )
564 {
565 uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
566 uint32_t length = field_length;
567 set_if_smaller(length, key_part_length);
568
569 //
570 // copy the length bytes, assuming both are in little endian
571 //
572 to_tokudb[0] = (uchar)length & 255;
573 if (length_bytes_in_tokudb > 1) {
574 to_tokudb[1] = (uchar) (length >> 8);
575 }
576 //
577 // copy the string
578 //
579 memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
580 return to_tokudb + length + length_bytes_in_tokudb;
581 }
582
pack_toku_varbinary(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes)583 static inline uchar* pack_toku_varbinary(
584 uchar* to_tokudb,
585 uchar* from_mysql,
586 uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
587 uint32_t max_num_bytes
588 )
589 {
590 uint32_t length = 0;
591 uint32_t length_bytes_in_tokudb;
592 switch (length_bytes_in_mysql) {
593 case (0):
594 length = max_num_bytes;
595 break;
596 case (1):
597 length = (uint32_t)(*from_mysql);
598 break;
599 case (2):
600 length = uint2korr(from_mysql);
601 break;
602 case (3):
603 length = tokudb_uint3korr(from_mysql);
604 break;
605 case (4):
606 length = uint4korr(from_mysql);
607 break;
608 }
609
610 //
611 // from this point on, functionality equivalent to pack_toku_varbinary_from_desc
612 //
613 set_if_smaller(length,max_num_bytes);
614
615 length_bytes_in_tokudb = get_length_bytes_from_max(max_num_bytes);
616 //
617 // copy the length bytes, assuming both are in little endian
618 //
619 to_tokudb[0] = (uchar)length & 255;
620 if (length_bytes_in_tokudb > 1) {
621 to_tokudb[1] = (uchar) (length >> 8);
622 }
623 //
624 // copy the string
625 //
626 memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
627 return to_tokudb + length + length_bytes_in_tokudb;
628 }
629
unpack_toku_varbinary(uchar * to_mysql,uchar * from_tokudb,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql)630 static inline uchar* unpack_toku_varbinary(
631 uchar* to_mysql,
632 uchar* from_tokudb,
633 uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
634 uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
635 )
636 {
637 uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
638
639 //
640 // copy the length into the mysql buffer
641 //
642 switch (length_bytes_in_mysql) {
643 case (0):
644 break;
645 case (1):
646 *to_mysql = (uchar) length;
647 break;
648 case (2):
649 int2store(to_mysql, length);
650 break;
651 case (3):
652 int3store(to_mysql, length);
653 break;
654 case (4):
655 int4store(to_mysql, length);
656 break;
657 default:
658 assert_unreachable();
659 }
660 //
661 // copy the binary data
662 //
663 memcpy(to_mysql + length_bytes_in_mysql, from_tokudb + length_bytes_in_tokudb, length);
664 return from_tokudb + length_bytes_in_tokudb+ length;
665 }
666
cmp_toku_varbinary(uchar * a_buf,uchar * b_buf,uint32_t length_bytes,uint32_t * a_bytes_read,uint32_t * b_bytes_read)667 static inline int cmp_toku_varbinary(
668 uchar* a_buf,
669 uchar* b_buf,
670 uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
671 uint32_t* a_bytes_read,
672 uint32_t* b_bytes_read
673 )
674 {
675 int ret_val = 0;
676 uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
677 uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
678 ret_val = cmp_toku_binary(
679 a_buf + length_bytes,
680 a_len,
681 b_buf + length_bytes,
682 b_len
683 );
684 *a_bytes_read = a_len + length_bytes;
685 *b_bytes_read = b_len + length_bytes;
686 return ret_val;
687 }
688
pack_toku_blob(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes,const CHARSET_INFO * charset)689 static inline uchar* pack_toku_blob(
690 uchar* to_tokudb,
691 uchar* from_mysql,
692 uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
693 uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
694 uint32_t max_num_bytes,
695 #if MYSQL_VERSION_ID >= 50600
696 const CHARSET_INFO* charset
697 #else
698 CHARSET_INFO* charset
699 #endif
700 )
701 {
702 uint32_t length = 0;
703 uint32_t local_char_length = 0;
704 uchar* blob_buf = NULL;
705
706 switch (length_bytes_in_mysql) {
707 case (0):
708 length = max_num_bytes;
709 break;
710 case (1):
711 length = (uint32_t)(*from_mysql);
712 break;
713 case (2):
714 length = uint2korr(from_mysql);
715 break;
716 case (3):
717 length = tokudb_uint3korr(from_mysql);
718 break;
719 case (4):
720 length = uint4korr(from_mysql);
721 break;
722 }
723 set_if_smaller(length,max_num_bytes);
724
725 memcpy(&blob_buf,from_mysql+length_bytes_in_mysql,sizeof(uchar *));
726
727 local_char_length= ((charset->mbmaxlen > 1) ?
728 max_num_bytes/charset->mbmaxlen : max_num_bytes);
729 if (length > local_char_length)
730 {
731 local_char_length= charset->charpos(
732 blob_buf,
733 blob_buf+length,
734 local_char_length
735 );
736 set_if_smaller(length, local_char_length);
737 }
738
739
740 //
741 // copy the length bytes, assuming both are in little endian
742 //
743 to_tokudb[0] = (uchar)length & 255;
744 if (length_bytes_in_tokudb > 1) {
745 to_tokudb[1] = (uchar) (length >> 8);
746 }
747 //
748 // copy the string
749 //
750 memcpy(to_tokudb + length_bytes_in_tokudb, blob_buf, length);
751 return to_tokudb + length + length_bytes_in_tokudb;
752 }
753
754
unpack_toku_blob(uchar * to_mysql,uchar * from_tokudb,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql)755 static inline uchar* unpack_toku_blob(
756 uchar* to_mysql,
757 uchar* from_tokudb,
758 uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
759 uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
760 )
761 {
762 uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
763 uchar* blob_pos = NULL;
764 //
765 // copy the length into the mysql buffer
766 //
767 switch (length_bytes_in_mysql) {
768 case (0):
769 break;
770 case (1):
771 *to_mysql = (uchar) length;
772 break;
773 case (2):
774 int2store(to_mysql, length);
775 break;
776 case (3):
777 int3store(to_mysql, length);
778 break;
779 case (4):
780 int4store(to_mysql, length);
781 break;
782 default:
783 assert_unreachable();
784 }
785 //
786 // copy the binary data
787 //
788 blob_pos = from_tokudb + length_bytes_in_tokudb;
789 memcpy(to_mysql + length_bytes_in_mysql, &blob_pos, sizeof(uchar *));
790 return from_tokudb + length_bytes_in_tokudb+ length;
791 }
792
793
794 //
795 // partially copied from below
796 //
pack_toku_varstring_from_desc(uchar * to_tokudb,const uchar * from_desc,uint32_t key_part_length,uint32_t field_length,uint32_t charset_num)797 static uchar* pack_toku_varstring_from_desc(
798 uchar* to_tokudb,
799 const uchar* from_desc,
800 uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
801 uint32_t field_length,
802 uint32_t charset_num//length of field
803 )
804 {
805 CHARSET_INFO* charset = NULL;
806 uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
807 uint32_t length = field_length;
808 uint32_t local_char_length = 0;
809 set_if_smaller(length, key_part_length);
810
811 charset = get_charset_from_num(charset_num);
812
813 //
814 // copy the string
815 //
816 local_char_length= ((charset->mbmaxlen > 1) ?
817 key_part_length/charset->mbmaxlen : key_part_length);
818 if (length > local_char_length)
819 {
820 local_char_length= charset->charpos(
821 from_desc,
822 from_desc+length,
823 local_char_length
824 );
825 set_if_smaller(length, local_char_length);
826 }
827
828
829 //
830 // copy the length bytes, assuming both are in little endian
831 //
832 to_tokudb[0] = (uchar)length & 255;
833 if (length_bytes_in_tokudb > 1) {
834 to_tokudb[1] = (uchar) (length >> 8);
835 }
836 //
837 // copy the string
838 //
839 memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
840 return to_tokudb + length + length_bytes_in_tokudb;
841 }
842
pack_toku_varstring(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes,const CHARSET_INFO * charset)843 static inline uchar* pack_toku_varstring(
844 uchar* to_tokudb,
845 uchar* from_mysql,
846 uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
847 uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
848 uint32_t max_num_bytes,
849 #if MYSQL_VERSION_ID >= 50600
850 const CHARSET_INFO *charset
851 #else
852 CHARSET_INFO* charset
853 #endif
854 )
855 {
856 uint32_t length = 0;
857 uint32_t local_char_length = 0;
858
859 switch (length_bytes_in_mysql) {
860 case (0):
861 length = max_num_bytes;
862 break;
863 case (1):
864 length = (uint32_t)(*from_mysql);
865 break;
866 case (2):
867 length = uint2korr(from_mysql);
868 break;
869 case (3):
870 length = tokudb_uint3korr(from_mysql);
871 break;
872 case (4):
873 length = uint4korr(from_mysql);
874 break;
875 }
876 set_if_smaller(length,max_num_bytes);
877
878 local_char_length= ((charset->mbmaxlen > 1) ?
879 max_num_bytes/charset->mbmaxlen : max_num_bytes);
880 if (length > local_char_length)
881 {
882 local_char_length= charset->charpos(
883 from_mysql+length_bytes_in_mysql,
884 from_mysql+length_bytes_in_mysql+length,
885 local_char_length
886 );
887 set_if_smaller(length, local_char_length);
888 }
889
890
891 //
892 // copy the length bytes, assuming both are in little endian
893 //
894 to_tokudb[0] = (uchar)length & 255;
895 if (length_bytes_in_tokudb > 1) {
896 to_tokudb[1] = (uchar) (length >> 8);
897 }
898 //
899 // copy the string
900 //
901 memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
902 return to_tokudb + length + length_bytes_in_tokudb;
903 }
904
cmp_toku_string(uchar * a_buf,uint32_t a_num_bytes,uchar * b_buf,uint32_t b_num_bytes,uint32_t charset_number)905 static inline int cmp_toku_string(
906 uchar* a_buf,
907 uint32_t a_num_bytes,
908 uchar* b_buf,
909 uint32_t b_num_bytes,
910 uint32_t charset_number
911 )
912 {
913 int ret_val = 0;
914 CHARSET_INFO* charset = NULL;
915
916 charset = get_charset_from_num(charset_number);
917
918 ret_val = charset->strnncollsp(
919 a_buf,
920 a_num_bytes,
921 b_buf,
922 b_num_bytes
923 );
924 return ret_val;
925 }
926
cmp_toku_varstring(uchar * a_buf,uchar * b_buf,uint32_t length_bytes,uint32_t charset_num,uint32_t * a_bytes_read,uint32_t * b_bytes_read)927 static inline int cmp_toku_varstring(
928 uchar* a_buf,
929 uchar* b_buf,
930 uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
931 uint32_t charset_num,
932 uint32_t* a_bytes_read,
933 uint32_t* b_bytes_read
934 )
935 {
936 int ret_val = 0;
937 uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
938 uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
939 ret_val = cmp_toku_string(
940 a_buf + length_bytes,
941 a_len,
942 b_buf + length_bytes,
943 b_len,
944 charset_num
945 );
946 *a_bytes_read = a_len + length_bytes;
947 *b_bytes_read = b_len + length_bytes;
948 return ret_val;
949 }
950
tokudb_compare_two_hidden_keys(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size)951 static inline int tokudb_compare_two_hidden_keys(
952 const void* new_key_data,
953 const uint32_t new_key_size,
954 const void* saved_key_data,
955 const uint32_t saved_key_size
956 ) {
957 assert_always(new_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
958 assert_always(saved_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
959 ulonglong a = hpk_char_to_num((uchar *) new_key_data);
960 ulonglong b = hpk_char_to_num((uchar *) saved_key_data);
961 return a < b ? -1 : (a > b ? 1 : 0);
962 }
963
964 //
965 // Returns number of bytes used for a given TOKU_TYPE
966 // in a key descriptor. The number of bytes returned
967 // here MUST match the number of bytes used for the encoding
968 // in create_toku_key_descriptor_for_key
969 // Parameters:
970 // [in] row_desc - buffer that contains portion of descriptor
971 // created in create_toku_key_descriptor_for_key. The first
972 // byte points to the TOKU_TYPE.
973 //
skip_field_in_descriptor(uchar * row_desc)974 static uint32_t skip_field_in_descriptor(uchar* row_desc) {
975 uchar* row_desc_pos = row_desc;
976 TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
977 row_desc_pos++;
978
979 switch (toku_type) {
980 case (toku_type_hpk):
981 case (toku_type_double):
982 case (toku_type_float):
983 break;
984 case (toku_type_int):
985 row_desc_pos += 2;
986 break;
987 case (toku_type_fixbinary):
988 case (toku_type_varbinary):
989 row_desc_pos++;
990 break;
991 case (toku_type_fixstring):
992 case (toku_type_varstring):
993 case (toku_type_blob):
994 row_desc_pos++;
995 row_desc_pos += sizeof(uint32_t);
996 break;
997 default:
998 assert_unreachable();
999 }
1000 return (uint32_t)(row_desc_pos - row_desc);
1001 }
1002
1003 //
1004 // outputs a descriptor for key into buf. Returns number of bytes used in buf
1005 // to store the descriptor. Number of bytes used MUST match number of bytes
1006 // we would skip in skip_field_in_descriptor
1007 //
create_toku_key_descriptor_for_key(KEY * key,uchar * buf)1008 static int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) {
1009 uchar* pos = buf;
1010 uint32_t num_bytes_in_field = 0;
1011 uint32_t charset_num = 0;
1012 for (uint i = 0; i < key->user_defined_key_parts; i++) {
1013 Field* field = key->key_part[i].field;
1014 //
1015 // The first byte states if there is a null byte
1016 // 0 means no null byte, non-zer means there
1017 // is one
1018 //
1019 *pos = field->null_bit;
1020 pos++;
1021
1022 //
1023 // The second byte for each field is the type
1024 //
1025 TOKU_TYPE type = mysql_to_toku_type(field);
1026 assert_always((int)type < 256);
1027 *pos = (uchar)(type & 255);
1028 pos++;
1029
1030 //
1031 // based on the type, extra data follows afterwards
1032 //
1033 switch (type) {
1034 //
1035 // two bytes follow for ints, first one states how many
1036 // bytes the int is (1 , 2, 3, 4 or 8)
1037 // next one states if it is signed or not
1038 //
1039 case (toku_type_int):
1040 num_bytes_in_field = field->pack_length();
1041 assert_always (num_bytes_in_field < 256);
1042 *pos = (uchar)(num_bytes_in_field & 255);
1043 pos++;
1044 *pos = (field->flags & UNSIGNED_FLAG) ? 1 : 0;
1045 pos++;
1046 break;
1047 //
1048 // nothing follows floats and doubles
1049 //
1050 case (toku_type_double):
1051 case (toku_type_float):
1052 break;
1053 //
1054 // one byte follow stating the length of the field
1055 //
1056 case (toku_type_fixbinary):
1057 num_bytes_in_field = field->pack_length();
1058 set_if_smaller(num_bytes_in_field, key->key_part[i].length);
1059 assert_always(num_bytes_in_field < 256);
1060 pos[0] = (uchar)(num_bytes_in_field & 255);
1061 pos++;
1062 break;
1063 //
1064 // one byte follows: the number of bytes used to encode the length
1065 //
1066 case (toku_type_varbinary):
1067 *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
1068 pos++;
1069 break;
1070 //
1071 // five bytes follow: one for the number of bytes to encode the length,
1072 // four for the charset number
1073 //
1074 case (toku_type_fixstring):
1075 case (toku_type_varstring):
1076 case (toku_type_blob):
1077 *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
1078 pos++;
1079 charset_num = field->charset()->number;
1080 pos[0] = (uchar)(charset_num & 255);
1081 pos[1] = (uchar)((charset_num >> 8) & 255);
1082 pos[2] = (uchar)((charset_num >> 16) & 255);
1083 pos[3] = (uchar)((charset_num >> 24) & 255);
1084 pos += 4;
1085 break;
1086 default:
1087 assert_unreachable();
1088 }
1089 }
1090 return pos - buf;
1091 }
1092
1093
1094 //
1095 // Creates a descriptor for a DB. That contains all information necessary
1096 // to do both key comparisons and data comparisons (for dup-sort databases).
1097 //
1098 // There are two types of descriptors we care about:
1099 // 1) Primary key, (in a no-dup database)
1100 // 2) secondary keys, which are a secondary key followed by a primary key,
1101 // but in a no-dup database.
1102 //
1103 // I realize this may be confusing, but here is how it works.
1104 // All DB's have a key compare.
1105 // The format of the descriptor must be able to handle both.
1106 //
1107 // The first four bytes store an offset into the descriptor to the second piece
1108 // used for data comparisons. So, if in the future we want to append something
1109 // to the descriptor, we can.
1110 //
1111 //
create_toku_key_descriptor(uchar * buf,bool is_first_hpk,KEY * first_key,bool is_second_hpk,KEY * second_key)1112 static int create_toku_key_descriptor(
1113 uchar* buf,
1114 bool is_first_hpk,
1115 KEY* first_key,
1116 bool is_second_hpk,
1117 KEY* second_key
1118 )
1119 {
1120 //
1121 // The first four bytes always contain the offset of where the first key
1122 // ends.
1123 //
1124 uchar* pos = buf + 4;
1125 uint32_t num_bytes = 0;
1126 uint32_t offset = 0;
1127
1128
1129 if (is_first_hpk) {
1130 pos[0] = 0; //say there is NO infinity byte
1131 pos[1] = 0; //field cannot be NULL, stating it
1132 pos[2] = toku_type_hpk;
1133 pos += 3;
1134 }
1135 else {
1136 //
1137 // first key is NOT a hidden primary key, so we now pack first_key
1138 //
1139 pos[0] = 1; //say there is an infinity byte
1140 pos++;
1141 num_bytes = create_toku_key_descriptor_for_key(first_key, pos);
1142 pos += num_bytes;
1143 }
1144
1145 //
1146 // if we do not have a second key, we can jump to exit right now
1147 // we do not have a second key if it is not a hidden primary key
1148 // and if second_key is NULL
1149 //
1150 if (is_first_hpk || (!is_second_hpk && (second_key == NULL)) ) {
1151 goto exit;
1152 }
1153
1154 //
1155 // if we have a second key, and it is an hpk, we need to pack it, and
1156 // write in the offset to this position in the first four bytes
1157 //
1158 if (is_second_hpk) {
1159 pos[0] = 0; //field cannot be NULL, stating it
1160 pos[1] = toku_type_hpk;
1161 pos += 2;
1162 }
1163 else {
1164 //
1165 // second key is NOT a hidden primary key, so we now pack second_key
1166 //
1167 num_bytes = create_toku_key_descriptor_for_key(second_key, pos);
1168 pos += num_bytes;
1169 }
1170
1171
1172 exit:
1173 offset = pos - buf;
1174 buf[0] = (uchar)(offset & 255);
1175 buf[1] = (uchar)((offset >> 8) & 255);
1176 buf[2] = (uchar)((offset >> 16) & 255);
1177 buf[3] = (uchar)((offset >> 24) & 255);
1178
1179 return pos - buf;
1180 }
1181
1182
compare_toku_field(uchar * a_buf,uchar * b_buf,uchar * row_desc,uint32_t * a_bytes_read,uint32_t * b_bytes_read,uint32_t * row_desc_bytes_read,bool * read_string)1183 static inline int compare_toku_field(
1184 uchar* a_buf,
1185 uchar* b_buf,
1186 uchar* row_desc,
1187 uint32_t* a_bytes_read,
1188 uint32_t* b_bytes_read,
1189 uint32_t* row_desc_bytes_read,
1190 bool* read_string
1191 )
1192 {
1193 int ret_val = 0;
1194 uchar* row_desc_pos = row_desc;
1195 uint32_t num_bytes = 0;
1196 uint32_t length_bytes = 0;
1197 uint32_t charset_num = 0;
1198 bool is_unsigned = false;
1199
1200 TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
1201 row_desc_pos++;
1202
1203 switch (toku_type) {
1204 case (toku_type_hpk):
1205 ret_val = tokudb_compare_two_hidden_keys(
1206 a_buf,
1207 TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH,
1208 b_buf,
1209 TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH
1210 );
1211 *a_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
1212 *b_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
1213 break;
1214 case (toku_type_int):
1215 num_bytes = row_desc_pos[0];
1216 is_unsigned = row_desc_pos[1];
1217 ret_val = cmp_toku_int(
1218 a_buf,
1219 b_buf,
1220 is_unsigned,
1221 num_bytes
1222 );
1223 *a_bytes_read = num_bytes;
1224 *b_bytes_read = num_bytes;
1225 row_desc_pos += 2;
1226 break;
1227 case (toku_type_double):
1228 ret_val = cmp_toku_double(a_buf, b_buf);
1229 *a_bytes_read = sizeof(double);
1230 *b_bytes_read = sizeof(double);
1231 break;
1232 case (toku_type_float):
1233 ret_val = cmp_toku_float(a_buf, b_buf);
1234 *a_bytes_read = sizeof(float);
1235 *b_bytes_read = sizeof(float);
1236 break;
1237 case (toku_type_fixbinary):
1238 num_bytes = row_desc_pos[0];
1239 ret_val = cmp_toku_binary(a_buf, num_bytes, b_buf,num_bytes);
1240 *a_bytes_read = num_bytes;
1241 *b_bytes_read = num_bytes;
1242 row_desc_pos++;
1243 break;
1244 case (toku_type_varbinary):
1245 length_bytes = row_desc_pos[0];
1246 ret_val = cmp_toku_varbinary(
1247 a_buf,
1248 b_buf,
1249 length_bytes,
1250 a_bytes_read,
1251 b_bytes_read
1252 );
1253 row_desc_pos++;
1254 break;
1255 case (toku_type_fixstring):
1256 case (toku_type_varstring):
1257 case (toku_type_blob):
1258 length_bytes = row_desc_pos[0];
1259 row_desc_pos++;
1260 //
1261 // not sure we want to read charset_num like this
1262 //
1263 charset_num = *(uint32_t *)row_desc_pos;
1264 row_desc_pos += sizeof(uint32_t);
1265 ret_val = cmp_toku_varstring(
1266 a_buf,
1267 b_buf,
1268 length_bytes,
1269 charset_num,
1270 a_bytes_read,
1271 b_bytes_read
1272 );
1273 *read_string = true;
1274 break;
1275 default:
1276 assert_unreachable();
1277 }
1278
1279 *row_desc_bytes_read = row_desc_pos - row_desc;
1280 return ret_val;
1281 }
1282
1283 //
1284 // packs a field from a MySQL buffer into a tokudb buffer.
1285 // Used for inserts/updates
1286 //
pack_toku_key_field(uchar * to_tokudb,uchar * from_mysql,Field * field,uint32_t key_part_length)1287 static uchar* pack_toku_key_field(
1288 uchar* to_tokudb,
1289 uchar* from_mysql,
1290 Field* field,
1291 uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
1292 )
1293 {
1294 uchar* new_pos = NULL;
1295 uint32_t num_bytes = 0;
1296 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1297 switch(toku_type) {
1298 case (toku_type_int):
1299 assert_always(key_part_length == field->pack_length());
1300 new_pos = pack_toku_int(
1301 to_tokudb,
1302 from_mysql,
1303 field->pack_length()
1304 );
1305 goto exit;
1306 case (toku_type_double):
1307 assert_always(field->pack_length() == sizeof(double));
1308 assert_always(key_part_length == sizeof(double));
1309 new_pos = pack_toku_double(to_tokudb, from_mysql);
1310 goto exit;
1311 case (toku_type_float):
1312 assert_always(field->pack_length() == sizeof(float));
1313 assert_always(key_part_length == sizeof(float));
1314 new_pos = pack_toku_float(to_tokudb, from_mysql);
1315 goto exit;
1316 case (toku_type_fixbinary):
1317 num_bytes = field->pack_length();
1318 set_if_smaller(num_bytes, key_part_length);
1319 new_pos = pack_toku_binary(
1320 to_tokudb,
1321 from_mysql,
1322 num_bytes
1323 );
1324 goto exit;
1325 case (toku_type_fixstring):
1326 num_bytes = field->pack_length();
1327 set_if_smaller(num_bytes, key_part_length);
1328 new_pos = pack_toku_varstring(
1329 to_tokudb,
1330 from_mysql,
1331 get_length_bytes_from_max(key_part_length),
1332 0,
1333 num_bytes,
1334 field->charset()
1335 );
1336 goto exit;
1337 case (toku_type_varbinary):
1338 new_pos = pack_toku_varbinary(
1339 to_tokudb,
1340 from_mysql,
1341 ((Field_varstring *)field)->length_bytes,
1342 key_part_length
1343 );
1344 goto exit;
1345 case (toku_type_varstring):
1346 new_pos = pack_toku_varstring(
1347 to_tokudb,
1348 from_mysql,
1349 get_length_bytes_from_max(key_part_length),
1350 ((Field_varstring *)field)->length_bytes,
1351 key_part_length,
1352 field->charset()
1353 );
1354 goto exit;
1355 case (toku_type_blob):
1356 new_pos = pack_toku_blob(
1357 to_tokudb,
1358 from_mysql,
1359 get_length_bytes_from_max(key_part_length),
1360 ((Field_blob *)field)->row_pack_length(), //only calling this because packlength is returned
1361 key_part_length,
1362 field->charset()
1363 );
1364 goto exit;
1365 default:
1366 assert_unreachable();
1367 }
1368 assert_unreachable();
1369 exit:
1370 return new_pos;
1371 }
1372
1373 //
1374 // packs a field from a MySQL buffer into a tokudb buffer.
1375 // Used for queries. The only difference between this function
1376 // and pack_toku_key_field is that all variable sized columns
1377 // use 2 bytes to encode the length, regardless of the field
1378 // So varchar(4) will still use 2 bytes to encode the field
1379 //
pack_key_toku_key_field(uchar * to_tokudb,uchar * from_mysql,Field * field,uint32_t key_part_length)1380 static uchar* pack_key_toku_key_field(
1381 uchar* to_tokudb,
1382 uchar* from_mysql,
1383 Field* field,
1384 uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
1385 )
1386 {
1387 uchar* new_pos = NULL;
1388 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1389 switch(toku_type) {
1390 case (toku_type_int):
1391 case (toku_type_double):
1392 case (toku_type_float):
1393 case (toku_type_fixbinary):
1394 case (toku_type_fixstring):
1395 new_pos = pack_toku_key_field(to_tokudb, from_mysql, field, key_part_length);
1396 goto exit;
1397 case (toku_type_varbinary):
1398 new_pos = pack_toku_varbinary(
1399 to_tokudb,
1400 from_mysql,
1401 2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
1402 key_part_length
1403 );
1404 goto exit;
1405 case (toku_type_varstring):
1406 case (toku_type_blob):
1407 new_pos = pack_toku_varstring(
1408 to_tokudb,
1409 from_mysql,
1410 get_length_bytes_from_max(key_part_length),
1411 2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
1412 key_part_length,
1413 field->charset()
1414 );
1415 goto exit;
1416 default:
1417 assert_unreachable();
1418 }
1419
1420 assert_unreachable();
1421 exit:
1422 return new_pos;
1423 }
1424
1425
unpack_toku_key_field(uchar * to_mysql,uchar * from_tokudb,Field * field,uint32_t key_part_length)1426 uchar* unpack_toku_key_field(
1427 uchar* to_mysql,
1428 uchar* from_tokudb,
1429 Field* field,
1430 uint32_t key_part_length) {
1431
1432 uchar* new_pos = NULL;
1433 uint32_t num_bytes = 0;
1434 uint32_t num_bytes_copied;
1435 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1436 switch(toku_type) {
1437 case (toku_type_int):
1438 assert_always(key_part_length == field->pack_length());
1439 new_pos = unpack_toku_int(
1440 to_mysql,
1441 from_tokudb,
1442 field->pack_length()
1443 );
1444 goto exit;
1445 case (toku_type_double):
1446 assert_always(field->pack_length() == sizeof(double));
1447 assert_always(key_part_length == sizeof(double));
1448 new_pos = unpack_toku_double(to_mysql, from_tokudb);
1449 goto exit;
1450 case (toku_type_float):
1451 assert_always(field->pack_length() == sizeof(float));
1452 assert_always(key_part_length == sizeof(float));
1453 new_pos = unpack_toku_float(to_mysql, from_tokudb);
1454 goto exit;
1455 case (toku_type_fixbinary):
1456 num_bytes = field->pack_length();
1457 set_if_smaller(num_bytes, key_part_length);
1458 new_pos = unpack_toku_binary(
1459 to_mysql,
1460 from_tokudb,
1461 num_bytes);
1462 goto exit;
1463 case (toku_type_fixstring):
1464 num_bytes = field->pack_length();
1465 new_pos = unpack_toku_varbinary(
1466 to_mysql,
1467 from_tokudb,
1468 get_length_bytes_from_max(key_part_length),
1469 0);
1470 num_bytes_copied =
1471 new_pos -
1472 (from_tokudb + get_length_bytes_from_max(key_part_length));
1473 assert_always(num_bytes_copied <= num_bytes);
1474 memset(
1475 to_mysql + num_bytes_copied,
1476 field->charset()->pad_char,
1477 num_bytes - num_bytes_copied);
1478 goto exit;
1479 case (toku_type_varbinary):
1480 case (toku_type_varstring):
1481 new_pos = unpack_toku_varbinary(
1482 to_mysql,
1483 from_tokudb,
1484 get_length_bytes_from_max(key_part_length),
1485 ((Field_varstring*)field)->length_bytes);
1486 goto exit;
1487 case (toku_type_blob):
1488 new_pos = unpack_toku_blob(
1489 to_mysql,
1490 from_tokudb,
1491 get_length_bytes_from_max(key_part_length),
1492 //only calling this because packlength is returned
1493 ((Field_blob *)field)->row_pack_length());
1494 goto exit;
1495 default:
1496 assert_unreachable();
1497 }
1498 assert_unreachable();
1499 exit:
1500 return new_pos;
1501 }
1502
1503
tokudb_compare_two_keys(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size,const void * row_desc,const uint32_t row_desc_size,bool cmp_prefix,bool * read_string)1504 static int tokudb_compare_two_keys(
1505 const void* new_key_data,
1506 const uint32_t new_key_size,
1507 const void* saved_key_data,
1508 const uint32_t saved_key_size,
1509 const void* row_desc,
1510 const uint32_t row_desc_size,
1511 bool cmp_prefix,
1512 bool* read_string) {
1513
1514 int ret_val = 0;
1515 int8_t new_key_inf_val = COL_NEG_INF;
1516 int8_t saved_key_inf_val = COL_NEG_INF;
1517
1518 uchar* row_desc_ptr = (uchar *)row_desc;
1519 uchar *new_key_ptr = (uchar *)new_key_data;
1520 uchar *saved_key_ptr = (uchar *)saved_key_data;
1521
1522 uint32_t new_key_bytes_left = new_key_size;
1523 uint32_t saved_key_bytes_left = saved_key_size;
1524
1525 //
1526 // if the keys have an infinity byte, set it
1527 //
1528 if (row_desc_ptr[0]) {
1529 new_key_inf_val = (int8_t)new_key_ptr[0];
1530 saved_key_inf_val = (int8_t)saved_key_ptr[0];
1531 new_key_ptr++;
1532 saved_key_ptr++;
1533 }
1534 row_desc_ptr++;
1535
1536 while ((uint32_t)(new_key_ptr - (uchar*)new_key_data) < new_key_size &&
1537 (uint32_t)(saved_key_ptr - (uchar*)saved_key_data) < saved_key_size &&
1538 (uint32_t)(row_desc_ptr - (uchar*)row_desc) < row_desc_size) {
1539 uint32_t new_key_field_length;
1540 uint32_t saved_key_field_length;
1541 uint32_t row_desc_field_length;
1542 //
1543 // if there is a null byte at this point in the key
1544 //
1545 if (row_desc_ptr[0]) {
1546 //
1547 // compare null bytes. If different, return
1548 //
1549 if (new_key_ptr[0] != saved_key_ptr[0]) {
1550 ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
1551 goto exit;
1552 }
1553 saved_key_ptr++;
1554 //
1555 // in case we just read the fact that new_key_ptr and saved_key_ptr
1556 // have NULL as their next field
1557 //
1558 if (!*new_key_ptr++) {
1559 //
1560 // skip row_desc_ptr[0] read in if clause
1561 //
1562 row_desc_ptr++;
1563 //
1564 // skip data that describes rest of field
1565 //
1566 row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
1567 continue;
1568 }
1569 }
1570 row_desc_ptr++;
1571
1572 ret_val = compare_toku_field(
1573 new_key_ptr,
1574 saved_key_ptr,
1575 row_desc_ptr,
1576 &new_key_field_length,
1577 &saved_key_field_length,
1578 &row_desc_field_length,
1579 read_string);
1580 new_key_ptr += new_key_field_length;
1581 saved_key_ptr += saved_key_field_length;
1582 row_desc_ptr += row_desc_field_length;
1583 if (ret_val) {
1584 goto exit;
1585 }
1586
1587 assert_always(
1588 (uint32_t)(new_key_ptr - (uchar*)new_key_data) <= new_key_size);
1589 assert_always(
1590 (uint32_t)(saved_key_ptr - (uchar*)saved_key_data) <= saved_key_size);
1591 assert_always(
1592 (uint32_t)(row_desc_ptr - (uchar*)row_desc) <= row_desc_size);
1593 }
1594 new_key_bytes_left =
1595 new_key_size - ((uint32_t)(new_key_ptr - (uchar*)new_key_data));
1596 saved_key_bytes_left =
1597 saved_key_size - ((uint32_t)(saved_key_ptr - (uchar*)saved_key_data));
1598 if (cmp_prefix) {
1599 ret_val = 0;
1600 } else if (new_key_bytes_left== 0 && saved_key_bytes_left== 0) {
1601 // in this case, read both keys to completion, now read infinity byte
1602 ret_val = new_key_inf_val - saved_key_inf_val;
1603 } else if (new_key_bytes_left == 0 && saved_key_bytes_left > 0) {
1604 // at this point, one SHOULD be 0
1605 ret_val = (new_key_inf_val == COL_POS_INF ) ? 1 : -1;
1606 } else if (new_key_bytes_left > 0 && saved_key_bytes_left == 0) {
1607 ret_val = (saved_key_inf_val == COL_POS_INF ) ? -1 : 1;
1608 } else {
1609 // this should never happen, perhaps we should assert(false)
1610 assert_unreachable();
1611 ret_val = new_key_bytes_left - saved_key_bytes_left;
1612 }
1613 exit:
1614 return ret_val;
1615 }
1616
simple_memcmp(const DBT * keya,const DBT * keyb)1617 static int simple_memcmp(const DBT *keya, const DBT *keyb) {
1618 int cmp;
1619 int num_bytes_cmp = keya->size < keyb->size ?
1620 keya->size : keyb->size;
1621 cmp = memcmp(keya->data,keyb->data,num_bytes_cmp);
1622 if (cmp == 0 && (keya->size != keyb->size)) {
1623 cmp = keya->size < keyb->size ? -1 : 1;
1624 }
1625 return cmp;
1626 }
1627
1628 // comparison function to be used by the fractal trees.
tokudb_cmp_dbt_key(DB * file,const DBT * keya,const DBT * keyb)1629 static int tokudb_cmp_dbt_key(DB* file, const DBT *keya, const DBT *keyb) {
1630 int cmp;
1631 if (file->cmp_descriptor->dbt.size == 0) {
1632 cmp = simple_memcmp(keya, keyb);
1633 }
1634 else {
1635 bool read_string = false;
1636 cmp = tokudb_compare_two_keys(
1637 keya->data,
1638 keya->size,
1639 keyb->data,
1640 keyb->size,
1641 (uchar *)file->cmp_descriptor->dbt.data + 4,
1642 (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
1643 false,
1644 &read_string
1645 );
1646 // comparison above may be case-insensitive, but fractal tree
1647 // needs to distinguish between different data, so we do this
1648 // additional check here
1649 if (read_string && (cmp == 0)) {
1650 cmp = simple_memcmp(keya, keyb);
1651 }
1652 }
1653 return cmp;
1654 }
1655
1656 //TODO: QQQ Only do one direction for prefix.
tokudb_prefix_cmp_dbt_key(DB * file,const DBT * keya,const DBT * keyb)1657 static int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb) {
1658 // calls to this function are done by the handlerton, and are
1659 // comparing just the keys as MySQL would compare them.
1660 bool read_string = false;
1661 int cmp = tokudb_compare_two_keys(
1662 keya->data,
1663 keya->size,
1664 keyb->data,
1665 keyb->size,
1666 (uchar *)file->cmp_descriptor->dbt.data + 4,
1667 *(uint32_t *)file->cmp_descriptor->dbt.data - 4,
1668 true,
1669 &read_string
1670 );
1671 return cmp;
1672 }
1673
tokudb_compare_two_key_parts(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size,const void * row_desc,const uint32_t row_desc_size,uint max_parts)1674 static int tokudb_compare_two_key_parts(
1675 const void* new_key_data,
1676 const uint32_t new_key_size,
1677 const void* saved_key_data,
1678 const uint32_t saved_key_size,
1679 const void* row_desc,
1680 const uint32_t row_desc_size,
1681 uint max_parts
1682 )
1683 {
1684 int ret_val = 0;
1685
1686 uchar* row_desc_ptr = (uchar *)row_desc;
1687 uchar *new_key_ptr = (uchar *)new_key_data;
1688 uchar *saved_key_ptr = (uchar *)saved_key_data;
1689
1690 //
1691 // if the keys have an infinity byte, set it
1692 //
1693 if (row_desc_ptr[0]) {
1694 // new_key_inf_val = (int8_t)new_key_ptr[0];
1695 // saved_key_inf_val = (int8_t)saved_key_ptr[0];
1696 new_key_ptr++;
1697 saved_key_ptr++;
1698 }
1699 row_desc_ptr++;
1700
1701 for (uint i = 0; i < max_parts; i++) {
1702 if (!((uint32_t)(new_key_ptr - (uchar *)new_key_data) < new_key_size &&
1703 (uint32_t)(saved_key_ptr - (uchar *)saved_key_data) < saved_key_size &&
1704 (uint32_t)(row_desc_ptr - (uchar *)row_desc) < row_desc_size))
1705 break;
1706 uint32_t new_key_field_length;
1707 uint32_t saved_key_field_length;
1708 uint32_t row_desc_field_length;
1709 //
1710 // if there is a null byte at this point in the key
1711 //
1712 if (row_desc_ptr[0]) {
1713 //
1714 // compare null bytes. If different, return
1715 //
1716 if (new_key_ptr[0] != saved_key_ptr[0]) {
1717 ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
1718 goto exit;
1719 }
1720 saved_key_ptr++;
1721 //
1722 // in case we just read the fact that new_key_ptr and saved_key_ptr
1723 // have NULL as their next field
1724 //
1725 if (!*new_key_ptr++) {
1726 //
1727 // skip row_desc_ptr[0] read in if clause
1728 //
1729 row_desc_ptr++;
1730 //
1731 // skip data that describes rest of field
1732 //
1733 row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
1734 continue;
1735 }
1736 }
1737 row_desc_ptr++;
1738 bool read_string = false;
1739 ret_val = compare_toku_field(
1740 new_key_ptr,
1741 saved_key_ptr,
1742 row_desc_ptr,
1743 &new_key_field_length,
1744 &saved_key_field_length,
1745 &row_desc_field_length,
1746 &read_string
1747 );
1748 new_key_ptr += new_key_field_length;
1749 saved_key_ptr += saved_key_field_length;
1750 row_desc_ptr += row_desc_field_length;
1751 if (ret_val) {
1752 goto exit;
1753 }
1754
1755 assert_always((uint32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size);
1756 assert_always((uint32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size);
1757 assert_always((uint32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size);
1758 }
1759
1760 ret_val = 0;
1761 exit:
1762 return ret_val;
1763 }
1764
tokudb_cmp_dbt_key_parts(DB * file,const DBT * keya,const DBT * keyb,uint max_parts)1765 static int tokudb_cmp_dbt_key_parts(DB *file, const DBT *keya, const DBT *keyb, uint max_parts) {
1766 assert_always(file->cmp_descriptor->dbt.size);
1767 return tokudb_compare_two_key_parts(
1768 keya->data,
1769 keya->size,
1770 keyb->data,
1771 keyb->size,
1772 (uchar *)file->cmp_descriptor->dbt.data + 4,
1773 (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
1774 max_parts);
1775 }
1776
create_toku_main_key_pack_descriptor(uchar * buf)1777 static uint32_t create_toku_main_key_pack_descriptor (
1778 uchar* buf
1779 )
1780 {
1781 //
1782 // The first four bytes always contain the offset of where the first key
1783 // ends.
1784 //
1785 uchar* pos = buf + 4;
1786 uint32_t offset = 0;
1787 //
1788 // one byte states if this is the main dictionary
1789 //
1790 pos[0] = 1;
1791 pos++;
1792 goto exit;
1793
1794
1795 exit:
1796 offset = pos - buf;
1797 buf[0] = (uchar)(offset & 255);
1798 buf[1] = (uchar)((offset >> 8) & 255);
1799 buf[2] = (uchar)((offset >> 16) & 255);
1800 buf[3] = (uchar)((offset >> 24) & 255);
1801
1802 return pos - buf;
1803 }
1804
//
// Tag bytes written by pack_desc_char_info: the column carries no charset,
// or a four byte charset number follows the tag.
//
#define COL_HAS_NO_CHARSET 0x44
#define COL_HAS_CHARSET 0x55

//
// Tag bytes written by pack_desc_pk_offset_info: the four bytes after the
// tag are a constant byte offset into the primary key, or the index of the
// column within the primary key.
//
#define COL_FIX_PK_OFFSET 0x66
#define COL_VAR_PK_OFFSET 0x77

//
// Tag bytes for ranges in the clustering val pack descriptor.
// CK_FIX_RANGE introduces a start/end offset pair for fixed fields.
// NOTE(review): CK_VAR_RANGE is presumably the counterpart for variable
// sized fields - confirm at its use site.
//
#define CK_FIX_RANGE 0x88
#define CK_VAR_RANGE 0x99

//
// Copies the four byte col_pack_val of column field_index (as laid out for
// key pk_index) to pos and advances pos past it. Relies on pos, kc_info,
// pk_index and field_index being in scope at the expansion site.
//
// Wrapped in do { } while (0) so that the two statements always act as a
// single statement; invoke it with a trailing semicolon.
//
#define COPY_OFFSET_TO_BUF do { \
    memcpy ( \
        pos, \
        &kc_info->cp_info[pk_index][field_index].col_pack_val, \
        sizeof(uint32_t) \
        ); \
    pos += sizeof(uint32_t); \
} while (0)
1820
1821
pack_desc_pk_info(uchar * buf,KEY_AND_COL_INFO * kc_info,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1822 static uint32_t pack_desc_pk_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1823 uchar* pos = buf;
1824 uint16 field_index = key_part->field->field_index;
1825 Field* field = table_share->field[field_index];
1826 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1827 uint32_t key_part_length = key_part->length;
1828 uint32_t field_length;
1829 uchar len_bytes = 0;
1830
1831 switch(toku_type) {
1832 case (toku_type_int):
1833 case (toku_type_double):
1834 case (toku_type_float):
1835 pos[0] = COL_FIX_FIELD;
1836 pos++;
1837 assert_always(kc_info->field_lengths[field_index] < 256);
1838 pos[0] = kc_info->field_lengths[field_index];
1839 pos++;
1840 break;
1841 case (toku_type_fixbinary):
1842 pos[0] = COL_FIX_FIELD;
1843 pos++;
1844 field_length = field->pack_length();
1845 set_if_smaller(key_part_length, field_length);
1846 assert_always(key_part_length < 256);
1847 pos[0] = (uchar)key_part_length;
1848 pos++;
1849 break;
1850 case (toku_type_fixstring):
1851 case (toku_type_varbinary):
1852 case (toku_type_varstring):
1853 case (toku_type_blob):
1854 pos[0] = COL_VAR_FIELD;
1855 pos++;
1856 len_bytes = (key_part_length > 255) ? 2 : 1;
1857 pos[0] = len_bytes;
1858 pos++;
1859 break;
1860 default:
1861 assert_unreachable();
1862 }
1863
1864 return pos - buf;
1865 }
1866
pack_desc_pk_offset_info(uchar * buf,KEY_PART_INFO * key_part,KEY * prim_key,uchar * pk_info)1867 static uint32_t pack_desc_pk_offset_info(uchar* buf,
1868 KEY_PART_INFO* key_part,
1869 KEY* prim_key,
1870 uchar* pk_info) {
1871 uchar* pos = buf;
1872 uint16 field_index = key_part->field->field_index;
1873 bool found_col_in_pk = false;
1874 uint32_t index_in_pk;
1875
1876 bool is_constant_offset = true;
1877 uint32_t offset = 0;
1878 for (uint i = 0; i < prim_key->user_defined_key_parts; i++) {
1879 KEY_PART_INFO curr = prim_key->key_part[i];
1880 uint16 curr_field_index = curr.field->field_index;
1881
1882 if (pk_info[2*i] == COL_VAR_FIELD) {
1883 is_constant_offset = false;
1884 }
1885
1886 if (curr_field_index == field_index) {
1887 found_col_in_pk = true;
1888 index_in_pk = i;
1889 break;
1890 }
1891 offset += pk_info[2*i + 1];
1892 }
1893 assert_always(found_col_in_pk);
1894 if (is_constant_offset) {
1895 pos[0] = COL_FIX_PK_OFFSET;
1896 pos++;
1897
1898 memcpy (pos, &offset, sizeof(offset));
1899 pos += sizeof(offset);
1900 }
1901 else {
1902 pos[0] = COL_VAR_PK_OFFSET;
1903 pos++;
1904
1905 memcpy(pos, &index_in_pk, sizeof(index_in_pk));
1906 pos += sizeof(index_in_pk);
1907 }
1908 return pos - buf;
1909 }
1910
pack_desc_offset_info(uchar * buf,KEY_AND_COL_INFO * kc_info,uint pk_index,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1911 static uint32_t pack_desc_offset_info(uchar* buf, KEY_AND_COL_INFO* kc_info, uint pk_index, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1912 uchar* pos = buf;
1913 uint16 field_index = key_part->field->field_index;
1914 Field* field = table_share->field[field_index];
1915 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1916 bool found_index = false;
1917
1918 switch(toku_type) {
1919 case (toku_type_int):
1920 case (toku_type_double):
1921 case (toku_type_float):
1922 case (toku_type_fixbinary):
1923 case (toku_type_fixstring):
1924 pos[0] = COL_FIX_FIELD;
1925 pos++;
1926
1927 // copy the offset
1928 COPY_OFFSET_TO_BUF;
1929 break;
1930 case (toku_type_varbinary):
1931 case (toku_type_varstring):
1932 pos[0] = COL_VAR_FIELD;
1933 pos++;
1934
1935 // copy the offset
1936 COPY_OFFSET_TO_BUF;
1937 break;
1938 case (toku_type_blob):
1939 pos[0] = COL_BLOB_FIELD;
1940 pos++;
1941 for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
1942 uint32_t blob_index = kc_info->blob_fields[i];
1943 if (blob_index == field_index) {
1944 uint32_t val = i;
1945 memcpy(pos, &val, sizeof(uint32_t));
1946 pos += sizeof(uint32_t);
1947 found_index = true;
1948 break;
1949 }
1950 }
1951 assert_always(found_index);
1952 break;
1953 default:
1954 assert_unreachable();
1955 }
1956
1957 return pos - buf;
1958 }
1959
pack_desc_key_length_info(uchar * buf,KEY_AND_COL_INFO * kc_info,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1960 static uint32_t pack_desc_key_length_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1961 uchar* pos = buf;
1962 uint16 field_index = key_part->field->field_index;
1963 Field* field = table_share->field[field_index];
1964 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1965 uint32_t key_part_length = key_part->length;
1966 uint32_t field_length;
1967
1968 switch(toku_type) {
1969 case (toku_type_int):
1970 case (toku_type_double):
1971 case (toku_type_float):
1972 // copy the key_part length
1973 field_length = kc_info->field_lengths[field_index];
1974 memcpy(pos, &field_length, sizeof(field_length));
1975 pos += sizeof(key_part_length);
1976 break;
1977 case (toku_type_fixbinary):
1978 case (toku_type_fixstring):
1979 field_length = field->pack_length();
1980 set_if_smaller(key_part_length, field_length);
1981 // fallthrough
1982 case (toku_type_varbinary):
1983 case (toku_type_varstring):
1984 case (toku_type_blob):
1985 // copy the key_part length
1986 memcpy(pos, &key_part_length, sizeof(key_part_length));
1987 pos += sizeof(key_part_length);
1988 break;
1989 default:
1990 assert_unreachable();
1991 }
1992
1993 return pos - buf;
1994 }
1995
pack_desc_char_info(uchar * buf,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1996 static uint32_t pack_desc_char_info(uchar* buf,
1997 TABLE_SHARE* table_share,
1998 KEY_PART_INFO* key_part) {
1999 uchar* pos = buf;
2000 uint16 field_index = key_part->field->field_index;
2001 Field* field = table_share->field[field_index];
2002 TOKU_TYPE toku_type = mysql_to_toku_type(field);
2003 uint32_t charset_num = 0;
2004
2005 switch(toku_type) {
2006 case (toku_type_int):
2007 case (toku_type_double):
2008 case (toku_type_float):
2009 case (toku_type_fixbinary):
2010 case (toku_type_varbinary):
2011 pos[0] = COL_HAS_NO_CHARSET;
2012 pos++;
2013 break;
2014 case (toku_type_fixstring):
2015 case (toku_type_varstring):
2016 case (toku_type_blob):
2017 pos[0] = COL_HAS_CHARSET;
2018 pos++;
2019
2020 // copy the charset
2021 charset_num = field->charset()->number;
2022 pos[0] = (uchar)(charset_num & 255);
2023 pos[1] = (uchar)((charset_num >> 8) & 255);
2024 pos[2] = (uchar)((charset_num >> 16) & 255);
2025 pos[3] = (uchar)((charset_num >> 24) & 255);
2026 pos += 4;
2027 break;
2028 default:
2029 assert_unreachable();
2030 }
2031
2032 return pos - buf;
2033 }
2034
pack_some_row_info(uchar * buf,uint pk_index,TABLE_SHARE * table_share,KEY_AND_COL_INFO * kc_info)2035 static uint32_t pack_some_row_info (
2036 uchar* buf,
2037 uint pk_index,
2038 TABLE_SHARE* table_share,
2039 KEY_AND_COL_INFO* kc_info
2040 )
2041 {
2042 uchar* pos = buf;
2043 uint32_t num_null_bytes = 0;
2044 //
2045 // four bytes stating number of null bytes
2046 //
2047 num_null_bytes = table_share->null_bytes;
2048 memcpy(pos, &num_null_bytes, sizeof(num_null_bytes));
2049 pos += sizeof(num_null_bytes);
2050 //
2051 // eight bytes stating mcp_info
2052 //
2053 memcpy(pos, &kc_info->mcp_info[pk_index], sizeof(MULTI_COL_PACK_INFO));
2054 pos += sizeof(MULTI_COL_PACK_INFO);
2055 //
2056 // one byte for the number of offset bytes
2057 //
2058 pos[0] = (uchar)kc_info->num_offset_bytes;
2059 pos++;
2060
2061 return pos - buf;
2062 }
2063
get_max_clustering_val_pack_desc_size(TABLE_SHARE * table_share)2064 static uint32_t get_max_clustering_val_pack_desc_size(
2065 TABLE_SHARE* table_share
2066 )
2067 {
2068 uint32_t ret_val = 0;
2069 //
2070 // the fixed stuff:
2071 // first the things in pack_some_row_info
2072 // second another mcp_info
2073 // third a byte that states if blobs exist
2074 ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
2075 ret_val += sizeof(MULTI_COL_PACK_INFO);
2076 ret_val++;
2077 //
2078 // now the variable stuff
2079 // an upper bound is, for each field, byte stating if it is fixed or var, followed
2080 // by 8 bytes for endpoints
2081 //
2082 ret_val += (table_share->fields)*(1 + 2*sizeof(uint32_t));
2083 //
2084 // four bytes storing the length of this portion
2085 //
2086 ret_val += 4;
2087
2088 return ret_val;
2089 }
2090
create_toku_clustering_val_pack_descriptor(uchar * buf,uint pk_index,TABLE_SHARE * table_share,KEY_AND_COL_INFO * kc_info,uint32_t keynr,bool is_clustering)2091 static uint32_t create_toku_clustering_val_pack_descriptor (
2092 uchar* buf,
2093 uint pk_index,
2094 TABLE_SHARE* table_share,
2095 KEY_AND_COL_INFO* kc_info,
2096 uint32_t keynr,
2097 bool is_clustering
2098 )
2099 {
2100 uchar* pos = buf + 4;
2101 uint32_t offset = 0;
2102 bool start_range_set = false;
2103 uint32_t last_col = 0;
2104 //
2105 // do not need to write anything if the key is not clustering
2106 //
2107 if (!is_clustering) {
2108 goto exit;
2109 }
2110
2111 pos += pack_some_row_info(
2112 pos,
2113 pk_index,
2114 table_share,
2115 kc_info
2116 );
2117
2118 //
2119 // eight bytes stating mcp_info of clustering key
2120 //
2121 memcpy(pos, &kc_info->mcp_info[keynr], sizeof(MULTI_COL_PACK_INFO));
2122 pos += sizeof(MULTI_COL_PACK_INFO);
2123
2124 //
2125 // store bit that states if blobs exist
2126 //
2127 pos[0] = (kc_info->num_blobs) ? 1 : 0;
2128 pos++;
2129
2130 //
2131 // descriptor assumes that all fields filtered from pk are
2132 // also filtered from clustering key val. Doing check here to
2133 // make sure something unexpected does not happen
2134 //
2135 for (uint i = 0; i < table_share->fields; i++) {
2136 bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2137 bool col_filtered_in_pk = bitmap_is_set(&kc_info->key_filters[pk_index],i);
2138 if (col_filtered_in_pk) {
2139 assert_always(col_filtered);
2140 }
2141 }
2142
2143 //
2144 // first handle the fixed fields
2145 //
2146 start_range_set = false;
2147 last_col = 0;
2148 for (uint i = 0; i < table_share->fields; i++) {
2149 bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2150 if (!is_fixed_field(kc_info, i)) {
2151 //
2152 // not a fixed field, continue
2153 //
2154 continue;
2155 }
2156 if (col_filtered && start_range_set) {
2157 //
2158 // need to set the end range
2159 //
2160 start_range_set = false;
2161 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val + kc_info->field_lengths[last_col];
2162 memcpy(pos, &end_offset, sizeof(end_offset));
2163 pos += sizeof(end_offset);
2164 }
2165 else if (!col_filtered) {
2166 if (!start_range_set) {
2167 pos[0] = CK_FIX_RANGE;
2168 pos++;
2169 start_range_set = true;
2170 uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
2171 memcpy(pos, &start_offset , sizeof(start_offset));
2172 pos += sizeof(start_offset);
2173 }
2174 last_col = i;
2175 }
2176 else {
2177 continue;
2178 }
2179 }
2180 if (start_range_set) {
2181 //
2182 // need to set the end range
2183 //
2184 start_range_set = false;
2185 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val+ kc_info->field_lengths[last_col];
2186 memcpy(pos, &end_offset, sizeof(end_offset));
2187 pos += sizeof(end_offset);
2188 }
2189
2190 //
2191 // now handle the var fields
2192 //
2193 start_range_set = false;
2194 last_col = 0;
2195 for (uint i = 0; i < table_share->fields; i++) {
2196 bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2197 if (!is_variable_field(kc_info, i)) {
2198 //
2199 // not a var field, continue
2200 //
2201 continue;
2202 }
2203 if (col_filtered && start_range_set) {
2204 //
2205 // need to set the end range
2206 //
2207 start_range_set = false;
2208 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
2209 memcpy(pos, &end_offset, sizeof(end_offset));
2210 pos += sizeof(end_offset);
2211 }
2212 else if (!col_filtered) {
2213 if (!start_range_set) {
2214 pos[0] = CK_VAR_RANGE;
2215 pos++;
2216
2217 start_range_set = true;
2218 uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
2219 memcpy(pos, &start_offset , sizeof(start_offset));
2220 pos += sizeof(start_offset);
2221 }
2222 last_col = i;
2223 }
2224 else {
2225 continue;
2226 }
2227 }
2228 if (start_range_set) {
2229 start_range_set = false;
2230 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
2231 memcpy(pos, &end_offset, sizeof(end_offset));
2232 pos += sizeof(end_offset);
2233 }
2234
2235 exit:
2236 offset = pos - buf;
2237 buf[0] = (uchar)(offset & 255);
2238 buf[1] = (uchar)((offset >> 8) & 255);
2239 buf[2] = (uchar)((offset >> 16) & 255);
2240 buf[3] = (uchar)((offset >> 24) & 255);
2241
2242 return pos - buf;
2243 }
2244
pack_clustering_val_from_desc(uchar * buf,void * row_desc,uint32_t row_desc_size,const DBT * pk_val)2245 static uint32_t pack_clustering_val_from_desc(
2246 uchar* buf,
2247 void* row_desc,
2248 uint32_t row_desc_size,
2249 const DBT* pk_val
2250 )
2251 {
2252 uchar* null_bytes_src_ptr = NULL;
2253 uchar* fixed_src_ptr = NULL;
2254 uchar* var_src_offset_ptr = NULL;
2255 uchar* var_src_data_ptr = NULL;
2256 uchar* fixed_dest_ptr = NULL;
2257 uchar* var_dest_offset_ptr = NULL;
2258 uchar* var_dest_data_ptr = NULL;
2259 uchar* orig_var_dest_data_ptr = NULL;
2260 uchar* desc_pos = (uchar *)row_desc;
2261 uint32_t num_null_bytes = 0;
2262 uint32_t num_offset_bytes;
2263 MULTI_COL_PACK_INFO src_mcp_info, dest_mcp_info;
2264 uchar has_blobs;
2265
2266 memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
2267 desc_pos += sizeof(num_null_bytes);
2268
2269 memcpy(&src_mcp_info, desc_pos, sizeof(src_mcp_info));
2270 desc_pos += sizeof(src_mcp_info);
2271
2272 num_offset_bytes = desc_pos[0];
2273 desc_pos++;
2274
2275 memcpy(&dest_mcp_info, desc_pos, sizeof(dest_mcp_info));
2276 desc_pos += sizeof(dest_mcp_info);
2277
2278 has_blobs = desc_pos[0];
2279 desc_pos++;
2280
2281 //
2282 //set the variables
2283 //
2284 null_bytes_src_ptr = (uchar *)pk_val->data;
2285 fixed_src_ptr = null_bytes_src_ptr + num_null_bytes;
2286 var_src_offset_ptr = fixed_src_ptr + src_mcp_info.fixed_field_size;
2287 var_src_data_ptr = var_src_offset_ptr + src_mcp_info.len_of_offsets;
2288
2289 fixed_dest_ptr = buf + num_null_bytes;
2290 var_dest_offset_ptr = fixed_dest_ptr + dest_mcp_info.fixed_field_size;
2291 var_dest_data_ptr = var_dest_offset_ptr + dest_mcp_info.len_of_offsets;
2292 orig_var_dest_data_ptr = var_dest_data_ptr;
2293
2294 //
2295 // copy the null bytes
2296 //
2297 memcpy(buf, null_bytes_src_ptr, num_null_bytes);
2298 while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
2299 uint32_t start, end, length;
2300 uchar curr = desc_pos[0];
2301 desc_pos++;
2302
2303 memcpy(&start, desc_pos, sizeof(start));
2304 desc_pos += sizeof(start);
2305
2306 memcpy(&end, desc_pos, sizeof(end));
2307 desc_pos += sizeof(end);
2308
2309 assert_always (start <= end);
2310
2311 if (curr == CK_FIX_RANGE) {
2312 length = end - start;
2313
2314 memcpy(fixed_dest_ptr, fixed_src_ptr + start, length);
2315 fixed_dest_ptr += length;
2316 }
2317 else if (curr == CK_VAR_RANGE) {
2318 uint32_t start_data_size;
2319 uint32_t start_data_offset;
2320 uint32_t end_data_size;
2321 uint32_t end_data_offset;
2322 uint32_t offset_diffs;
2323
2324 get_var_field_info(
2325 &start_data_size,
2326 &start_data_offset,
2327 start,
2328 var_src_offset_ptr,
2329 num_offset_bytes
2330 );
2331 get_var_field_info(
2332 &end_data_size,
2333 &end_data_offset,
2334 end,
2335 var_src_offset_ptr,
2336 num_offset_bytes
2337 );
2338 length = end_data_offset + end_data_size - start_data_offset;
2339 //
2340 // copy the data
2341 //
2342 memcpy(
2343 var_dest_data_ptr,
2344 var_src_data_ptr + start_data_offset,
2345 length
2346 );
2347 var_dest_data_ptr += length;
2348
2349 //
2350 // put in offset info
2351 //
2352 offset_diffs = (end_data_offset + end_data_size) - (uint32_t)(var_dest_data_ptr - orig_var_dest_data_ptr);
2353 for (uint32_t i = start; i <= end; i++) {
2354 if ( num_offset_bytes == 1 ) {
2355 assert_always(offset_diffs < 256);
2356 var_dest_offset_ptr[0] = var_src_offset_ptr[i] - (uchar)offset_diffs;
2357 var_dest_offset_ptr++;
2358 } else if ( num_offset_bytes == 2 ) {
2359 uint32_t tmp = uint2korr(var_src_offset_ptr + 2*i);
2360 uint32_t new_offset = tmp - offset_diffs;
2361 assert_always(new_offset < 1<<16);
2362 int2store(var_dest_offset_ptr,new_offset);
2363 var_dest_offset_ptr += 2;
2364 } else {
2365 assert_unreachable();
2366 }
2367 }
2368 } else {
2369 assert_unreachable();
2370 }
2371 }
2372 //
2373 // copy blobs
2374 // at this point, var_dest_data_ptr is pointing to the end, where blobs should be located
2375 // so, we put the blobs at var_dest_data_ptr
2376 //
2377 if (has_blobs) {
2378 uint32_t num_blob_bytes;
2379 uint32_t start_offset;
2380 uchar* src_blob_ptr = NULL;
2381 get_blob_field_info(
2382 &start_offset,
2383 src_mcp_info.len_of_offsets,
2384 var_src_data_ptr,
2385 num_offset_bytes
2386 );
2387 src_blob_ptr = var_src_data_ptr + start_offset;
2388 num_blob_bytes = pk_val->size - (start_offset + (var_src_data_ptr - null_bytes_src_ptr));
2389 memcpy(var_dest_data_ptr, src_blob_ptr, num_blob_bytes);
2390 var_dest_data_ptr += num_blob_bytes;
2391 }
2392 return var_dest_data_ptr - buf;
2393 }
2394
2395
get_max_secondary_key_pack_desc_size(KEY_AND_COL_INFO * kc_info)2396 static uint32_t get_max_secondary_key_pack_desc_size(
2397 KEY_AND_COL_INFO* kc_info
2398 )
2399 {
2400 uint32_t ret_val = 0;
2401 //
2402 // the fixed stuff:
2403 // byte that states if main dictionary
2404 // byte that states if hpk
2405 // the things in pack_some_row_info
2406 ret_val++;
2407 ret_val++;
2408 ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
2409 //
2410 // now variable sized stuff
2411 //
2412
2413 // first the blobs
2414 ret_val += sizeof(kc_info->num_blobs);
2415 ret_val+= kc_info->num_blobs;
2416
2417 // then the pk
2418 // one byte for num key parts
2419 // two bytes for each key part
2420 ret_val++;
2421 ret_val += MAX_REF_PARTS*2;
2422
2423 // then the key
2424 // null bit, then null byte,
2425 // then 1 byte stating what it is, then 4 for offset, 4 for key length,
2426 // 1 for if charset exists, and 4 for charset
2427 ret_val += MAX_REF_PARTS*(1 + sizeof(uint32_t) + 1 + 3*sizeof(uint32_t) + 1);
2428 //
2429 // four bytes storing the length of this portion
2430 //
2431 ret_val += 4;
2432 return ret_val;
2433 }
2434
create_toku_secondary_key_pack_descriptor(uchar * buf,bool has_hpk,uint pk_index,TABLE_SHARE * table_share,TABLE * table,KEY_AND_COL_INFO * kc_info,KEY * key_info,KEY * prim_key)2435 static uint32_t create_toku_secondary_key_pack_descriptor (
2436 uchar* buf,
2437 bool has_hpk,
2438 uint pk_index,
2439 TABLE_SHARE* table_share,
2440 TABLE* table,
2441 KEY_AND_COL_INFO* kc_info,
2442 KEY* key_info,
2443 KEY* prim_key
2444 )
2445 {
2446 //
2447 // The first four bytes always contain the offset of where the first key
2448 // ends.
2449 //
2450 uchar* pk_info = NULL;
2451 uchar* pos = buf + 4;
2452 uint32_t offset = 0;
2453
2454 //
2455 // first byte states that it is NOT main dictionary
2456 //
2457 pos[0] = 0;
2458 pos++;
2459
2460 //
2461 // one byte states if main dictionary has an hpk or not
2462 //
2463 if (has_hpk) {
2464 pos[0] = 1;
2465 }
2466 else {
2467 pos[0] = 0;
2468 }
2469 pos++;
2470
2471 pos += pack_some_row_info(
2472 pos,
2473 pk_index,
2474 table_share,
2475 kc_info
2476 );
2477
2478 //
2479 // store blob information
2480 //
2481 memcpy(pos, &kc_info->num_blobs, sizeof(kc_info->num_blobs));
2482 pos += sizeof(uint32_t);
2483 for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
2484 //
2485 // store length bytes for each blob
2486 //
2487 Field* field = table_share->field[kc_info->blob_fields[i]];
2488 pos[0] = (uchar)field->row_pack_length();
2489 pos++;
2490 }
2491
2492 //
2493 // store the pk information
2494 //
2495 if (has_hpk) {
2496 pos[0] = 0;
2497 pos++;
2498 }
2499 else {
2500 //
2501 // store number of parts
2502 //
2503 assert_always(prim_key->user_defined_key_parts < 128);
2504 pos[0] = 2 * prim_key->user_defined_key_parts;
2505 pos++;
2506 //
2507 // for each part, store if it is a fixed field or var field
2508 // if fixed, store number of bytes, if var, store
2509 // number of length bytes
2510 // total should be two bytes per key part stored
2511 //
2512 pk_info = pos;
2513 uchar* tmp = pos;
2514 for (uint i = 0; i < prim_key->user_defined_key_parts; i++) {
2515 tmp += pack_desc_pk_info(
2516 tmp,
2517 kc_info,
2518 table_share,
2519 &prim_key->key_part[i]
2520 );
2521 }
2522 //
2523 // asserting that we moved forward as much as we think we have
2524 //
2525 assert_always(tmp - pos == (2 * prim_key->user_defined_key_parts));
2526 pos = tmp;
2527 }
2528
2529 for (uint i = 0; i < key_info->user_defined_key_parts; i++) {
2530 KEY_PART_INFO curr_kpi = key_info->key_part[i];
2531 uint16 field_index = curr_kpi.field->field_index;
2532 Field* field = table_share->field[field_index];
2533 bool is_col_in_pk = false;
2534
2535 if (bitmap_is_set(&kc_info->key_filters[pk_index],field_index)) {
2536 assert_always(!has_hpk);
2537 assert_always(prim_key != nullptr);
2538 is_col_in_pk = true;
2539 }
2540 else {
2541 is_col_in_pk = false;
2542 }
2543
2544 pos[0] = field->null_bit;
2545 pos++;
2546
2547 if (is_col_in_pk) {
2548 //
2549 // assert that columns in pk do not have a null bit
2550 // because in MySQL, pk columns cannot be null
2551 //
2552 assert_always(!field->null_bit);
2553 }
2554
2555 if (field->null_bit) {
2556 uint32_t null_offset = get_null_offset(table,table->field[field_index]);
2557 memcpy(pos, &null_offset, sizeof(uint32_t));
2558 pos += sizeof(uint32_t);
2559 }
2560 if (is_col_in_pk) {
2561 pos += pack_desc_pk_offset_info(pos, &curr_kpi, prim_key, pk_info);
2562 }
2563 else {
2564 pos += pack_desc_offset_info(
2565 pos,
2566 kc_info,
2567 pk_index,
2568 table_share,
2569 &curr_kpi
2570 );
2571 }
2572 pos += pack_desc_key_length_info(
2573 pos,
2574 kc_info,
2575 table_share,
2576 &curr_kpi
2577 );
2578 pos += pack_desc_char_info(pos, table_share, &curr_kpi);
2579 }
2580
2581 offset = pos - buf;
2582 buf[0] = (uchar)(offset & 255);
2583 buf[1] = (uchar)((offset >> 8) & 255);
2584 buf[2] = (uchar)((offset >> 16) & 255);
2585 buf[3] = (uchar)((offset >> 24) & 255);
2586
2587 return pos - buf;
2588 }
2589
skip_key_in_desc(uchar * row_desc)2590 static uint32_t skip_key_in_desc(
2591 uchar* row_desc
2592 )
2593 {
2594 uchar* pos = row_desc;
2595 uchar col_bin_or_char;
2596 //
2597 // skip the byte that states if it is a fix field or var field, we do not care
2598 //
2599 pos++;
2600
2601 //
2602 // skip the offset information
2603 //
2604 pos += sizeof(uint32_t);
2605
2606 //
2607 // skip the key_part_length info
2608 //
2609 pos += sizeof(uint32_t);
2610 col_bin_or_char = pos[0];
2611 pos++;
2612 if (col_bin_or_char == COL_HAS_NO_CHARSET) {
2613 goto exit;
2614 }
2615 //
2616 // skip the charset info
2617 //
2618 pos += 4;
2619
2620
2621 exit:
2622 return (uint32_t)(pos-row_desc);
2623 }
2624
2625
max_key_size_from_desc(void * row_desc,uint32_t row_desc_size)2626 static uint32_t max_key_size_from_desc(
2627 void* row_desc,
2628 uint32_t row_desc_size
2629 )
2630 {
2631 uchar* desc_pos = (uchar *)row_desc;
2632 uint32_t num_blobs;
2633 uint32_t num_pk_columns;
2634 //
2635 // start at 1 for the infinity byte
2636 //
2637 uint32_t max_size = 1;
2638
2639 // skip byte that states if main dictionary
2640 bool is_main_dictionary = desc_pos[0];
2641 desc_pos++;
2642 assert_always(!is_main_dictionary);
2643
2644 // skip hpk byte
2645 desc_pos++;
2646
2647 // skip num_null_bytes
2648 desc_pos += sizeof(uint32_t);
2649
2650 // skip mcp_info
2651 desc_pos += sizeof(MULTI_COL_PACK_INFO);
2652
2653 // skip offset_bytes
2654 desc_pos++;
2655
2656 // skip over blobs
2657 memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
2658 desc_pos += sizeof(num_blobs);
2659 desc_pos += num_blobs;
2660
2661 // skip over pk info
2662 num_pk_columns = desc_pos[0]/2;
2663 desc_pos++;
2664 desc_pos += 2*num_pk_columns;
2665
2666 while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
2667 uchar has_charset;
2668 uint32_t key_length = 0;
2669
2670 uchar null_bit = desc_pos[0];
2671 desc_pos++;
2672
2673 if (null_bit) {
2674 //
2675 // column is NULLable, skip null_offset, and add a null byte
2676 //
2677 max_size++;
2678 desc_pos += sizeof(uint32_t);
2679 }
2680 //
2681 // skip over byte that states if fix or var
2682 //
2683 desc_pos++;
2684
2685 // skip over offset
2686 desc_pos += sizeof(uint32_t);
2687
2688 //
2689 // get the key length and add it to return value
2690 //
2691 memcpy(&key_length, desc_pos, sizeof(key_length));
2692 desc_pos += sizeof(key_length);
2693 max_size += key_length;
2694 max_size += 2; // 2 bytes for a potential length bytes, we are upperbounding, does not need to be super tight
2695
2696 has_charset = desc_pos[0];
2697 desc_pos++;
2698
2699 uint32_t charset_num;
2700 if (has_charset == COL_HAS_CHARSET) {
2701 // skip over charsent num
2702 desc_pos += sizeof(charset_num);
2703 }
2704 else {
2705 assert_always(has_charset == COL_HAS_NO_CHARSET);
2706 }
2707 }
2708 return max_size;
2709 }
2710
pack_key_from_desc(uchar * buf,void * row_desc,uint32_t row_desc_size,const DBT * pk_key,const DBT * pk_val)2711 static uint32_t pack_key_from_desc(
2712 uchar* buf,
2713 void* row_desc,
2714 uint32_t row_desc_size,
2715 const DBT* pk_key,
2716 const DBT* pk_val) {
2717
2718 MULTI_COL_PACK_INFO mcp_info;
2719 uint32_t num_null_bytes;
2720 uint32_t num_blobs;
2721 uint32_t num_pk_columns;
2722 uchar* blob_lengths = NULL;
2723 uchar* pk_info = NULL;
2724 uchar* pk_data_ptr = NULL;
2725 uchar* null_bytes_ptr = NULL;
2726 uchar* fixed_field_ptr = NULL;
2727 uchar* var_field_offset_ptr = NULL;
2728 const uchar* var_field_data_ptr = NULL;
2729 uint32_t num_offset_bytes;
2730 uchar* packed_key_pos = buf;
2731 uchar* desc_pos = (uchar *)row_desc;
2732
2733 bool is_main_dictionary = desc_pos[0];
2734 desc_pos++;
2735 assert_always(!is_main_dictionary);
2736
2737 //
2738 // get the constant info out of descriptor
2739 //
2740 bool hpk = desc_pos[0];
2741 desc_pos++;
2742
2743 memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
2744 desc_pos += sizeof(num_null_bytes);
2745
2746 memcpy(&mcp_info, desc_pos, sizeof(mcp_info));
2747 desc_pos += sizeof(mcp_info);
2748
2749 num_offset_bytes = desc_pos[0];
2750 desc_pos++;
2751
2752 memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
2753 desc_pos += sizeof(num_blobs);
2754
2755 blob_lengths = desc_pos;
2756 desc_pos += num_blobs;
2757
2758 num_pk_columns = desc_pos[0]/2;
2759 desc_pos++;
2760 pk_info = desc_pos;
2761 desc_pos += 2*num_pk_columns;
2762
2763 //
2764 // now start packing the key
2765 //
2766
2767 //
2768 // pack the infinity byte
2769 //
2770 packed_key_pos[0] = COL_ZERO;
2771 packed_key_pos++;
2772 //
2773 // now start packing each column of the key, as described in descriptor
2774 //
2775 if (!hpk) {
2776 // +1 for the infinity byte
2777 pk_data_ptr = (uchar *)pk_key->data + 1;
2778 }
2779 null_bytes_ptr = (uchar *)pk_val->data;
2780 fixed_field_ptr = null_bytes_ptr + num_null_bytes;
2781 var_field_offset_ptr = fixed_field_ptr + mcp_info.fixed_field_size;
2782 var_field_data_ptr = var_field_offset_ptr + mcp_info.len_of_offsets;
2783 while ((uint32_t)(desc_pos - (uchar*)row_desc) < row_desc_size) {
2784 uchar col_fix_val;
2785 uchar has_charset;
2786 uint32_t col_pack_val = 0;
2787 uint32_t key_length = 0;
2788
2789 uchar null_bit = desc_pos[0];
2790 desc_pos++;
2791
2792 if (null_bit) {
2793 //
2794 // column is NULLable, need to check the null bytes to see if it is NULL
2795 //
2796 uint32_t null_offset = 0;
2797 bool is_field_null;
2798 memcpy(&null_offset, desc_pos, sizeof(null_offset));
2799 desc_pos += sizeof(null_offset);
2800
2801 is_field_null = (null_bytes_ptr[null_offset] & null_bit) ? true: false;
2802 if (is_field_null) {
2803 packed_key_pos[0] = NULL_COL_VAL;
2804 packed_key_pos++;
2805 desc_pos += skip_key_in_desc(desc_pos);
2806 continue;
2807 } else {
2808 packed_key_pos[0] = NONNULL_COL_VAL;
2809 packed_key_pos++;
2810 }
2811 }
2812 //
2813 // now pack the column (unless it was NULL, and we continued)
2814 //
2815 col_fix_val = desc_pos[0];
2816 desc_pos++;
2817
2818 memcpy(&col_pack_val, desc_pos, sizeof(col_pack_val));
2819 desc_pos += sizeof(col_pack_val);
2820
2821 memcpy(&key_length, desc_pos, sizeof(key_length));
2822 desc_pos += sizeof(key_length);
2823
2824 has_charset = desc_pos[0];
2825 desc_pos++;
2826
2827 uint32_t charset_num = 0;
2828 if (has_charset == COL_HAS_CHARSET) {
2829 memcpy(&charset_num, desc_pos, sizeof(charset_num));
2830 desc_pos += sizeof(charset_num);
2831 } else {
2832 assert_always(has_charset == COL_HAS_NO_CHARSET);
2833 }
2834 //
2835 // case where column is in pk val
2836 //
2837 if (col_fix_val == COL_FIX_FIELD ||
2838 col_fix_val == COL_VAR_FIELD ||
2839 col_fix_val == COL_BLOB_FIELD) {
2840 if (col_fix_val == COL_FIX_FIELD &&
2841 has_charset == COL_HAS_NO_CHARSET) {
2842 memcpy(
2843 packed_key_pos,
2844 &fixed_field_ptr[col_pack_val],
2845 key_length);
2846 packed_key_pos += key_length;
2847 } else if (col_fix_val == COL_VAR_FIELD &&
2848 has_charset == COL_HAS_NO_CHARSET) {
2849 uint32_t data_start_offset = 0;
2850
2851 uint32_t data_size = 0;
2852 get_var_field_info(
2853 &data_size,
2854 &data_start_offset,
2855 col_pack_val,
2856 var_field_offset_ptr,
2857 num_offset_bytes);
2858
2859 //
2860 // length of this field in this row is data_size
2861 // data is located beginning at var_field_data_ptr + data_start_offset
2862 //
2863 packed_key_pos = pack_toku_varbinary_from_desc(
2864 packed_key_pos,
2865 var_field_data_ptr + data_start_offset,
2866 //number of bytes to use to encode the length in to_tokudb
2867 key_length,
2868 //length of field
2869 data_size);
2870 } else {
2871 const uchar* data_start = NULL;
2872 uint32_t data_start_offset = 0;
2873 uint32_t data_size = 0;
2874
2875 if (col_fix_val == COL_FIX_FIELD) {
2876 data_start_offset = col_pack_val;
2877 data_size = key_length;
2878 data_start = fixed_field_ptr + data_start_offset;
2879 } else if (col_fix_val == COL_VAR_FIELD){
2880 get_var_field_info(
2881 &data_size,
2882 &data_start_offset,
2883 col_pack_val,
2884 var_field_offset_ptr,
2885 num_offset_bytes);
2886 data_start = var_field_data_ptr + data_start_offset;
2887 } else if (col_fix_val == COL_BLOB_FIELD) {
2888 uint32_t blob_index = col_pack_val;
2889 uint32_t blob_offset;
2890 const uchar* blob_ptr = NULL;
2891 uint32_t field_len;
2892 uint32_t field_len_bytes = blob_lengths[blob_index];
2893 get_blob_field_info(
2894 &blob_offset,
2895 mcp_info.len_of_offsets,
2896 var_field_data_ptr,
2897 num_offset_bytes);
2898 blob_ptr = var_field_data_ptr + blob_offset;
2899 assert_always(num_blobs > 0);
2900
2901 // skip over other blobs to get to the one we want to
2902 // make a key out of
2903 for (uint32_t i = 0; i < blob_index; i++) {
2904 blob_ptr = unpack_toku_field_blob(
2905 NULL,
2906 blob_ptr,
2907 blob_lengths[i],
2908 true);
2909 }
2910 // at this point, blob_ptr is pointing to the blob we
2911 // want to make a key from
2912 field_len = get_blob_field_len(blob_ptr, field_len_bytes);
2913 // now we set the variables to make the key
2914 data_start = blob_ptr + field_len_bytes;
2915 data_size = field_len;
2916 } else {
2917 assert_unreachable();
2918 }
2919
2920 packed_key_pos = pack_toku_varstring_from_desc(packed_key_pos,
2921 data_start,
2922 key_length,
2923 data_size,
2924 charset_num);
2925 }
2926 } else {
2927 // case where column is in pk key
2928 if (col_fix_val == COL_FIX_PK_OFFSET) {
2929 memcpy(packed_key_pos, &pk_data_ptr[col_pack_val], key_length);
2930 packed_key_pos += key_length;
2931 } else if (col_fix_val == COL_VAR_PK_OFFSET) {
2932 uchar* tmp_pk_data_ptr = pk_data_ptr;
2933 uint32_t index_in_pk = col_pack_val;
2934 //
2935 // skip along in pk to the right column
2936 //
2937 for (uint32_t i = 0; i < index_in_pk; i++) {
2938 if (pk_info[2*i] == COL_FIX_FIELD) {
2939 tmp_pk_data_ptr += pk_info[2*i + 1];
2940 } else if (pk_info[2*i] == COL_VAR_FIELD) {
2941 uint32_t len_bytes = pk_info[2*i + 1];
2942 uint32_t len;
2943 if (len_bytes == 1) {
2944 len = tmp_pk_data_ptr[0];
2945 tmp_pk_data_ptr++;
2946 } else if (len_bytes == 2) {
2947 len = uint2korr(tmp_pk_data_ptr);
2948 tmp_pk_data_ptr += 2;
2949 } else {
2950 assert_unreachable();
2951 }
2952 tmp_pk_data_ptr += len;
2953 } else {
2954 assert_unreachable();
2955 }
2956 }
2957 //
2958 // at this point, tmp_pk_data_ptr is pointing at the column
2959 //
2960 uint32_t is_fix_field = pk_info[2*index_in_pk];
2961 if (is_fix_field == COL_FIX_FIELD) {
2962 memcpy(packed_key_pos, tmp_pk_data_ptr, key_length);
2963 packed_key_pos += key_length;
2964 } else if (is_fix_field == COL_VAR_FIELD) {
2965 const uchar* data_start = NULL;
2966 uint32_t data_size = 0;
2967 uint32_t len_bytes = pk_info[2*index_in_pk + 1];
2968 if (len_bytes == 1) {
2969 data_size = tmp_pk_data_ptr[0];
2970 tmp_pk_data_ptr++;
2971 } else if (len_bytes == 2) {
2972 data_size = uint2korr(tmp_pk_data_ptr);
2973 tmp_pk_data_ptr += 2;
2974 } else {
2975 assert_unreachable();
2976 }
2977 data_start = tmp_pk_data_ptr;
2978
2979 if (has_charset == COL_HAS_CHARSET) {
2980 packed_key_pos = pack_toku_varstring_from_desc(
2981 packed_key_pos,
2982 data_start,
2983 key_length,
2984 data_size,
2985 charset_num);
2986 } else if (has_charset == COL_HAS_NO_CHARSET) {
2987 packed_key_pos = pack_toku_varbinary_from_desc(
2988 packed_key_pos,
2989 data_start,
2990 key_length,
2991 data_size);
2992 } else {
2993 assert_unreachable();
2994 }
2995 } else {
2996 assert_unreachable();
2997 }
2998 } else {
2999 assert_unreachable();
3000 }
3001 }
3002
3003 }
3004 assert_always( (uint32_t)(desc_pos - (uchar *)row_desc) == row_desc_size);
3005
3006 //
3007 // now append the primary key to the end of the key
3008 //
3009 if (hpk) {
3010 memcpy(packed_key_pos, pk_key->data, pk_key->size);
3011 packed_key_pos += pk_key->size;
3012 } else {
3013 memcpy(packed_key_pos, (uchar *)pk_key->data + 1, pk_key->size - 1);
3014 packed_key_pos += (pk_key->size - 1);
3015 }
3016
3017 return (uint32_t)(packed_key_pos - buf);
3018 }
3019
fields_have_same_name(Field * a,Field * b)3020 static bool fields_have_same_name(Field* a, Field* b) {
3021 return strcmp(a->field_name.str, b->field_name.str) == 0;
3022 }
3023
fields_are_same_type(Field * a,Field * b)3024 static bool fields_are_same_type(Field* a, Field* b) {
3025 bool retval = true;
3026 enum_field_types a_mysql_type = a->real_type();
3027 enum_field_types b_mysql_type = b->real_type();
3028 TOKU_TYPE a_toku_type = mysql_to_toku_type(a);
3029 TOKU_TYPE b_toku_type = mysql_to_toku_type(b);
3030 // make sure have same names
3031 // make sure have same types
3032 if (a_mysql_type != b_mysql_type) {
3033 retval = false;
3034 goto cleanup;
3035 }
3036 // Thanks to MariaDB 5.5, we can have two fields
3037 // be the same MySQL type but not the same toku type,
3038 // This is an issue introduced with MariaDB's fractional time
3039 // implementation
3040 if (a_toku_type != b_toku_type) {
3041 retval = false;
3042 goto cleanup;
3043 }
3044 // make sure that either both are nullable, or both not nullable
3045 if ((a->null_bit && !b->null_bit) || (!a->null_bit && b->null_bit)) {
3046 retval = false;
3047 goto cleanup;
3048 }
3049 switch (a_mysql_type) {
3050 case MYSQL_TYPE_TINY:
3051 case MYSQL_TYPE_SHORT:
3052 case MYSQL_TYPE_INT24:
3053 case MYSQL_TYPE_LONG:
3054 case MYSQL_TYPE_LONGLONG:
3055 // length, unsigned, auto increment
3056 if (a->pack_length() != b->pack_length() ||
3057 (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
3058 (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
3059 retval = false;
3060 goto cleanup;
3061 }
3062 break;
3063 case MYSQL_TYPE_DOUBLE:
3064 case MYSQL_TYPE_FLOAT:
3065 // length, unsigned, auto increment
3066 if (a->pack_length() != b->pack_length() ||
3067 (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
3068 (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
3069 retval = false;
3070 goto cleanup;
3071 }
3072 break;
3073 case MYSQL_TYPE_NEWDECIMAL:
3074 // length, unsigned
3075 if (a->pack_length() != b->pack_length() ||
3076 (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG)) {
3077 retval = false;
3078 goto cleanup;
3079 }
3080 break;
3081 case MYSQL_TYPE_ENUM: {
3082 Field_enum *a_enum = static_cast<Field_enum *>(a);
3083 if (!a_enum->eq_def(b)) {
3084 retval = false;
3085 goto cleanup;
3086 }
3087 break;
3088 }
3089 case MYSQL_TYPE_SET: {
3090 Field_set *a_set = static_cast<Field_set *>(a);
3091 if (!a_set->eq_def(b)) {
3092 retval = false;
3093 goto cleanup;
3094 }
3095 break;
3096 }
3097 case MYSQL_TYPE_BIT:
3098 // length
3099 if (a->pack_length() != b->pack_length()) {
3100 retval = false;
3101 goto cleanup;
3102 }
3103 break;
3104 case MYSQL_TYPE_DATE:
3105 case MYSQL_TYPE_DATETIME:
3106 case MYSQL_TYPE_YEAR:
3107 case MYSQL_TYPE_NEWDATE:
3108 case MYSQL_TYPE_TIME:
3109 case MYSQL_TYPE_TIMESTAMP:
3110 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
3111 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
3112 (100000 <= MYSQL_VERSION_ID)
3113 case MYSQL_TYPE_DATETIME2:
3114 case MYSQL_TYPE_TIMESTAMP2:
3115 case MYSQL_TYPE_TIME2:
3116 #endif
3117 // length
3118 if (a->pack_length() != b->pack_length()) {
3119 retval = false;
3120 goto cleanup;
3121 }
3122 break;
3123 case MYSQL_TYPE_TINY_BLOB:
3124 case MYSQL_TYPE_MEDIUM_BLOB:
3125 case MYSQL_TYPE_BLOB:
3126 case MYSQL_TYPE_LONG_BLOB:
3127 // test the charset
3128 if (a->charset()->number != b->charset()->number) {
3129 retval = false;
3130 goto cleanup;
3131 }
3132 if (a->row_pack_length() != b->row_pack_length()) {
3133 retval = false;
3134 goto cleanup;
3135 }
3136 break;
3137 case MYSQL_TYPE_STRING:
3138 if (a->pack_length() != b->pack_length()) {
3139 retval = false;
3140 goto cleanup;
3141 }
3142 // if both are binary, we know have same pack lengths,
3143 // so we can goto end
3144 if (a->binary() && b->binary()) {
3145 // nothing to do, we are good
3146 }
3147 else if (!a->binary() && !b->binary()) {
3148 // test the charset
3149 if (a->charset()->number != b->charset()->number) {
3150 retval = false;
3151 goto cleanup;
3152 }
3153 }
3154 else {
3155 // one is binary and the other is not, so not the same
3156 retval = false;
3157 goto cleanup;
3158 }
3159 break;
3160 case MYSQL_TYPE_VARCHAR:
3161 if (a->field_length != b->field_length) {
3162 retval = false;
3163 goto cleanup;
3164 }
3165 // if both are binary, we know have same pack lengths,
3166 // so we can goto end
3167 if (a->binary() && b->binary()) {
3168 // nothing to do, we are good
3169 }
3170 else if (!a->binary() && !b->binary()) {
3171 // test the charset
3172 if (a->charset()->number != b->charset()->number) {
3173 retval = false;
3174 goto cleanup;
3175 }
3176 }
3177 else {
3178 // one is binary and the other is not, so not the same
3179 retval = false;
3180 goto cleanup;
3181 }
3182 break;
3183 //
3184 // I believe these are old types that are no longer
3185 // in any 5.1 tables, so tokudb does not need
3186 // to worry about them
3187 // Putting in this assert in case I am wrong.
3188 // Do not support geometry yet.
3189 //
3190 case MYSQL_TYPE_GEOMETRY:
3191 case MYSQL_TYPE_DECIMAL:
3192 case MYSQL_TYPE_VAR_STRING:
3193 case MYSQL_TYPE_NULL:
3194 case MYSQL_TYPE_VARCHAR_COMPRESSED:
3195 case MYSQL_TYPE_BLOB_COMPRESSED:
3196 assert_unreachable();
3197 }
3198
3199 cleanup:
3200 return retval;
3201 }
3202
are_two_fields_same(Field * a,Field * b)3203 static bool are_two_fields_same(Field* a, Field* b) {
3204 return fields_have_same_name(a, b) && fields_are_same_type(a, b);
3205 }
3206
3207
3208