1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of TokuDB
6
7
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 TokuDB is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 TokuDB is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with TokuDB. If not, see <http://www.gnu.org/licenses/>.
21
22 ======= */
23
24 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
25
26 #include "hatoku_cmp.h"
27
28 #ifdef WORDS_BIGENDIAN
29 #error "WORDS_BIGENDIAN not supported"
30 #endif
31
32 // returns true if the field is a valid field to be used
33 // in a TokuDB table. The non-valid fields are those
34 // that have been deprecated since before 5.1, and can
35 // only exist through upgrades of old versions of MySQL
field_valid_for_tokudb_table(Field * field)36 static bool field_valid_for_tokudb_table(Field* field) {
37 bool ret_val = false;
38 enum_field_types mysql_type = field->real_type();
39 switch (mysql_type) {
40 case MYSQL_TYPE_LONG:
41 case MYSQL_TYPE_LONGLONG:
42 case MYSQL_TYPE_TINY:
43 case MYSQL_TYPE_SHORT:
44 case MYSQL_TYPE_INT24:
45 case MYSQL_TYPE_DATE:
46 case MYSQL_TYPE_YEAR:
47 case MYSQL_TYPE_NEWDATE:
48 case MYSQL_TYPE_ENUM:
49 case MYSQL_TYPE_SET:
50 case MYSQL_TYPE_TIME:
51 case MYSQL_TYPE_DATETIME:
52 case MYSQL_TYPE_TIMESTAMP:
53 case MYSQL_TYPE_DOUBLE:
54 case MYSQL_TYPE_FLOAT:
55 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
56 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
57 (100000 <= MYSQL_VERSION_ID)
58 case MYSQL_TYPE_DATETIME2:
59 case MYSQL_TYPE_TIMESTAMP2:
60 case MYSQL_TYPE_TIME2:
61 #endif
62 case MYSQL_TYPE_NEWDECIMAL:
63 case MYSQL_TYPE_BIT:
64 case MYSQL_TYPE_STRING:
65 case MYSQL_TYPE_VARCHAR:
66 case MYSQL_TYPE_TINY_BLOB:
67 case MYSQL_TYPE_MEDIUM_BLOB:
68 case MYSQL_TYPE_BLOB:
69 case MYSQL_TYPE_LONG_BLOB:
70 ret_val = true;
71 goto exit;
72 //
73 // I believe these are old types that are no longer
74 // in any 5.1 tables, so tokudb does not need
75 // to worry about them
76 // Putting in this assert in case I am wrong.
77 // Do not support geometry yet.
78 //
79 case MYSQL_TYPE_GEOMETRY:
80 case MYSQL_TYPE_DECIMAL:
81 case MYSQL_TYPE_VAR_STRING:
82 case MYSQL_TYPE_NULL:
83 case MYSQL_TYPE_VARCHAR_COMPRESSED:
84 case MYSQL_TYPE_BLOB_COMPRESSED:
85 ret_val = false;
86 }
87 exit:
88 return ret_val;
89 }
90
get_var_field_info(uint32_t * field_len,uint32_t * start_offset,uint32_t var_field_index,const uchar * var_field_offset_ptr,uint32_t num_offset_bytes)91 static void get_var_field_info(
92 uint32_t* field_len, // output: length of field
93 uint32_t* start_offset, // output, length of offset where data starts
94 uint32_t var_field_index, //input, index of var field we want info on
95 const uchar* var_field_offset_ptr, //input, pointer to where offset information for all var fields begins
96 uint32_t num_offset_bytes //input, number of bytes used to store offsets starting at var_field_offset_ptr
97 )
98 {
99 uint32_t data_start_offset = 0;
100 uint32_t data_end_offset = 0;
101 switch (num_offset_bytes) {
102 case (1):
103 data_end_offset = (var_field_offset_ptr + var_field_index)[0];
104 break;
105 case (2):
106 data_end_offset = uint2korr(var_field_offset_ptr + 2*var_field_index);
107 break;
108 default:
109 assert_unreachable();
110 }
111
112 if (var_field_index) {
113 switch (num_offset_bytes) {
114 case (1):
115 data_start_offset = (var_field_offset_ptr + var_field_index - 1)[0];
116 break;
117 case (2):
118 data_start_offset = uint2korr(var_field_offset_ptr + 2*(var_field_index-1));
119 break;
120 default:
121 assert_unreachable();
122 }
123 }
124 else {
125 data_start_offset = 0;
126 }
127
128 *start_offset = data_start_offset;
129 assert_always(data_end_offset >= data_start_offset);
130 *field_len = data_end_offset - data_start_offset;
131 }
132
get_blob_field_info(uint32_t * start_offset,uint32_t len_of_offsets,const uchar * var_field_data_ptr,uint32_t num_offset_bytes)133 static void get_blob_field_info(
134 uint32_t* start_offset,
135 uint32_t len_of_offsets,
136 const uchar* var_field_data_ptr,
137 uint32_t num_offset_bytes
138 )
139 {
140 uint32_t data_end_offset;
141 //
142 // need to set var_field_data_ptr to point to beginning of blobs, which
143 // is at the end of the var stuff (if they exist), if var stuff does not exist
144 // then the bottom variable will be 0, and var_field_data_ptr is already
145 // set correctly
146 //
147 if (len_of_offsets) {
148 switch (num_offset_bytes) {
149 case (1):
150 data_end_offset = (var_field_data_ptr - 1)[0];
151 break;
152 case (2):
153 data_end_offset = uint2korr(var_field_data_ptr - 2);
154 break;
155 default:
156 assert_unreachable();
157 }
158 }
159 else {
160 data_end_offset = 0;
161 }
162 *start_offset = data_end_offset;
163 }
164
165
166 // this function is pattern matched from
167 // InnoDB's get_innobase_type_from_mysql_type
mysql_to_toku_type(Field * field)168 static TOKU_TYPE mysql_to_toku_type (Field* field) {
169 TOKU_TYPE ret_val = toku_type_unknown;
170 enum_field_types mysql_type = field->real_type();
171 switch (mysql_type) {
172 case MYSQL_TYPE_LONG:
173 case MYSQL_TYPE_LONGLONG:
174 case MYSQL_TYPE_TINY:
175 case MYSQL_TYPE_SHORT:
176 case MYSQL_TYPE_INT24:
177 case MYSQL_TYPE_DATE:
178 case MYSQL_TYPE_YEAR:
179 case MYSQL_TYPE_NEWDATE:
180 case MYSQL_TYPE_ENUM:
181 case MYSQL_TYPE_SET:
182 ret_val = toku_type_int;
183 goto exit;
184 case MYSQL_TYPE_TIME:
185 case MYSQL_TYPE_DATETIME:
186 case MYSQL_TYPE_TIMESTAMP:
187 #ifdef MARIADB_BASE_VERSION
188 // case to handle fractional seconds in MariaDB
189 //
190 if (field->key_type() == HA_KEYTYPE_BINARY) {
191 ret_val = toku_type_fixbinary;
192 goto exit;
193 }
194 #endif
195 ret_val = toku_type_int;
196 goto exit;
197 case MYSQL_TYPE_DOUBLE:
198 ret_val = toku_type_double;
199 goto exit;
200 case MYSQL_TYPE_FLOAT:
201 ret_val = toku_type_float;
202 goto exit;
203 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
204 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
205 (100000 <= MYSQL_VERSION_ID)
206 case MYSQL_TYPE_DATETIME2:
207 case MYSQL_TYPE_TIMESTAMP2:
208 case MYSQL_TYPE_TIME2:
209 #endif
210 case MYSQL_TYPE_NEWDECIMAL:
211 case MYSQL_TYPE_BIT:
212 ret_val = toku_type_fixbinary;
213 goto exit;
214 case MYSQL_TYPE_STRING:
215 if (field->binary()) {
216 ret_val = toku_type_fixbinary;
217 }
218 else {
219 ret_val = toku_type_fixstring;
220 }
221 goto exit;
222 case MYSQL_TYPE_VARCHAR:
223 if (field->binary()) {
224 ret_val = toku_type_varbinary;
225 }
226 else {
227 ret_val = toku_type_varstring;
228 }
229 goto exit;
230 case MYSQL_TYPE_TINY_BLOB:
231 case MYSQL_TYPE_MEDIUM_BLOB:
232 case MYSQL_TYPE_BLOB:
233 case MYSQL_TYPE_LONG_BLOB:
234 ret_val = toku_type_blob;
235 goto exit;
236 //
237 // I believe these are old types that are no longer
238 // in any 5.1 tables, so tokudb does not need
239 // to worry about them
240 // Putting in this assert in case I am wrong.
241 // Do not support geometry yet.
242 //
243 case MYSQL_TYPE_GEOMETRY:
244 case MYSQL_TYPE_DECIMAL:
245 case MYSQL_TYPE_VAR_STRING:
246 case MYSQL_TYPE_NULL:
247 case MYSQL_TYPE_VARCHAR_COMPRESSED:
248 case MYSQL_TYPE_BLOB_COMPRESSED:
249 assert_unreachable();
250 }
251 exit:
252 return ret_val;
253 }
254
255
get_charset_from_num(uint32_t charset_number)256 static inline CHARSET_INFO* get_charset_from_num (uint32_t charset_number) {
257 //
258 // patternmatched off of InnoDB, due to MySQL bug 42649
259 //
260 if (charset_number == default_charset_info->number) {
261 return default_charset_info;
262 }
263 else if (charset_number == my_charset_latin1.number) {
264 return &my_charset_latin1;
265 }
266 else {
267 return get_charset(charset_number, MYF(MY_WME));
268 }
269 }
270
271
272
273 //
274 // used to read the length of a variable sized field in a tokudb key (buf).
275 //
get_length_from_var_tokudata(uchar * buf,uint32_t length_bytes)276 static inline uint32_t get_length_from_var_tokudata (uchar* buf, uint32_t length_bytes) {
277 uint32_t length = (uint32_t)(buf[0]);
278 if (length_bytes == 2) {
279 uint32_t rest_of_length = (uint32_t)buf[1];
280 length += rest_of_length<<8;
281 }
282 return length;
283 }
284
//
// Deduces the number of bytes used to store the length of a
// varstring/varbinary in a key field stored in tokudb, given the maximum
// number of bytes the field can hold. A maximum of up to 255 needs one
// length byte, anything larger needs two.
//
static inline uint32_t get_length_bytes_from_max(uint32_t max_num_bytes) {
    if (max_num_bytes > 255) {
        return 2;
    }
    return 1;
}
292
293
294
295 //
296 // assuming MySQL in little endian, and we are storing in little endian
297 //
pack_toku_int(uchar * to_tokudb,uchar * from_mysql,uint32_t num_bytes)298 static inline uchar* pack_toku_int (uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
299 switch (num_bytes) {
300 case (1):
301 memcpy(to_tokudb, from_mysql, 1);
302 break;
303 case (2):
304 memcpy(to_tokudb, from_mysql, 2);
305 break;
306 case (3):
307 memcpy(to_tokudb, from_mysql, 3);
308 break;
309 case (4):
310 memcpy(to_tokudb, from_mysql, 4);
311 break;
312 case (8):
313 memcpy(to_tokudb, from_mysql, 8);
314 break;
315 default:
316 assert_unreachable();
317 }
318 return to_tokudb+num_bytes;
319 }
320
321 //
322 // assuming MySQL in little endian, and we are unpacking to little endian
323 //
unpack_toku_int(uchar * to_mysql,uchar * from_tokudb,uint32_t num_bytes)324 static inline uchar* unpack_toku_int(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
325 switch (num_bytes) {
326 case (1):
327 memcpy(to_mysql, from_tokudb, 1);
328 break;
329 case (2):
330 memcpy(to_mysql, from_tokudb, 2);
331 break;
332 case (3):
333 memcpy(to_mysql, from_tokudb, 3);
334 break;
335 case (4):
336 memcpy(to_mysql, from_tokudb, 4);
337 break;
338 case (8):
339 memcpy(to_mysql, from_tokudb, 8);
340 break;
341 default:
342 assert_unreachable();
343 }
344 return from_tokudb+num_bytes;
345 }
346
cmp_toku_int(uchar * a_buf,uchar * b_buf,bool is_unsigned,uint32_t num_bytes)347 static inline int cmp_toku_int (uchar* a_buf, uchar* b_buf, bool is_unsigned, uint32_t num_bytes) {
348 int ret_val = 0;
349 //
350 // case for unsigned integers
351 //
352 if (is_unsigned) {
353 uint32_t a_num, b_num = 0;
354 uint64_t a_big_num, b_big_num = 0;
355 switch (num_bytes) {
356 case (1):
357 a_num = *a_buf;
358 b_num = *b_buf;
359 ret_val = a_num-b_num;
360 goto exit;
361 case (2):
362 a_num = uint2korr(a_buf);
363 b_num = uint2korr(b_buf);
364 ret_val = a_num-b_num;
365 goto exit;
366 case (3):
367 a_num = tokudb_uint3korr(a_buf);
368 b_num = tokudb_uint3korr(b_buf);
369 ret_val = a_num-b_num;
370 goto exit;
371 case (4):
372 a_num = uint4korr(a_buf);
373 b_num = uint4korr(b_buf);
374 if (a_num < b_num) {
375 ret_val = -1; goto exit;
376 }
377 if (a_num > b_num) {
378 ret_val = 1; goto exit;
379 }
380 ret_val = 0;
381 goto exit;
382 case (8):
383 a_big_num = uint8korr(a_buf);
384 b_big_num = uint8korr(b_buf);
385 if (a_big_num < b_big_num) {
386 ret_val = -1; goto exit;
387 }
388 else if (a_big_num > b_big_num) {
389 ret_val = 1; goto exit;
390 }
391 ret_val = 0;
392 goto exit;
393 default:
394 assert_unreachable();
395 }
396 }
397 //
398 // case for signed integers
399 //
400 else {
401 int32_t a_num, b_num = 0;
402 int64_t a_big_num, b_big_num = 0;
403 switch (num_bytes) {
404 case (1):
405 a_num = *(signed char *)a_buf;
406 b_num = *(signed char *)b_buf;
407 ret_val = a_num-b_num;
408 goto exit;
409 case (2):
410 a_num = sint2korr(a_buf);
411 b_num = sint2korr(b_buf);
412 ret_val = a_num-b_num;
413 goto exit;
414 case (3):
415 a_num = sint3korr(a_buf);
416 b_num = sint3korr(b_buf);
417 ret_val = a_num - b_num;
418 goto exit;
419 case (4):
420 a_num = sint4korr(a_buf);
421 b_num = sint4korr(b_buf);
422 if (a_num < b_num) {
423 ret_val = -1; goto exit;
424 }
425 if (a_num > b_num) {
426 ret_val = 1; goto exit;
427 }
428 ret_val = 0;
429 goto exit;
430 case (8):
431 a_big_num = sint8korr(a_buf);
432 b_big_num = sint8korr(b_buf);
433 if (a_big_num < b_big_num) {
434 ret_val = -1; goto exit;
435 }
436 else if (a_big_num > b_big_num) {
437 ret_val = 1; goto exit;
438 }
439 ret_val = 0;
440 goto exit;
441 default:
442 assert_unreachable();
443 }
444 }
445 //
446 // if this is hit, indicates bug in writing of this function
447 //
448 assert_unreachable();
449 exit:
450 return ret_val;
451 }
452
pack_toku_double(uchar * to_tokudb,uchar * from_mysql)453 static inline uchar* pack_toku_double (uchar* to_tokudb, uchar* from_mysql) {
454 memcpy(to_tokudb, from_mysql, sizeof(double));
455 return to_tokudb + sizeof(double);
456 }
457
458
unpack_toku_double(uchar * to_mysql,uchar * from_tokudb)459 static inline uchar* unpack_toku_double(uchar* to_mysql, uchar* from_tokudb) {
460 memcpy(to_mysql, from_tokudb, sizeof(double));
461 return from_tokudb + sizeof(double);
462 }
463
cmp_toku_double(uchar * a_buf,uchar * b_buf)464 static inline int cmp_toku_double(uchar* a_buf, uchar* b_buf) {
465 int ret_val;
466 double a_num;
467 double b_num;
468 doubleget(a_num, a_buf);
469 doubleget(b_num, b_buf);
470 if (a_num < b_num) {
471 ret_val = -1;
472 goto exit;
473 }
474 else if (a_num > b_num) {
475 ret_val = 1;
476 goto exit;
477 }
478 ret_val = 0;
479 exit:
480 return ret_val;
481 }
482
483
pack_toku_float(uchar * to_tokudb,uchar * from_mysql)484 static inline uchar* pack_toku_float (uchar* to_tokudb, uchar* from_mysql) {
485 memcpy(to_tokudb, from_mysql, sizeof(float));
486 return to_tokudb + sizeof(float);
487 }
488
489
unpack_toku_float(uchar * to_mysql,uchar * from_tokudb)490 static inline uchar* unpack_toku_float(uchar* to_mysql, uchar* from_tokudb) {
491 memcpy(to_mysql, from_tokudb, sizeof(float));
492 return from_tokudb + sizeof(float);
493 }
494
cmp_toku_float(uchar * a_buf,uchar * b_buf)495 static inline int cmp_toku_float(uchar* a_buf, uchar* b_buf) {
496 int ret_val;
497 float a_num;
498 float b_num;
499 //
500 // This is the way Field_float::cmp gets the floats from the buffers
501 //
502 memcpy(&a_num, a_buf, sizeof(float));
503 memcpy(&b_num, b_buf, sizeof(float));
504 if (a_num < b_num) {
505 ret_val = -1;
506 goto exit;
507 }
508 else if (a_num > b_num) {
509 ret_val = 1;
510 goto exit;
511 }
512 ret_val = 0;
513 exit:
514 return ret_val;
515 }
516
517
pack_toku_binary(uchar * to_tokudb,uchar * from_mysql,uint32_t num_bytes)518 static inline uchar* pack_toku_binary(uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
519 memcpy(to_tokudb, from_mysql, num_bytes);
520 return to_tokudb + num_bytes;
521 }
522
unpack_toku_binary(uchar * to_mysql,uchar * from_tokudb,uint32_t num_bytes)523 static inline uchar* unpack_toku_binary(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
524 memcpy(to_mysql, from_tokudb, num_bytes);
525 return from_tokudb + num_bytes;
526 }
527
528
cmp_toku_binary(uchar * a_buf,uint32_t a_num_bytes,uchar * b_buf,uint32_t b_num_bytes)529 static inline int cmp_toku_binary(
530 uchar* a_buf,
531 uint32_t a_num_bytes,
532 uchar* b_buf,
533 uint32_t b_num_bytes
534 )
535 {
536 int ret_val = 0;
537 uint32_t num_bytes_to_cmp = (a_num_bytes < b_num_bytes) ? a_num_bytes : b_num_bytes;
538 ret_val = memcmp(a_buf, b_buf, num_bytes_to_cmp);
539 if ((ret_val != 0) || (a_num_bytes == b_num_bytes)) {
540 goto exit;
541 }
542 if (a_num_bytes < b_num_bytes) {
543 ret_val = -1;
544 goto exit;
545 }
546 else {
547 ret_val = 1;
548 goto exit;
549 }
550 exit:
551 return ret_val;
552 }
553
554 //
555 // partially copied from below
556 //
pack_toku_varbinary_from_desc(uchar * to_tokudb,const uchar * from_desc,uint32_t key_part_length,uint32_t field_length)557 static uchar* pack_toku_varbinary_from_desc(
558 uchar* to_tokudb,
559 const uchar* from_desc,
560 uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
561 uint32_t field_length //length of field
562 )
563 {
564 uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
565 uint32_t length = field_length;
566 set_if_smaller(length, key_part_length);
567
568 //
569 // copy the length bytes, assuming both are in little endian
570 //
571 to_tokudb[0] = (uchar)length & 255;
572 if (length_bytes_in_tokudb > 1) {
573 to_tokudb[1] = (uchar) (length >> 8);
574 }
575 //
576 // copy the string
577 //
578 memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
579 return to_tokudb + length + length_bytes_in_tokudb;
580 }
581
pack_toku_varbinary(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes)582 static inline uchar* pack_toku_varbinary(
583 uchar* to_tokudb,
584 uchar* from_mysql,
585 uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
586 uint32_t max_num_bytes
587 )
588 {
589 uint32_t length = 0;
590 uint32_t length_bytes_in_tokudb;
591 switch (length_bytes_in_mysql) {
592 case (0):
593 length = max_num_bytes;
594 break;
595 case (1):
596 length = (uint32_t)(*from_mysql);
597 break;
598 case (2):
599 length = uint2korr(from_mysql);
600 break;
601 case (3):
602 length = tokudb_uint3korr(from_mysql);
603 break;
604 case (4):
605 length = uint4korr(from_mysql);
606 break;
607 }
608
609 //
610 // from this point on, functionality equivalent to pack_toku_varbinary_from_desc
611 //
612 set_if_smaller(length,max_num_bytes);
613
614 length_bytes_in_tokudb = get_length_bytes_from_max(max_num_bytes);
615 //
616 // copy the length bytes, assuming both are in little endian
617 //
618 to_tokudb[0] = (uchar)length & 255;
619 if (length_bytes_in_tokudb > 1) {
620 to_tokudb[1] = (uchar) (length >> 8);
621 }
622 //
623 // copy the string
624 //
625 memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
626 return to_tokudb + length + length_bytes_in_tokudb;
627 }
628
unpack_toku_varbinary(uchar * to_mysql,uchar * from_tokudb,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql)629 static inline uchar* unpack_toku_varbinary(
630 uchar* to_mysql,
631 uchar* from_tokudb,
632 uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
633 uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
634 )
635 {
636 uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
637
638 //
639 // copy the length into the mysql buffer
640 //
641 switch (length_bytes_in_mysql) {
642 case (0):
643 break;
644 case (1):
645 *to_mysql = (uchar) length;
646 break;
647 case (2):
648 int2store(to_mysql, length);
649 break;
650 case (3):
651 int3store(to_mysql, length);
652 break;
653 case (4):
654 int4store(to_mysql, length);
655 break;
656 default:
657 assert_unreachable();
658 }
659 //
660 // copy the binary data
661 //
662 memcpy(to_mysql + length_bytes_in_mysql, from_tokudb + length_bytes_in_tokudb, length);
663 return from_tokudb + length_bytes_in_tokudb+ length;
664 }
665
cmp_toku_varbinary(uchar * a_buf,uchar * b_buf,uint32_t length_bytes,uint32_t * a_bytes_read,uint32_t * b_bytes_read)666 static inline int cmp_toku_varbinary(
667 uchar* a_buf,
668 uchar* b_buf,
669 uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
670 uint32_t* a_bytes_read,
671 uint32_t* b_bytes_read
672 )
673 {
674 int ret_val = 0;
675 uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
676 uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
677 ret_val = cmp_toku_binary(
678 a_buf + length_bytes,
679 a_len,
680 b_buf + length_bytes,
681 b_len
682 );
683 *a_bytes_read = a_len + length_bytes;
684 *b_bytes_read = b_len + length_bytes;
685 return ret_val;
686 }
687
pack_toku_blob(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes,const CHARSET_INFO * charset)688 static inline uchar* pack_toku_blob(
689 uchar* to_tokudb,
690 uchar* from_mysql,
691 uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
692 uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
693 uint32_t max_num_bytes,
694 #if MYSQL_VERSION_ID >= 50600
695 const CHARSET_INFO* charset
696 #else
697 CHARSET_INFO* charset
698 #endif
699 )
700 {
701 uint32_t length = 0;
702 uint32_t local_char_length = 0;
703 uchar* blob_buf = NULL;
704
705 switch (length_bytes_in_mysql) {
706 case (0):
707 length = max_num_bytes;
708 break;
709 case (1):
710 length = (uint32_t)(*from_mysql);
711 break;
712 case (2):
713 length = uint2korr(from_mysql);
714 break;
715 case (3):
716 length = tokudb_uint3korr(from_mysql);
717 break;
718 case (4):
719 length = uint4korr(from_mysql);
720 break;
721 }
722 set_if_smaller(length,max_num_bytes);
723
724 memcpy(&blob_buf,from_mysql+length_bytes_in_mysql,sizeof(uchar *));
725
726 local_char_length= ((charset->mbmaxlen > 1) ?
727 max_num_bytes/charset->mbmaxlen : max_num_bytes);
728 if (length > local_char_length)
729 {
730 local_char_length= my_charpos(
731 charset,
732 blob_buf,
733 blob_buf+length,
734 local_char_length
735 );
736 set_if_smaller(length, local_char_length);
737 }
738
739
740 //
741 // copy the length bytes, assuming both are in little endian
742 //
743 to_tokudb[0] = (uchar)length & 255;
744 if (length_bytes_in_tokudb > 1) {
745 to_tokudb[1] = (uchar) (length >> 8);
746 }
747 //
748 // copy the string
749 //
750 memcpy(to_tokudb + length_bytes_in_tokudb, blob_buf, length);
751 return to_tokudb + length + length_bytes_in_tokudb;
752 }
753
754
unpack_toku_blob(uchar * to_mysql,uchar * from_tokudb,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql)755 static inline uchar* unpack_toku_blob(
756 uchar* to_mysql,
757 uchar* from_tokudb,
758 uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
759 uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
760 )
761 {
762 uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
763 uchar* blob_pos = NULL;
764 //
765 // copy the length into the mysql buffer
766 //
767 switch (length_bytes_in_mysql) {
768 case (0):
769 break;
770 case (1):
771 *to_mysql = (uchar) length;
772 break;
773 case (2):
774 int2store(to_mysql, length);
775 break;
776 case (3):
777 int3store(to_mysql, length);
778 break;
779 case (4):
780 int4store(to_mysql, length);
781 break;
782 default:
783 assert_unreachable();
784 }
785 //
786 // copy the binary data
787 //
788 blob_pos = from_tokudb + length_bytes_in_tokudb;
789 memcpy(to_mysql + length_bytes_in_mysql, &blob_pos, sizeof(uchar *));
790 return from_tokudb + length_bytes_in_tokudb+ length;
791 }
792
793
794 //
795 // partially copied from below
796 //
pack_toku_varstring_from_desc(uchar * to_tokudb,const uchar * from_desc,uint32_t key_part_length,uint32_t field_length,uint32_t charset_num)797 static uchar* pack_toku_varstring_from_desc(
798 uchar* to_tokudb,
799 const uchar* from_desc,
800 uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
801 uint32_t field_length,
802 uint32_t charset_num//length of field
803 )
804 {
805 CHARSET_INFO* charset = NULL;
806 uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
807 uint32_t length = field_length;
808 uint32_t local_char_length = 0;
809 set_if_smaller(length, key_part_length);
810
811 charset = get_charset_from_num(charset_num);
812
813 //
814 // copy the string
815 //
816 local_char_length= ((charset->mbmaxlen > 1) ?
817 key_part_length/charset->mbmaxlen : key_part_length);
818 if (length > local_char_length)
819 {
820 local_char_length= my_charpos(
821 charset,
822 from_desc,
823 from_desc+length,
824 local_char_length
825 );
826 set_if_smaller(length, local_char_length);
827 }
828
829
830 //
831 // copy the length bytes, assuming both are in little endian
832 //
833 to_tokudb[0] = (uchar)length & 255;
834 if (length_bytes_in_tokudb > 1) {
835 to_tokudb[1] = (uchar) (length >> 8);
836 }
837 //
838 // copy the string
839 //
840 memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
841 return to_tokudb + length + length_bytes_in_tokudb;
842 }
843
pack_toku_varstring(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes,const CHARSET_INFO * charset)844 static inline uchar* pack_toku_varstring(
845 uchar* to_tokudb,
846 uchar* from_mysql,
847 uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
848 uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
849 uint32_t max_num_bytes,
850 #if MYSQL_VERSION_ID >= 50600
851 const CHARSET_INFO *charset
852 #else
853 CHARSET_INFO* charset
854 #endif
855 )
856 {
857 uint32_t length = 0;
858 uint32_t local_char_length = 0;
859
860 switch (length_bytes_in_mysql) {
861 case (0):
862 length = max_num_bytes;
863 break;
864 case (1):
865 length = (uint32_t)(*from_mysql);
866 break;
867 case (2):
868 length = uint2korr(from_mysql);
869 break;
870 case (3):
871 length = tokudb_uint3korr(from_mysql);
872 break;
873 case (4):
874 length = uint4korr(from_mysql);
875 break;
876 }
877 set_if_smaller(length,max_num_bytes);
878
879 local_char_length= ((charset->mbmaxlen > 1) ?
880 max_num_bytes/charset->mbmaxlen : max_num_bytes);
881 if (length > local_char_length)
882 {
883 local_char_length= my_charpos(
884 charset,
885 from_mysql+length_bytes_in_mysql,
886 from_mysql+length_bytes_in_mysql+length,
887 local_char_length
888 );
889 set_if_smaller(length, local_char_length);
890 }
891
892
893 //
894 // copy the length bytes, assuming both are in little endian
895 //
896 to_tokudb[0] = (uchar)length & 255;
897 if (length_bytes_in_tokudb > 1) {
898 to_tokudb[1] = (uchar) (length >> 8);
899 }
900 //
901 // copy the string
902 //
903 memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
904 return to_tokudb + length + length_bytes_in_tokudb;
905 }
906
cmp_toku_string(uchar * a_buf,uint32_t a_num_bytes,uchar * b_buf,uint32_t b_num_bytes,uint32_t charset_number)907 static inline int cmp_toku_string(
908 uchar* a_buf,
909 uint32_t a_num_bytes,
910 uchar* b_buf,
911 uint32_t b_num_bytes,
912 uint32_t charset_number
913 )
914 {
915 int ret_val = 0;
916 CHARSET_INFO* charset = NULL;
917
918 charset = get_charset_from_num(charset_number);
919
920 ret_val = charset->coll->strnncollsp(
921 charset,
922 a_buf,
923 a_num_bytes,
924 b_buf,
925 b_num_bytes
926 );
927 return ret_val;
928 }
929
cmp_toku_varstring(uchar * a_buf,uchar * b_buf,uint32_t length_bytes,uint32_t charset_num,uint32_t * a_bytes_read,uint32_t * b_bytes_read)930 static inline int cmp_toku_varstring(
931 uchar* a_buf,
932 uchar* b_buf,
933 uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
934 uint32_t charset_num,
935 uint32_t* a_bytes_read,
936 uint32_t* b_bytes_read
937 )
938 {
939 int ret_val = 0;
940 uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
941 uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
942 ret_val = cmp_toku_string(
943 a_buf + length_bytes,
944 a_len,
945 b_buf + length_bytes,
946 b_len,
947 charset_num
948 );
949 *a_bytes_read = a_len + length_bytes;
950 *b_bytes_read = b_len + length_bytes;
951 return ret_val;
952 }
953
tokudb_compare_two_hidden_keys(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size)954 static inline int tokudb_compare_two_hidden_keys(
955 const void* new_key_data,
956 const uint32_t new_key_size,
957 const void* saved_key_data,
958 const uint32_t saved_key_size
959 ) {
960 assert_always(new_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
961 assert_always(saved_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
962 ulonglong a = hpk_char_to_num((uchar *) new_key_data);
963 ulonglong b = hpk_char_to_num((uchar *) saved_key_data);
964 return a < b ? -1 : (a > b ? 1 : 0);
965 }
966
967 //
968 // Returns number of bytes used for a given TOKU_TYPE
969 // in a key descriptor. The number of bytes returned
970 // here MUST match the number of bytes used for the encoding
971 // in create_toku_key_descriptor_for_key
972 // Parameters:
973 // [in] row_desc - buffer that contains portion of descriptor
974 // created in create_toku_key_descriptor_for_key. The first
975 // byte points to the TOKU_TYPE.
976 //
skip_field_in_descriptor(uchar * row_desc)977 static uint32_t skip_field_in_descriptor(uchar* row_desc) {
978 uchar* row_desc_pos = row_desc;
979 TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
980 row_desc_pos++;
981
982 switch (toku_type) {
983 case (toku_type_hpk):
984 case (toku_type_double):
985 case (toku_type_float):
986 break;
987 case (toku_type_int):
988 row_desc_pos += 2;
989 break;
990 case (toku_type_fixbinary):
991 case (toku_type_varbinary):
992 row_desc_pos++;
993 break;
994 case (toku_type_fixstring):
995 case (toku_type_varstring):
996 case (toku_type_blob):
997 row_desc_pos++;
998 row_desc_pos += sizeof(uint32_t);
999 break;
1000 default:
1001 assert_unreachable();
1002 }
1003 return (uint32_t)(row_desc_pos - row_desc);
1004 }
1005
1006 //
1007 // outputs a descriptor for key into buf. Returns number of bytes used in buf
1008 // to store the descriptor. Number of bytes used MUST match number of bytes
1009 // we would skip in skip_field_in_descriptor
1010 //
create_toku_key_descriptor_for_key(KEY * key,uchar * buf)1011 static int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) {
1012 uchar* pos = buf;
1013 uint32_t num_bytes_in_field = 0;
1014 uint32_t charset_num = 0;
1015 for (uint i = 0; i < key->user_defined_key_parts; i++) {
1016 Field* field = key->key_part[i].field;
1017 //
1018 // The first byte states if there is a null byte
1019 // 0 means no null byte, non-zer means there
1020 // is one
1021 //
1022 *pos = field->null_bit;
1023 pos++;
1024
1025 //
1026 // The second byte for each field is the type
1027 //
1028 TOKU_TYPE type = mysql_to_toku_type(field);
1029 assert_always((int)type < 256);
1030 *pos = (uchar)(type & 255);
1031 pos++;
1032
1033 //
1034 // based on the type, extra data follows afterwards
1035 //
1036 switch (type) {
1037 //
1038 // two bytes follow for ints, first one states how many
1039 // bytes the int is (1 , 2, 3, 4 or 8)
1040 // next one states if it is signed or not
1041 //
1042 case (toku_type_int):
1043 num_bytes_in_field = field->pack_length();
1044 assert_always (num_bytes_in_field < 256);
1045 *pos = (uchar)(num_bytes_in_field & 255);
1046 pos++;
1047 *pos = (field->flags & UNSIGNED_FLAG) ? 1 : 0;
1048 pos++;
1049 break;
1050 //
1051 // nothing follows floats and doubles
1052 //
1053 case (toku_type_double):
1054 case (toku_type_float):
1055 break;
1056 //
1057 // one byte follow stating the length of the field
1058 //
1059 case (toku_type_fixbinary):
1060 num_bytes_in_field = field->pack_length();
1061 set_if_smaller(num_bytes_in_field, key->key_part[i].length);
1062 assert_always(num_bytes_in_field < 256);
1063 pos[0] = (uchar)(num_bytes_in_field & 255);
1064 pos++;
1065 break;
1066 //
1067 // one byte follows: the number of bytes used to encode the length
1068 //
1069 case (toku_type_varbinary):
1070 *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
1071 pos++;
1072 break;
1073 //
1074 // five bytes follow: one for the number of bytes to encode the length,
1075 // four for the charset number
1076 //
1077 case (toku_type_fixstring):
1078 case (toku_type_varstring):
1079 case (toku_type_blob):
1080 *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
1081 pos++;
1082 charset_num = field->charset()->number;
1083 pos[0] = (uchar)(charset_num & 255);
1084 pos[1] = (uchar)((charset_num >> 8) & 255);
1085 pos[2] = (uchar)((charset_num >> 16) & 255);
1086 pos[3] = (uchar)((charset_num >> 24) & 255);
1087 pos += 4;
1088 break;
1089 default:
1090 assert_unreachable();
1091 }
1092 }
1093 return pos - buf;
1094 }
1095
1096
1097 //
1098 // Creates a descriptor for a DB. That contains all information necessary
1099 // to do both key comparisons and data comparisons (for dup-sort databases).
1100 //
1101 // There are two types of descriptors we care about:
1102 // 1) Primary key, (in a no-dup database)
1103 // 2) secondary keys, which are a secondary key followed by a primary key,
1104 // but in a no-dup database.
1105 //
1106 // I realize this may be confusing, but here is how it works.
1107 // All DB's have a key compare.
1108 // The format of the descriptor must be able to handle both.
1109 //
1110 // The first four bytes store an offset into the descriptor to the second piece
1111 // used for data comparisons. So, if in the future we want to append something
1112 // to the descriptor, we can.
1113 //
1114 //
create_toku_key_descriptor(uchar * buf,bool is_first_hpk,KEY * first_key,bool is_second_hpk,KEY * second_key)1115 static int create_toku_key_descriptor(
1116 uchar* buf,
1117 bool is_first_hpk,
1118 KEY* first_key,
1119 bool is_second_hpk,
1120 KEY* second_key
1121 )
1122 {
1123 //
1124 // The first four bytes always contain the offset of where the first key
1125 // ends.
1126 //
1127 uchar* pos = buf + 4;
1128 uint32_t num_bytes = 0;
1129 uint32_t offset = 0;
1130
1131
1132 if (is_first_hpk) {
1133 pos[0] = 0; //say there is NO infinity byte
1134 pos[1] = 0; //field cannot be NULL, stating it
1135 pos[2] = toku_type_hpk;
1136 pos += 3;
1137 }
1138 else {
1139 //
1140 // first key is NOT a hidden primary key, so we now pack first_key
1141 //
1142 pos[0] = 1; //say there is an infinity byte
1143 pos++;
1144 num_bytes = create_toku_key_descriptor_for_key(first_key, pos);
1145 pos += num_bytes;
1146 }
1147
1148 //
1149 // if we do not have a second key, we can jump to exit right now
1150 // we do not have a second key if it is not a hidden primary key
1151 // and if second_key is NULL
1152 //
1153 if (is_first_hpk || (!is_second_hpk && (second_key == NULL)) ) {
1154 goto exit;
1155 }
1156
1157 //
1158 // if we have a second key, and it is an hpk, we need to pack it, and
1159 // write in the offset to this position in the first four bytes
1160 //
1161 if (is_second_hpk) {
1162 pos[0] = 0; //field cannot be NULL, stating it
1163 pos[1] = toku_type_hpk;
1164 pos += 2;
1165 }
1166 else {
1167 //
1168 // second key is NOT a hidden primary key, so we now pack second_key
1169 //
1170 num_bytes = create_toku_key_descriptor_for_key(second_key, pos);
1171 pos += num_bytes;
1172 }
1173
1174
1175 exit:
1176 offset = pos - buf;
1177 buf[0] = (uchar)(offset & 255);
1178 buf[1] = (uchar)((offset >> 8) & 255);
1179 buf[2] = (uchar)((offset >> 16) & 255);
1180 buf[3] = (uchar)((offset >> 24) & 255);
1181
1182 return pos - buf;
1183 }
1184
1185
compare_toku_field(uchar * a_buf,uchar * b_buf,uchar * row_desc,uint32_t * a_bytes_read,uint32_t * b_bytes_read,uint32_t * row_desc_bytes_read,bool * read_string)1186 static inline int compare_toku_field(
1187 uchar* a_buf,
1188 uchar* b_buf,
1189 uchar* row_desc,
1190 uint32_t* a_bytes_read,
1191 uint32_t* b_bytes_read,
1192 uint32_t* row_desc_bytes_read,
1193 bool* read_string
1194 )
1195 {
1196 int ret_val = 0;
1197 uchar* row_desc_pos = row_desc;
1198 uint32_t num_bytes = 0;
1199 uint32_t length_bytes = 0;
1200 uint32_t charset_num = 0;
1201 bool is_unsigned = false;
1202
1203 TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
1204 row_desc_pos++;
1205
1206 switch (toku_type) {
1207 case (toku_type_hpk):
1208 ret_val = tokudb_compare_two_hidden_keys(
1209 a_buf,
1210 TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH,
1211 b_buf,
1212 TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH
1213 );
1214 *a_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
1215 *b_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
1216 break;
1217 case (toku_type_int):
1218 num_bytes = row_desc_pos[0];
1219 is_unsigned = row_desc_pos[1];
1220 ret_val = cmp_toku_int(
1221 a_buf,
1222 b_buf,
1223 is_unsigned,
1224 num_bytes
1225 );
1226 *a_bytes_read = num_bytes;
1227 *b_bytes_read = num_bytes;
1228 row_desc_pos += 2;
1229 break;
1230 case (toku_type_double):
1231 ret_val = cmp_toku_double(a_buf, b_buf);
1232 *a_bytes_read = sizeof(double);
1233 *b_bytes_read = sizeof(double);
1234 break;
1235 case (toku_type_float):
1236 ret_val = cmp_toku_float(a_buf, b_buf);
1237 *a_bytes_read = sizeof(float);
1238 *b_bytes_read = sizeof(float);
1239 break;
1240 case (toku_type_fixbinary):
1241 num_bytes = row_desc_pos[0];
1242 ret_val = cmp_toku_binary(a_buf, num_bytes, b_buf,num_bytes);
1243 *a_bytes_read = num_bytes;
1244 *b_bytes_read = num_bytes;
1245 row_desc_pos++;
1246 break;
1247 case (toku_type_varbinary):
1248 length_bytes = row_desc_pos[0];
1249 ret_val = cmp_toku_varbinary(
1250 a_buf,
1251 b_buf,
1252 length_bytes,
1253 a_bytes_read,
1254 b_bytes_read
1255 );
1256 row_desc_pos++;
1257 break;
1258 case (toku_type_fixstring):
1259 case (toku_type_varstring):
1260 case (toku_type_blob):
1261 length_bytes = row_desc_pos[0];
1262 row_desc_pos++;
1263 //
1264 // not sure we want to read charset_num like this
1265 //
1266 charset_num = *(uint32_t *)row_desc_pos;
1267 row_desc_pos += sizeof(uint32_t);
1268 ret_val = cmp_toku_varstring(
1269 a_buf,
1270 b_buf,
1271 length_bytes,
1272 charset_num,
1273 a_bytes_read,
1274 b_bytes_read
1275 );
1276 *read_string = true;
1277 break;
1278 default:
1279 assert_unreachable();
1280 }
1281
1282 *row_desc_bytes_read = row_desc_pos - row_desc;
1283 return ret_val;
1284 }
1285
1286 //
1287 // packs a field from a MySQL buffer into a tokudb buffer.
1288 // Used for inserts/updates
1289 //
pack_toku_key_field(uchar * to_tokudb,uchar * from_mysql,Field * field,uint32_t key_part_length)1290 static uchar* pack_toku_key_field(
1291 uchar* to_tokudb,
1292 uchar* from_mysql,
1293 Field* field,
1294 uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
1295 )
1296 {
1297 uchar* new_pos = NULL;
1298 uint32_t num_bytes = 0;
1299 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1300 switch(toku_type) {
1301 case (toku_type_int):
1302 assert_always(key_part_length == field->pack_length());
1303 new_pos = pack_toku_int(
1304 to_tokudb,
1305 from_mysql,
1306 field->pack_length()
1307 );
1308 goto exit;
1309 case (toku_type_double):
1310 assert_always(field->pack_length() == sizeof(double));
1311 assert_always(key_part_length == sizeof(double));
1312 new_pos = pack_toku_double(to_tokudb, from_mysql);
1313 goto exit;
1314 case (toku_type_float):
1315 assert_always(field->pack_length() == sizeof(float));
1316 assert_always(key_part_length == sizeof(float));
1317 new_pos = pack_toku_float(to_tokudb, from_mysql);
1318 goto exit;
1319 case (toku_type_fixbinary):
1320 num_bytes = field->pack_length();
1321 set_if_smaller(num_bytes, key_part_length);
1322 new_pos = pack_toku_binary(
1323 to_tokudb,
1324 from_mysql,
1325 num_bytes
1326 );
1327 goto exit;
1328 case (toku_type_fixstring):
1329 num_bytes = field->pack_length();
1330 set_if_smaller(num_bytes, key_part_length);
1331 new_pos = pack_toku_varstring(
1332 to_tokudb,
1333 from_mysql,
1334 get_length_bytes_from_max(key_part_length),
1335 0,
1336 num_bytes,
1337 field->charset()
1338 );
1339 goto exit;
1340 case (toku_type_varbinary):
1341 new_pos = pack_toku_varbinary(
1342 to_tokudb,
1343 from_mysql,
1344 ((Field_varstring *)field)->length_bytes,
1345 key_part_length
1346 );
1347 goto exit;
1348 case (toku_type_varstring):
1349 new_pos = pack_toku_varstring(
1350 to_tokudb,
1351 from_mysql,
1352 get_length_bytes_from_max(key_part_length),
1353 ((Field_varstring *)field)->length_bytes,
1354 key_part_length,
1355 field->charset()
1356 );
1357 goto exit;
1358 case (toku_type_blob):
1359 new_pos = pack_toku_blob(
1360 to_tokudb,
1361 from_mysql,
1362 get_length_bytes_from_max(key_part_length),
1363 ((Field_blob *)field)->row_pack_length(), //only calling this because packlength is returned
1364 key_part_length,
1365 field->charset()
1366 );
1367 goto exit;
1368 default:
1369 assert_unreachable();
1370 }
1371 assert_unreachable();
1372 exit:
1373 return new_pos;
1374 }
1375
1376 //
1377 // packs a field from a MySQL buffer into a tokudb buffer.
1378 // Used for queries. The only difference between this function
1379 // and pack_toku_key_field is that all variable sized columns
1380 // use 2 bytes to encode the length, regardless of the field
1381 // So varchar(4) will still use 2 bytes to encode the field
1382 //
pack_key_toku_key_field(uchar * to_tokudb,uchar * from_mysql,Field * field,uint32_t key_part_length)1383 static uchar* pack_key_toku_key_field(
1384 uchar* to_tokudb,
1385 uchar* from_mysql,
1386 Field* field,
1387 uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
1388 )
1389 {
1390 uchar* new_pos = NULL;
1391 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1392 switch(toku_type) {
1393 case (toku_type_int):
1394 case (toku_type_double):
1395 case (toku_type_float):
1396 case (toku_type_fixbinary):
1397 case (toku_type_fixstring):
1398 new_pos = pack_toku_key_field(to_tokudb, from_mysql, field, key_part_length);
1399 goto exit;
1400 case (toku_type_varbinary):
1401 new_pos = pack_toku_varbinary(
1402 to_tokudb,
1403 from_mysql,
1404 2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
1405 key_part_length
1406 );
1407 goto exit;
1408 case (toku_type_varstring):
1409 case (toku_type_blob):
1410 new_pos = pack_toku_varstring(
1411 to_tokudb,
1412 from_mysql,
1413 get_length_bytes_from_max(key_part_length),
1414 2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
1415 key_part_length,
1416 field->charset()
1417 );
1418 goto exit;
1419 default:
1420 assert_unreachable();
1421 }
1422
1423 assert_unreachable();
1424 exit:
1425 return new_pos;
1426 }
1427
1428
unpack_toku_key_field(uchar * to_mysql,uchar * from_tokudb,Field * field,uint32_t key_part_length)1429 uchar* unpack_toku_key_field(
1430 uchar* to_mysql,
1431 uchar* from_tokudb,
1432 Field* field,
1433 uint32_t key_part_length) {
1434
1435 uchar* new_pos = NULL;
1436 uint32_t num_bytes = 0;
1437 uint32_t num_bytes_copied;
1438 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1439 switch(toku_type) {
1440 case (toku_type_int):
1441 assert_always(key_part_length == field->pack_length());
1442 new_pos = unpack_toku_int(
1443 to_mysql,
1444 from_tokudb,
1445 field->pack_length()
1446 );
1447 goto exit;
1448 case (toku_type_double):
1449 assert_always(field->pack_length() == sizeof(double));
1450 assert_always(key_part_length == sizeof(double));
1451 new_pos = unpack_toku_double(to_mysql, from_tokudb);
1452 goto exit;
1453 case (toku_type_float):
1454 assert_always(field->pack_length() == sizeof(float));
1455 assert_always(key_part_length == sizeof(float));
1456 new_pos = unpack_toku_float(to_mysql, from_tokudb);
1457 goto exit;
1458 case (toku_type_fixbinary):
1459 num_bytes = field->pack_length();
1460 set_if_smaller(num_bytes, key_part_length);
1461 new_pos = unpack_toku_binary(
1462 to_mysql,
1463 from_tokudb,
1464 num_bytes);
1465 goto exit;
1466 case (toku_type_fixstring):
1467 num_bytes = field->pack_length();
1468 new_pos = unpack_toku_varbinary(
1469 to_mysql,
1470 from_tokudb,
1471 get_length_bytes_from_max(key_part_length),
1472 0);
1473 num_bytes_copied =
1474 new_pos -
1475 (from_tokudb + get_length_bytes_from_max(key_part_length));
1476 assert_always(num_bytes_copied <= num_bytes);
1477 memset(
1478 to_mysql + num_bytes_copied,
1479 field->charset()->pad_char,
1480 num_bytes - num_bytes_copied);
1481 goto exit;
1482 case (toku_type_varbinary):
1483 case (toku_type_varstring):
1484 new_pos = unpack_toku_varbinary(
1485 to_mysql,
1486 from_tokudb,
1487 get_length_bytes_from_max(key_part_length),
1488 ((Field_varstring*)field)->length_bytes);
1489 goto exit;
1490 case (toku_type_blob):
1491 new_pos = unpack_toku_blob(
1492 to_mysql,
1493 from_tokudb,
1494 get_length_bytes_from_max(key_part_length),
1495 //only calling this because packlength is returned
1496 ((Field_blob *)field)->row_pack_length());
1497 goto exit;
1498 default:
1499 assert_unreachable();
1500 }
1501 assert_unreachable();
1502 exit:
1503 return new_pos;
1504 }
1505
1506
tokudb_compare_two_keys(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size,const void * row_desc,const uint32_t row_desc_size,bool cmp_prefix,bool * read_string)1507 static int tokudb_compare_two_keys(
1508 const void* new_key_data,
1509 const uint32_t new_key_size,
1510 const void* saved_key_data,
1511 const uint32_t saved_key_size,
1512 const void* row_desc,
1513 const uint32_t row_desc_size,
1514 bool cmp_prefix,
1515 bool* read_string) {
1516
1517 int ret_val = 0;
1518 int8_t new_key_inf_val = COL_NEG_INF;
1519 int8_t saved_key_inf_val = COL_NEG_INF;
1520
1521 uchar* row_desc_ptr = (uchar *)row_desc;
1522 uchar *new_key_ptr = (uchar *)new_key_data;
1523 uchar *saved_key_ptr = (uchar *)saved_key_data;
1524
1525 uint32_t new_key_bytes_left = new_key_size;
1526 uint32_t saved_key_bytes_left = saved_key_size;
1527
1528 //
1529 // if the keys have an infinity byte, set it
1530 //
1531 if (row_desc_ptr[0]) {
1532 new_key_inf_val = (int8_t)new_key_ptr[0];
1533 saved_key_inf_val = (int8_t)saved_key_ptr[0];
1534 new_key_ptr++;
1535 saved_key_ptr++;
1536 }
1537 row_desc_ptr++;
1538
1539 while ((uint32_t)(new_key_ptr - (uchar*)new_key_data) < new_key_size &&
1540 (uint32_t)(saved_key_ptr - (uchar*)saved_key_data) < saved_key_size &&
1541 (uint32_t)(row_desc_ptr - (uchar*)row_desc) < row_desc_size) {
1542 uint32_t new_key_field_length;
1543 uint32_t saved_key_field_length;
1544 uint32_t row_desc_field_length;
1545 //
1546 // if there is a null byte at this point in the key
1547 //
1548 if (row_desc_ptr[0]) {
1549 //
1550 // compare null bytes. If different, return
1551 //
1552 if (new_key_ptr[0] != saved_key_ptr[0]) {
1553 ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
1554 goto exit;
1555 }
1556 saved_key_ptr++;
1557 //
1558 // in case we just read the fact that new_key_ptr and saved_key_ptr
1559 // have NULL as their next field
1560 //
1561 if (!*new_key_ptr++) {
1562 //
1563 // skip row_desc_ptr[0] read in if clause
1564 //
1565 row_desc_ptr++;
1566 //
1567 // skip data that describes rest of field
1568 //
1569 row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
1570 continue;
1571 }
1572 }
1573 row_desc_ptr++;
1574
1575 ret_val = compare_toku_field(
1576 new_key_ptr,
1577 saved_key_ptr,
1578 row_desc_ptr,
1579 &new_key_field_length,
1580 &saved_key_field_length,
1581 &row_desc_field_length,
1582 read_string);
1583 new_key_ptr += new_key_field_length;
1584 saved_key_ptr += saved_key_field_length;
1585 row_desc_ptr += row_desc_field_length;
1586 if (ret_val) {
1587 goto exit;
1588 }
1589
1590 assert_always(
1591 (uint32_t)(new_key_ptr - (uchar*)new_key_data) <= new_key_size);
1592 assert_always(
1593 (uint32_t)(saved_key_ptr - (uchar*)saved_key_data) <= saved_key_size);
1594 assert_always(
1595 (uint32_t)(row_desc_ptr - (uchar*)row_desc) <= row_desc_size);
1596 }
1597 new_key_bytes_left =
1598 new_key_size - ((uint32_t)(new_key_ptr - (uchar*)new_key_data));
1599 saved_key_bytes_left =
1600 saved_key_size - ((uint32_t)(saved_key_ptr - (uchar*)saved_key_data));
1601 if (cmp_prefix) {
1602 ret_val = 0;
1603 } else if (new_key_bytes_left== 0 && saved_key_bytes_left== 0) {
1604 // in this case, read both keys to completion, now read infinity byte
1605 ret_val = new_key_inf_val - saved_key_inf_val;
1606 } else if (new_key_bytes_left == 0 && saved_key_bytes_left > 0) {
1607 // at this point, one SHOULD be 0
1608 ret_val = (new_key_inf_val == COL_POS_INF ) ? 1 : -1;
1609 } else if (new_key_bytes_left > 0 && saved_key_bytes_left == 0) {
1610 ret_val = (saved_key_inf_val == COL_POS_INF ) ? -1 : 1;
1611 } else {
1612 // this should never happen, perhaps we should assert(false)
1613 assert_unreachable();
1614 ret_val = new_key_bytes_left - saved_key_bytes_left;
1615 }
1616 exit:
1617 return ret_val;
1618 }
1619
simple_memcmp(const DBT * keya,const DBT * keyb)1620 static int simple_memcmp(const DBT *keya, const DBT *keyb) {
1621 int cmp;
1622 int num_bytes_cmp = keya->size < keyb->size ?
1623 keya->size : keyb->size;
1624 cmp = memcmp(keya->data,keyb->data,num_bytes_cmp);
1625 if (cmp == 0 && (keya->size != keyb->size)) {
1626 cmp = keya->size < keyb->size ? -1 : 1;
1627 }
1628 return cmp;
1629 }
1630
1631 // comparison function to be used by the fractal trees.
tokudb_cmp_dbt_key(DB * file,const DBT * keya,const DBT * keyb)1632 static int tokudb_cmp_dbt_key(DB* file, const DBT *keya, const DBT *keyb) {
1633 int cmp;
1634 if (file->cmp_descriptor->dbt.size == 0) {
1635 cmp = simple_memcmp(keya, keyb);
1636 }
1637 else {
1638 bool read_string = false;
1639 cmp = tokudb_compare_two_keys(
1640 keya->data,
1641 keya->size,
1642 keyb->data,
1643 keyb->size,
1644 (uchar *)file->cmp_descriptor->dbt.data + 4,
1645 (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
1646 false,
1647 &read_string
1648 );
1649 // comparison above may be case-insensitive, but fractal tree
1650 // needs to distinguish between different data, so we do this
1651 // additional check here
1652 if (read_string && (cmp == 0)) {
1653 cmp = simple_memcmp(keya, keyb);
1654 }
1655 }
1656 return cmp;
1657 }
1658
1659 //TODO: QQQ Only do one direction for prefix.
tokudb_prefix_cmp_dbt_key(DB * file,const DBT * keya,const DBT * keyb)1660 static int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb) {
1661 // calls to this function are done by the handlerton, and are
1662 // comparing just the keys as MySQL would compare them.
1663 bool read_string = false;
1664 int cmp = tokudb_compare_two_keys(
1665 keya->data,
1666 keya->size,
1667 keyb->data,
1668 keyb->size,
1669 (uchar *)file->cmp_descriptor->dbt.data + 4,
1670 *(uint32_t *)file->cmp_descriptor->dbt.data - 4,
1671 true,
1672 &read_string
1673 );
1674 return cmp;
1675 }
1676
tokudb_compare_two_key_parts(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size,const void * row_desc,const uint32_t row_desc_size,uint max_parts)1677 static int tokudb_compare_two_key_parts(
1678 const void* new_key_data,
1679 const uint32_t new_key_size,
1680 const void* saved_key_data,
1681 const uint32_t saved_key_size,
1682 const void* row_desc,
1683 const uint32_t row_desc_size,
1684 uint max_parts
1685 )
1686 {
1687 int ret_val = 0;
1688
1689 uchar* row_desc_ptr = (uchar *)row_desc;
1690 uchar *new_key_ptr = (uchar *)new_key_data;
1691 uchar *saved_key_ptr = (uchar *)saved_key_data;
1692
1693 //
1694 // if the keys have an infinity byte, set it
1695 //
1696 if (row_desc_ptr[0]) {
1697 // new_key_inf_val = (int8_t)new_key_ptr[0];
1698 // saved_key_inf_val = (int8_t)saved_key_ptr[0];
1699 new_key_ptr++;
1700 saved_key_ptr++;
1701 }
1702 row_desc_ptr++;
1703
1704 for (uint i = 0; i < max_parts; i++) {
1705 if (!((uint32_t)(new_key_ptr - (uchar *)new_key_data) < new_key_size &&
1706 (uint32_t)(saved_key_ptr - (uchar *)saved_key_data) < saved_key_size &&
1707 (uint32_t)(row_desc_ptr - (uchar *)row_desc) < row_desc_size))
1708 break;
1709 uint32_t new_key_field_length;
1710 uint32_t saved_key_field_length;
1711 uint32_t row_desc_field_length;
1712 //
1713 // if there is a null byte at this point in the key
1714 //
1715 if (row_desc_ptr[0]) {
1716 //
1717 // compare null bytes. If different, return
1718 //
1719 if (new_key_ptr[0] != saved_key_ptr[0]) {
1720 ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
1721 goto exit;
1722 }
1723 saved_key_ptr++;
1724 //
1725 // in case we just read the fact that new_key_ptr and saved_key_ptr
1726 // have NULL as their next field
1727 //
1728 if (!*new_key_ptr++) {
1729 //
1730 // skip row_desc_ptr[0] read in if clause
1731 //
1732 row_desc_ptr++;
1733 //
1734 // skip data that describes rest of field
1735 //
1736 row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
1737 continue;
1738 }
1739 }
1740 row_desc_ptr++;
1741 bool read_string = false;
1742 ret_val = compare_toku_field(
1743 new_key_ptr,
1744 saved_key_ptr,
1745 row_desc_ptr,
1746 &new_key_field_length,
1747 &saved_key_field_length,
1748 &row_desc_field_length,
1749 &read_string
1750 );
1751 new_key_ptr += new_key_field_length;
1752 saved_key_ptr += saved_key_field_length;
1753 row_desc_ptr += row_desc_field_length;
1754 if (ret_val) {
1755 goto exit;
1756 }
1757
1758 assert_always((uint32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size);
1759 assert_always((uint32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size);
1760 assert_always((uint32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size);
1761 }
1762
1763 ret_val = 0;
1764 exit:
1765 return ret_val;
1766 }
1767
tokudb_cmp_dbt_key_parts(DB * file,const DBT * keya,const DBT * keyb,uint max_parts)1768 static int tokudb_cmp_dbt_key_parts(DB *file, const DBT *keya, const DBT *keyb, uint max_parts) {
1769 assert_always(file->cmp_descriptor->dbt.size);
1770 return tokudb_compare_two_key_parts(
1771 keya->data,
1772 keya->size,
1773 keyb->data,
1774 keyb->size,
1775 (uchar *)file->cmp_descriptor->dbt.data + 4,
1776 (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
1777 max_parts);
1778 }
1779
create_toku_main_key_pack_descriptor(uchar * buf)1780 static uint32_t create_toku_main_key_pack_descriptor (
1781 uchar* buf
1782 )
1783 {
1784 //
1785 // The first four bytes always contain the offset of where the first key
1786 // ends.
1787 //
1788 uchar* pos = buf + 4;
1789 uint32_t offset = 0;
1790 //
1791 // one byte states if this is the main dictionary
1792 //
1793 pos[0] = 1;
1794 pos++;
1795 goto exit;
1796
1797
1798 exit:
1799 offset = pos - buf;
1800 buf[0] = (uchar)(offset & 255);
1801 buf[1] = (uchar)((offset >> 8) & 255);
1802 buf[2] = (uchar)((offset >> 16) & 255);
1803 buf[3] = (uchar)((offset >> 24) & 255);
1804
1805 return pos - buf;
1806 }
1807
// Tag bytes written by pack_desc_char_info: the column has no charset, or
// it has one, in which case a four byte charset number follows the tag.
#define COL_HAS_NO_CHARSET 0x44
#define COL_HAS_CHARSET 0x55

// Tag bytes written by pack_desc_pk_offset_info: the four bytes after the
// tag hold either a constant byte offset into the primary key
// (COL_FIX_PK_OFFSET) or the index of the key part within the primary key
// (COL_VAR_PK_OFFSET).
#define COL_FIX_PK_OFFSET 0x66
#define COL_VAR_PK_OFFSET 0x77

// CK_FIX_RANGE is the tag byte create_toku_clustering_val_pack_descriptor
// writes in front of a start/end offset pair covering fixed fields.
// NOTE(review): CK_VAR_RANGE is presumably the counterpart for variable
// sized fields -- confirm against its users.
#define CK_FIX_RANGE 0x88
#define CK_VAR_RANGE 0x99

// Copies the col_pack_val of column field_index (as laid out for key
// pk_index) to pos as sizeof(uint32_t) bytes and advances pos past it.
// Relies on the locals pos, kc_info, pk_index and field_index of the
// function it is used in. Expands to two statements and already ends in a
// semicolon, so it must not be used as the unbraced body of an if/else.
#define COPY_OFFSET_TO_BUF memcpy ( \
    pos, \
    &kc_info->cp_info[pk_index][field_index].col_pack_val, \
    sizeof(uint32_t) \
    ); \
    pos += sizeof(uint32_t);
1823
1824
pack_desc_pk_info(uchar * buf,KEY_AND_COL_INFO * kc_info,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1825 static uint32_t pack_desc_pk_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1826 uchar* pos = buf;
1827 uint16 field_index = key_part->field->field_index;
1828 Field* field = table_share->field[field_index];
1829 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1830 uint32_t key_part_length = key_part->length;
1831 uint32_t field_length;
1832 uchar len_bytes = 0;
1833
1834 switch(toku_type) {
1835 case (toku_type_int):
1836 case (toku_type_double):
1837 case (toku_type_float):
1838 pos[0] = COL_FIX_FIELD;
1839 pos++;
1840 assert_always(kc_info->field_lengths[field_index] < 256);
1841 pos[0] = kc_info->field_lengths[field_index];
1842 pos++;
1843 break;
1844 case (toku_type_fixbinary):
1845 pos[0] = COL_FIX_FIELD;
1846 pos++;
1847 field_length = field->pack_length();
1848 set_if_smaller(key_part_length, field_length);
1849 assert_always(key_part_length < 256);
1850 pos[0] = (uchar)key_part_length;
1851 pos++;
1852 break;
1853 case (toku_type_fixstring):
1854 case (toku_type_varbinary):
1855 case (toku_type_varstring):
1856 case (toku_type_blob):
1857 pos[0] = COL_VAR_FIELD;
1858 pos++;
1859 len_bytes = (key_part_length > 255) ? 2 : 1;
1860 pos[0] = len_bytes;
1861 pos++;
1862 break;
1863 default:
1864 assert_unreachable();
1865 }
1866
1867 return pos - buf;
1868 }
1869
pack_desc_pk_offset_info(uchar * buf,KEY_PART_INFO * key_part,KEY * prim_key,uchar * pk_info)1870 static uint32_t pack_desc_pk_offset_info(uchar* buf,
1871 KEY_PART_INFO* key_part,
1872 KEY* prim_key,
1873 uchar* pk_info) {
1874 uchar* pos = buf;
1875 uint16 field_index = key_part->field->field_index;
1876 bool found_col_in_pk = false;
1877 uint32_t index_in_pk;
1878
1879 bool is_constant_offset = true;
1880 uint32_t offset = 0;
1881 for (uint i = 0; i < prim_key->user_defined_key_parts; i++) {
1882 KEY_PART_INFO curr = prim_key->key_part[i];
1883 uint16 curr_field_index = curr.field->field_index;
1884
1885 if (pk_info[2*i] == COL_VAR_FIELD) {
1886 is_constant_offset = false;
1887 }
1888
1889 if (curr_field_index == field_index) {
1890 found_col_in_pk = true;
1891 index_in_pk = i;
1892 break;
1893 }
1894 offset += pk_info[2*i + 1];
1895 }
1896 assert_always(found_col_in_pk);
1897 if (is_constant_offset) {
1898 pos[0] = COL_FIX_PK_OFFSET;
1899 pos++;
1900
1901 memcpy (pos, &offset, sizeof(offset));
1902 pos += sizeof(offset);
1903 }
1904 else {
1905 pos[0] = COL_VAR_PK_OFFSET;
1906 pos++;
1907
1908 memcpy(pos, &index_in_pk, sizeof(index_in_pk));
1909 pos += sizeof(index_in_pk);
1910 }
1911 return pos - buf;
1912 }
1913
pack_desc_offset_info(uchar * buf,KEY_AND_COL_INFO * kc_info,uint pk_index,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1914 static uint32_t pack_desc_offset_info(uchar* buf, KEY_AND_COL_INFO* kc_info, uint pk_index, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1915 uchar* pos = buf;
1916 uint16 field_index = key_part->field->field_index;
1917 Field* field = table_share->field[field_index];
1918 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1919 bool found_index = false;
1920
1921 switch(toku_type) {
1922 case (toku_type_int):
1923 case (toku_type_double):
1924 case (toku_type_float):
1925 case (toku_type_fixbinary):
1926 case (toku_type_fixstring):
1927 pos[0] = COL_FIX_FIELD;
1928 pos++;
1929
1930 // copy the offset
1931 COPY_OFFSET_TO_BUF;
1932 break;
1933 case (toku_type_varbinary):
1934 case (toku_type_varstring):
1935 pos[0] = COL_VAR_FIELD;
1936 pos++;
1937
1938 // copy the offset
1939 COPY_OFFSET_TO_BUF;
1940 break;
1941 case (toku_type_blob):
1942 pos[0] = COL_BLOB_FIELD;
1943 pos++;
1944 for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
1945 uint32_t blob_index = kc_info->blob_fields[i];
1946 if (blob_index == field_index) {
1947 uint32_t val = i;
1948 memcpy(pos, &val, sizeof(uint32_t));
1949 pos += sizeof(uint32_t);
1950 found_index = true;
1951 break;
1952 }
1953 }
1954 assert_always(found_index);
1955 break;
1956 default:
1957 assert_unreachable();
1958 }
1959
1960 return pos - buf;
1961 }
1962
pack_desc_key_length_info(uchar * buf,KEY_AND_COL_INFO * kc_info,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1963 static uint32_t pack_desc_key_length_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1964 uchar* pos = buf;
1965 uint16 field_index = key_part->field->field_index;
1966 Field* field = table_share->field[field_index];
1967 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1968 uint32_t key_part_length = key_part->length;
1969 uint32_t field_length;
1970
1971 switch(toku_type) {
1972 case (toku_type_int):
1973 case (toku_type_double):
1974 case (toku_type_float):
1975 // copy the key_part length
1976 field_length = kc_info->field_lengths[field_index];
1977 memcpy(pos, &field_length, sizeof(field_length));
1978 pos += sizeof(key_part_length);
1979 break;
1980 case (toku_type_fixbinary):
1981 case (toku_type_fixstring):
1982 field_length = field->pack_length();
1983 set_if_smaller(key_part_length, field_length);
1984 // fallthrough
1985 case (toku_type_varbinary):
1986 case (toku_type_varstring):
1987 case (toku_type_blob):
1988 // copy the key_part length
1989 memcpy(pos, &key_part_length, sizeof(key_part_length));
1990 pos += sizeof(key_part_length);
1991 break;
1992 default:
1993 assert_unreachable();
1994 }
1995
1996 return pos - buf;
1997 }
1998
pack_desc_char_info(uchar * buf,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1999 static uint32_t pack_desc_char_info(uchar* buf,
2000 TABLE_SHARE* table_share,
2001 KEY_PART_INFO* key_part) {
2002 uchar* pos = buf;
2003 uint16 field_index = key_part->field->field_index;
2004 Field* field = table_share->field[field_index];
2005 TOKU_TYPE toku_type = mysql_to_toku_type(field);
2006 uint32_t charset_num = 0;
2007
2008 switch(toku_type) {
2009 case (toku_type_int):
2010 case (toku_type_double):
2011 case (toku_type_float):
2012 case (toku_type_fixbinary):
2013 case (toku_type_varbinary):
2014 pos[0] = COL_HAS_NO_CHARSET;
2015 pos++;
2016 break;
2017 case (toku_type_fixstring):
2018 case (toku_type_varstring):
2019 case (toku_type_blob):
2020 pos[0] = COL_HAS_CHARSET;
2021 pos++;
2022
2023 // copy the charset
2024 charset_num = field->charset()->number;
2025 pos[0] = (uchar)(charset_num & 255);
2026 pos[1] = (uchar)((charset_num >> 8) & 255);
2027 pos[2] = (uchar)((charset_num >> 16) & 255);
2028 pos[3] = (uchar)((charset_num >> 24) & 255);
2029 pos += 4;
2030 break;
2031 default:
2032 assert_unreachable();
2033 }
2034
2035 return pos - buf;
2036 }
2037
pack_some_row_info(uchar * buf,uint pk_index,TABLE_SHARE * table_share,KEY_AND_COL_INFO * kc_info)2038 static uint32_t pack_some_row_info (
2039 uchar* buf,
2040 uint pk_index,
2041 TABLE_SHARE* table_share,
2042 KEY_AND_COL_INFO* kc_info
2043 )
2044 {
2045 uchar* pos = buf;
2046 uint32_t num_null_bytes = 0;
2047 //
2048 // four bytes stating number of null bytes
2049 //
2050 num_null_bytes = table_share->null_bytes;
2051 memcpy(pos, &num_null_bytes, sizeof(num_null_bytes));
2052 pos += sizeof(num_null_bytes);
2053 //
2054 // eight bytes stating mcp_info
2055 //
2056 memcpy(pos, &kc_info->mcp_info[pk_index], sizeof(MULTI_COL_PACK_INFO));
2057 pos += sizeof(MULTI_COL_PACK_INFO);
2058 //
2059 // one byte for the number of offset bytes
2060 //
2061 pos[0] = (uchar)kc_info->num_offset_bytes;
2062 pos++;
2063
2064 return pos - buf;
2065 }
2066
get_max_clustering_val_pack_desc_size(TABLE_SHARE * table_share)2067 static uint32_t get_max_clustering_val_pack_desc_size(
2068 TABLE_SHARE* table_share
2069 )
2070 {
2071 uint32_t ret_val = 0;
2072 //
2073 // the fixed stuff:
2074 // first the things in pack_some_row_info
2075 // second another mcp_info
2076 // third a byte that states if blobs exist
2077 ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
2078 ret_val += sizeof(MULTI_COL_PACK_INFO);
2079 ret_val++;
2080 //
2081 // now the variable stuff
2082 // an upper bound is, for each field, byte stating if it is fixed or var, followed
2083 // by 8 bytes for endpoints
2084 //
2085 ret_val += (table_share->fields)*(1 + 2*sizeof(uint32_t));
2086 //
2087 // four bytes storing the length of this portion
2088 //
2089 ret_val += 4;
2090
2091 return ret_val;
2092 }
2093
create_toku_clustering_val_pack_descriptor(uchar * buf,uint pk_index,TABLE_SHARE * table_share,KEY_AND_COL_INFO * kc_info,uint32_t keynr,bool is_clustering)2094 static uint32_t create_toku_clustering_val_pack_descriptor (
2095 uchar* buf,
2096 uint pk_index,
2097 TABLE_SHARE* table_share,
2098 KEY_AND_COL_INFO* kc_info,
2099 uint32_t keynr,
2100 bool is_clustering
2101 )
2102 {
2103 uchar* pos = buf + 4;
2104 uint32_t offset = 0;
2105 bool start_range_set = false;
2106 uint32_t last_col = 0;
2107 //
2108 // do not need to write anything if the key is not clustering
2109 //
2110 if (!is_clustering) {
2111 goto exit;
2112 }
2113
2114 pos += pack_some_row_info(
2115 pos,
2116 pk_index,
2117 table_share,
2118 kc_info
2119 );
2120
2121 //
2122 // eight bytes stating mcp_info of clustering key
2123 //
2124 memcpy(pos, &kc_info->mcp_info[keynr], sizeof(MULTI_COL_PACK_INFO));
2125 pos += sizeof(MULTI_COL_PACK_INFO);
2126
2127 //
2128 // store bit that states if blobs exist
2129 //
2130 pos[0] = (kc_info->num_blobs) ? 1 : 0;
2131 pos++;
2132
2133 //
2134 // descriptor assumes that all fields filtered from pk are
2135 // also filtered from clustering key val. Doing check here to
2136 // make sure something unexpected does not happen
2137 //
2138 for (uint i = 0; i < table_share->fields; i++) {
2139 bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2140 bool col_filtered_in_pk = bitmap_is_set(&kc_info->key_filters[pk_index],i);
2141 if (col_filtered_in_pk) {
2142 assert_always(col_filtered);
2143 }
2144 }
2145
2146 //
2147 // first handle the fixed fields
2148 //
2149 start_range_set = false;
2150 last_col = 0;
2151 for (uint i = 0; i < table_share->fields; i++) {
2152 bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2153 if (!is_fixed_field(kc_info, i)) {
2154 //
2155 // not a fixed field, continue
2156 //
2157 continue;
2158 }
2159 if (col_filtered && start_range_set) {
2160 //
2161 // need to set the end range
2162 //
2163 start_range_set = false;
2164 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val + kc_info->field_lengths[last_col];
2165 memcpy(pos, &end_offset, sizeof(end_offset));
2166 pos += sizeof(end_offset);
2167 }
2168 else if (!col_filtered) {
2169 if (!start_range_set) {
2170 pos[0] = CK_FIX_RANGE;
2171 pos++;
2172 start_range_set = true;
2173 uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
2174 memcpy(pos, &start_offset , sizeof(start_offset));
2175 pos += sizeof(start_offset);
2176 }
2177 last_col = i;
2178 }
2179 else {
2180 continue;
2181 }
2182 }
2183 if (start_range_set) {
2184 //
2185 // need to set the end range
2186 //
2187 start_range_set = false;
2188 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val+ kc_info->field_lengths[last_col];
2189 memcpy(pos, &end_offset, sizeof(end_offset));
2190 pos += sizeof(end_offset);
2191 }
2192
2193 //
2194 // now handle the var fields
2195 //
2196 start_range_set = false;
2197 last_col = 0;
2198 for (uint i = 0; i < table_share->fields; i++) {
2199 bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2200 if (!is_variable_field(kc_info, i)) {
2201 //
2202 // not a var field, continue
2203 //
2204 continue;
2205 }
2206 if (col_filtered && start_range_set) {
2207 //
2208 // need to set the end range
2209 //
2210 start_range_set = false;
2211 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
2212 memcpy(pos, &end_offset, sizeof(end_offset));
2213 pos += sizeof(end_offset);
2214 }
2215 else if (!col_filtered) {
2216 if (!start_range_set) {
2217 pos[0] = CK_VAR_RANGE;
2218 pos++;
2219
2220 start_range_set = true;
2221 uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
2222 memcpy(pos, &start_offset , sizeof(start_offset));
2223 pos += sizeof(start_offset);
2224 }
2225 last_col = i;
2226 }
2227 else {
2228 continue;
2229 }
2230 }
2231 if (start_range_set) {
2232 start_range_set = false;
2233 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
2234 memcpy(pos, &end_offset, sizeof(end_offset));
2235 pos += sizeof(end_offset);
2236 }
2237
2238 exit:
2239 offset = pos - buf;
2240 buf[0] = (uchar)(offset & 255);
2241 buf[1] = (uchar)((offset >> 8) & 255);
2242 buf[2] = (uchar)((offset >> 16) & 255);
2243 buf[3] = (uchar)((offset >> 24) & 255);
2244
2245 return pos - buf;
2246 }
2247
pack_clustering_val_from_desc(uchar * buf,void * row_desc,uint32_t row_desc_size,const DBT * pk_val)2248 static uint32_t pack_clustering_val_from_desc(
2249 uchar* buf,
2250 void* row_desc,
2251 uint32_t row_desc_size,
2252 const DBT* pk_val
2253 )
2254 {
2255 uchar* null_bytes_src_ptr = NULL;
2256 uchar* fixed_src_ptr = NULL;
2257 uchar* var_src_offset_ptr = NULL;
2258 uchar* var_src_data_ptr = NULL;
2259 uchar* fixed_dest_ptr = NULL;
2260 uchar* var_dest_offset_ptr = NULL;
2261 uchar* var_dest_data_ptr = NULL;
2262 uchar* orig_var_dest_data_ptr = NULL;
2263 uchar* desc_pos = (uchar *)row_desc;
2264 uint32_t num_null_bytes = 0;
2265 uint32_t num_offset_bytes;
2266 MULTI_COL_PACK_INFO src_mcp_info, dest_mcp_info;
2267 uchar has_blobs;
2268
2269 memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
2270 desc_pos += sizeof(num_null_bytes);
2271
2272 memcpy(&src_mcp_info, desc_pos, sizeof(src_mcp_info));
2273 desc_pos += sizeof(src_mcp_info);
2274
2275 num_offset_bytes = desc_pos[0];
2276 desc_pos++;
2277
2278 memcpy(&dest_mcp_info, desc_pos, sizeof(dest_mcp_info));
2279 desc_pos += sizeof(dest_mcp_info);
2280
2281 has_blobs = desc_pos[0];
2282 desc_pos++;
2283
2284 //
2285 //set the variables
2286 //
2287 null_bytes_src_ptr = (uchar *)pk_val->data;
2288 fixed_src_ptr = null_bytes_src_ptr + num_null_bytes;
2289 var_src_offset_ptr = fixed_src_ptr + src_mcp_info.fixed_field_size;
2290 var_src_data_ptr = var_src_offset_ptr + src_mcp_info.len_of_offsets;
2291
2292 fixed_dest_ptr = buf + num_null_bytes;
2293 var_dest_offset_ptr = fixed_dest_ptr + dest_mcp_info.fixed_field_size;
2294 var_dest_data_ptr = var_dest_offset_ptr + dest_mcp_info.len_of_offsets;
2295 orig_var_dest_data_ptr = var_dest_data_ptr;
2296
2297 //
2298 // copy the null bytes
2299 //
2300 memcpy(buf, null_bytes_src_ptr, num_null_bytes);
2301 while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
2302 uint32_t start, end, length;
2303 uchar curr = desc_pos[0];
2304 desc_pos++;
2305
2306 memcpy(&start, desc_pos, sizeof(start));
2307 desc_pos += sizeof(start);
2308
2309 memcpy(&end, desc_pos, sizeof(end));
2310 desc_pos += sizeof(end);
2311
2312 assert_always (start <= end);
2313
2314 if (curr == CK_FIX_RANGE) {
2315 length = end - start;
2316
2317 memcpy(fixed_dest_ptr, fixed_src_ptr + start, length);
2318 fixed_dest_ptr += length;
2319 }
2320 else if (curr == CK_VAR_RANGE) {
2321 uint32_t start_data_size;
2322 uint32_t start_data_offset;
2323 uint32_t end_data_size;
2324 uint32_t end_data_offset;
2325 uint32_t offset_diffs;
2326
2327 get_var_field_info(
2328 &start_data_size,
2329 &start_data_offset,
2330 start,
2331 var_src_offset_ptr,
2332 num_offset_bytes
2333 );
2334 get_var_field_info(
2335 &end_data_size,
2336 &end_data_offset,
2337 end,
2338 var_src_offset_ptr,
2339 num_offset_bytes
2340 );
2341 length = end_data_offset + end_data_size - start_data_offset;
2342 //
2343 // copy the data
2344 //
2345 memcpy(
2346 var_dest_data_ptr,
2347 var_src_data_ptr + start_data_offset,
2348 length
2349 );
2350 var_dest_data_ptr += length;
2351
2352 //
2353 // put in offset info
2354 //
2355 offset_diffs = (end_data_offset + end_data_size) - (uint32_t)(var_dest_data_ptr - orig_var_dest_data_ptr);
2356 for (uint32_t i = start; i <= end; i++) {
2357 if ( num_offset_bytes == 1 ) {
2358 assert_always(offset_diffs < 256);
2359 var_dest_offset_ptr[0] = var_src_offset_ptr[i] - (uchar)offset_diffs;
2360 var_dest_offset_ptr++;
2361 } else if ( num_offset_bytes == 2 ) {
2362 uint32_t tmp = uint2korr(var_src_offset_ptr + 2*i);
2363 uint32_t new_offset = tmp - offset_diffs;
2364 assert_always(new_offset < 1<<16);
2365 int2store(var_dest_offset_ptr,new_offset);
2366 var_dest_offset_ptr += 2;
2367 } else {
2368 assert_unreachable();
2369 }
2370 }
2371 } else {
2372 assert_unreachable();
2373 }
2374 }
2375 //
2376 // copy blobs
2377 // at this point, var_dest_data_ptr is pointing to the end, where blobs should be located
2378 // so, we put the blobs at var_dest_data_ptr
2379 //
2380 if (has_blobs) {
2381 uint32_t num_blob_bytes;
2382 uint32_t start_offset;
2383 uchar* src_blob_ptr = NULL;
2384 get_blob_field_info(
2385 &start_offset,
2386 src_mcp_info.len_of_offsets,
2387 var_src_data_ptr,
2388 num_offset_bytes
2389 );
2390 src_blob_ptr = var_src_data_ptr + start_offset;
2391 num_blob_bytes = pk_val->size - (start_offset + (var_src_data_ptr - null_bytes_src_ptr));
2392 memcpy(var_dest_data_ptr, src_blob_ptr, num_blob_bytes);
2393 var_dest_data_ptr += num_blob_bytes;
2394 }
2395 return var_dest_data_ptr - buf;
2396 }
2397
2398
get_max_secondary_key_pack_desc_size(KEY_AND_COL_INFO * kc_info)2399 static uint32_t get_max_secondary_key_pack_desc_size(
2400 KEY_AND_COL_INFO* kc_info
2401 )
2402 {
2403 uint32_t ret_val = 0;
2404 //
2405 // the fixed stuff:
2406 // byte that states if main dictionary
2407 // byte that states if hpk
2408 // the things in pack_some_row_info
2409 ret_val++;
2410 ret_val++;
2411 ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
2412 //
2413 // now variable sized stuff
2414 //
2415
2416 // first the blobs
2417 ret_val += sizeof(kc_info->num_blobs);
2418 ret_val+= kc_info->num_blobs;
2419
2420 // then the pk
2421 // one byte for num key parts
2422 // two bytes for each key part
2423 ret_val++;
2424 ret_val += MAX_REF_PARTS*2;
2425
2426 // then the key
2427 // null bit, then null byte,
2428 // then 1 byte stating what it is, then 4 for offset, 4 for key length,
2429 // 1 for if charset exists, and 4 for charset
2430 ret_val += MAX_REF_PARTS*(1 + sizeof(uint32_t) + 1 + 3*sizeof(uint32_t) + 1);
2431 //
2432 // four bytes storing the length of this portion
2433 //
2434 ret_val += 4;
2435 return ret_val;
2436 }
2437
create_toku_secondary_key_pack_descriptor(uchar * buf,bool has_hpk,uint pk_index,TABLE_SHARE * table_share,TABLE * table,KEY_AND_COL_INFO * kc_info,KEY * key_info,KEY * prim_key)2438 static uint32_t create_toku_secondary_key_pack_descriptor (
2439 uchar* buf,
2440 bool has_hpk,
2441 uint pk_index,
2442 TABLE_SHARE* table_share,
2443 TABLE* table,
2444 KEY_AND_COL_INFO* kc_info,
2445 KEY* key_info,
2446 KEY* prim_key
2447 )
2448 {
2449 //
2450 // The first four bytes always contain the offset of where the first key
2451 // ends.
2452 //
2453 uchar* pk_info = NULL;
2454 uchar* pos = buf + 4;
2455 uint32_t offset = 0;
2456
2457 //
2458 // first byte states that it is NOT main dictionary
2459 //
2460 pos[0] = 0;
2461 pos++;
2462
2463 //
2464 // one byte states if main dictionary has an hpk or not
2465 //
2466 if (has_hpk) {
2467 pos[0] = 1;
2468 }
2469 else {
2470 pos[0] = 0;
2471 }
2472 pos++;
2473
2474 pos += pack_some_row_info(
2475 pos,
2476 pk_index,
2477 table_share,
2478 kc_info
2479 );
2480
2481 //
2482 // store blob information
2483 //
2484 memcpy(pos, &kc_info->num_blobs, sizeof(kc_info->num_blobs));
2485 pos += sizeof(uint32_t);
2486 for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
2487 //
2488 // store length bytes for each blob
2489 //
2490 Field* field = table_share->field[kc_info->blob_fields[i]];
2491 pos[0] = (uchar)field->row_pack_length();
2492 pos++;
2493 }
2494
2495 //
2496 // store the pk information
2497 //
2498 if (has_hpk) {
2499 pos[0] = 0;
2500 pos++;
2501 }
2502 else {
2503 //
2504 // store number of parts
2505 //
2506 assert_always(prim_key->user_defined_key_parts < 128);
2507 pos[0] = 2 * prim_key->user_defined_key_parts;
2508 pos++;
2509 //
2510 // for each part, store if it is a fixed field or var field
2511 // if fixed, store number of bytes, if var, store
2512 // number of length bytes
2513 // total should be two bytes per key part stored
2514 //
2515 pk_info = pos;
2516 uchar* tmp = pos;
2517 for (uint i = 0; i < prim_key->user_defined_key_parts; i++) {
2518 tmp += pack_desc_pk_info(
2519 tmp,
2520 kc_info,
2521 table_share,
2522 &prim_key->key_part[i]
2523 );
2524 }
2525 //
2526 // asserting that we moved forward as much as we think we have
2527 //
2528 assert_always(tmp - pos == (2 * prim_key->user_defined_key_parts));
2529 pos = tmp;
2530 }
2531
2532 for (uint i = 0; i < key_info->user_defined_key_parts; i++) {
2533 KEY_PART_INFO curr_kpi = key_info->key_part[i];
2534 uint16 field_index = curr_kpi.field->field_index;
2535 Field* field = table_share->field[field_index];
2536 bool is_col_in_pk = false;
2537
2538 if (bitmap_is_set(&kc_info->key_filters[pk_index],field_index)) {
2539 assert_always(!has_hpk);
2540 assert_always(prim_key != nullptr);
2541 is_col_in_pk = true;
2542 }
2543 else {
2544 is_col_in_pk = false;
2545 }
2546
2547 pos[0] = field->null_bit;
2548 pos++;
2549
2550 if (is_col_in_pk) {
2551 //
2552 // assert that columns in pk do not have a null bit
2553 // because in MySQL, pk columns cannot be null
2554 //
2555 assert_always(!field->null_bit);
2556 }
2557
2558 if (field->null_bit) {
2559 uint32_t null_offset = get_null_offset(table,table->field[field_index]);
2560 memcpy(pos, &null_offset, sizeof(uint32_t));
2561 pos += sizeof(uint32_t);
2562 }
2563 if (is_col_in_pk) {
2564 pos += pack_desc_pk_offset_info(pos, &curr_kpi, prim_key, pk_info);
2565 }
2566 else {
2567 pos += pack_desc_offset_info(
2568 pos,
2569 kc_info,
2570 pk_index,
2571 table_share,
2572 &curr_kpi
2573 );
2574 }
2575 pos += pack_desc_key_length_info(
2576 pos,
2577 kc_info,
2578 table_share,
2579 &curr_kpi
2580 );
2581 pos += pack_desc_char_info(pos, table_share, &curr_kpi);
2582 }
2583
2584 offset = pos - buf;
2585 buf[0] = (uchar)(offset & 255);
2586 buf[1] = (uchar)((offset >> 8) & 255);
2587 buf[2] = (uchar)((offset >> 16) & 255);
2588 buf[3] = (uchar)((offset >> 24) & 255);
2589
2590 return pos - buf;
2591 }
2592
skip_key_in_desc(uchar * row_desc)2593 static uint32_t skip_key_in_desc(
2594 uchar* row_desc
2595 )
2596 {
2597 uchar* pos = row_desc;
2598 uchar col_bin_or_char;
2599 //
2600 // skip the byte that states if it is a fix field or var field, we do not care
2601 //
2602 pos++;
2603
2604 //
2605 // skip the offset information
2606 //
2607 pos += sizeof(uint32_t);
2608
2609 //
2610 // skip the key_part_length info
2611 //
2612 pos += sizeof(uint32_t);
2613 col_bin_or_char = pos[0];
2614 pos++;
2615 if (col_bin_or_char == COL_HAS_NO_CHARSET) {
2616 goto exit;
2617 }
2618 //
2619 // skip the charset info
2620 //
2621 pos += 4;
2622
2623
2624 exit:
2625 return (uint32_t)(pos-row_desc);
2626 }
2627
2628
max_key_size_from_desc(void * row_desc,uint32_t row_desc_size)2629 static uint32_t max_key_size_from_desc(
2630 void* row_desc,
2631 uint32_t row_desc_size
2632 )
2633 {
2634 uchar* desc_pos = (uchar *)row_desc;
2635 uint32_t num_blobs;
2636 uint32_t num_pk_columns;
2637 //
2638 // start at 1 for the infinity byte
2639 //
2640 uint32_t max_size = 1;
2641
2642 // skip byte that states if main dictionary
2643 bool is_main_dictionary = desc_pos[0];
2644 desc_pos++;
2645 assert_always(!is_main_dictionary);
2646
2647 // skip hpk byte
2648 desc_pos++;
2649
2650 // skip num_null_bytes
2651 desc_pos += sizeof(uint32_t);
2652
2653 // skip mcp_info
2654 desc_pos += sizeof(MULTI_COL_PACK_INFO);
2655
2656 // skip offset_bytes
2657 desc_pos++;
2658
2659 // skip over blobs
2660 memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
2661 desc_pos += sizeof(num_blobs);
2662 desc_pos += num_blobs;
2663
2664 // skip over pk info
2665 num_pk_columns = desc_pos[0]/2;
2666 desc_pos++;
2667 desc_pos += 2*num_pk_columns;
2668
2669 while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
2670 uchar has_charset;
2671 uint32_t key_length = 0;
2672
2673 uchar null_bit = desc_pos[0];
2674 desc_pos++;
2675
2676 if (null_bit) {
2677 //
2678 // column is NULLable, skip null_offset, and add a null byte
2679 //
2680 max_size++;
2681 desc_pos += sizeof(uint32_t);
2682 }
2683 //
2684 // skip over byte that states if fix or var
2685 //
2686 desc_pos++;
2687
2688 // skip over offset
2689 desc_pos += sizeof(uint32_t);
2690
2691 //
2692 // get the key length and add it to return value
2693 //
2694 memcpy(&key_length, desc_pos, sizeof(key_length));
2695 desc_pos += sizeof(key_length);
2696 max_size += key_length;
2697 max_size += 2; // 2 bytes for a potential length bytes, we are upperbounding, does not need to be super tight
2698
2699 has_charset = desc_pos[0];
2700 desc_pos++;
2701
2702 uint32_t charset_num;
2703 if (has_charset == COL_HAS_CHARSET) {
2704 // skip over charsent num
2705 desc_pos += sizeof(charset_num);
2706 }
2707 else {
2708 assert_always(has_charset == COL_HAS_NO_CHARSET);
2709 }
2710 }
2711 return max_size;
2712 }
2713
pack_key_from_desc(uchar * buf,void * row_desc,uint32_t row_desc_size,const DBT * pk_key,const DBT * pk_val)2714 static uint32_t pack_key_from_desc(
2715 uchar* buf,
2716 void* row_desc,
2717 uint32_t row_desc_size,
2718 const DBT* pk_key,
2719 const DBT* pk_val) {
2720
2721 MULTI_COL_PACK_INFO mcp_info;
2722 uint32_t num_null_bytes;
2723 uint32_t num_blobs;
2724 uint32_t num_pk_columns;
2725 uchar* blob_lengths = NULL;
2726 uchar* pk_info = NULL;
2727 uchar* pk_data_ptr = NULL;
2728 uchar* null_bytes_ptr = NULL;
2729 uchar* fixed_field_ptr = NULL;
2730 uchar* var_field_offset_ptr = NULL;
2731 const uchar* var_field_data_ptr = NULL;
2732 uint32_t num_offset_bytes;
2733 uchar* packed_key_pos = buf;
2734 uchar* desc_pos = (uchar *)row_desc;
2735
2736 bool is_main_dictionary = desc_pos[0];
2737 desc_pos++;
2738 assert_always(!is_main_dictionary);
2739
2740 //
2741 // get the constant info out of descriptor
2742 //
2743 bool hpk = desc_pos[0];
2744 desc_pos++;
2745
2746 memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
2747 desc_pos += sizeof(num_null_bytes);
2748
2749 memcpy(&mcp_info, desc_pos, sizeof(mcp_info));
2750 desc_pos += sizeof(mcp_info);
2751
2752 num_offset_bytes = desc_pos[0];
2753 desc_pos++;
2754
2755 memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
2756 desc_pos += sizeof(num_blobs);
2757
2758 blob_lengths = desc_pos;
2759 desc_pos += num_blobs;
2760
2761 num_pk_columns = desc_pos[0]/2;
2762 desc_pos++;
2763 pk_info = desc_pos;
2764 desc_pos += 2*num_pk_columns;
2765
2766 //
2767 // now start packing the key
2768 //
2769
2770 //
2771 // pack the infinity byte
2772 //
2773 packed_key_pos[0] = COL_ZERO;
2774 packed_key_pos++;
2775 //
2776 // now start packing each column of the key, as described in descriptor
2777 //
2778 if (!hpk) {
2779 // +1 for the infinity byte
2780 pk_data_ptr = (uchar *)pk_key->data + 1;
2781 }
2782 null_bytes_ptr = (uchar *)pk_val->data;
2783 fixed_field_ptr = null_bytes_ptr + num_null_bytes;
2784 var_field_offset_ptr = fixed_field_ptr + mcp_info.fixed_field_size;
2785 var_field_data_ptr = var_field_offset_ptr + mcp_info.len_of_offsets;
2786 while ((uint32_t)(desc_pos - (uchar*)row_desc) < row_desc_size) {
2787 uchar col_fix_val;
2788 uchar has_charset;
2789 uint32_t col_pack_val = 0;
2790 uint32_t key_length = 0;
2791
2792 uchar null_bit = desc_pos[0];
2793 desc_pos++;
2794
2795 if (null_bit) {
2796 //
2797 // column is NULLable, need to check the null bytes to see if it is NULL
2798 //
2799 uint32_t null_offset = 0;
2800 bool is_field_null;
2801 memcpy(&null_offset, desc_pos, sizeof(null_offset));
2802 desc_pos += sizeof(null_offset);
2803
2804 is_field_null = (null_bytes_ptr[null_offset] & null_bit) ? true: false;
2805 if (is_field_null) {
2806 packed_key_pos[0] = NULL_COL_VAL;
2807 packed_key_pos++;
2808 desc_pos += skip_key_in_desc(desc_pos);
2809 continue;
2810 } else {
2811 packed_key_pos[0] = NONNULL_COL_VAL;
2812 packed_key_pos++;
2813 }
2814 }
2815 //
2816 // now pack the column (unless it was NULL, and we continued)
2817 //
2818 col_fix_val = desc_pos[0];
2819 desc_pos++;
2820
2821 memcpy(&col_pack_val, desc_pos, sizeof(col_pack_val));
2822 desc_pos += sizeof(col_pack_val);
2823
2824 memcpy(&key_length, desc_pos, sizeof(key_length));
2825 desc_pos += sizeof(key_length);
2826
2827 has_charset = desc_pos[0];
2828 desc_pos++;
2829
2830 uint32_t charset_num = 0;
2831 if (has_charset == COL_HAS_CHARSET) {
2832 memcpy(&charset_num, desc_pos, sizeof(charset_num));
2833 desc_pos += sizeof(charset_num);
2834 } else {
2835 assert_always(has_charset == COL_HAS_NO_CHARSET);
2836 }
2837 //
2838 // case where column is in pk val
2839 //
2840 if (col_fix_val == COL_FIX_FIELD ||
2841 col_fix_val == COL_VAR_FIELD ||
2842 col_fix_val == COL_BLOB_FIELD) {
2843 if (col_fix_val == COL_FIX_FIELD &&
2844 has_charset == COL_HAS_NO_CHARSET) {
2845 memcpy(
2846 packed_key_pos,
2847 &fixed_field_ptr[col_pack_val],
2848 key_length);
2849 packed_key_pos += key_length;
2850 } else if (col_fix_val == COL_VAR_FIELD &&
2851 has_charset == COL_HAS_NO_CHARSET) {
2852 uint32_t data_start_offset = 0;
2853
2854 uint32_t data_size = 0;
2855 get_var_field_info(
2856 &data_size,
2857 &data_start_offset,
2858 col_pack_val,
2859 var_field_offset_ptr,
2860 num_offset_bytes);
2861
2862 //
2863 // length of this field in this row is data_size
2864 // data is located beginning at var_field_data_ptr + data_start_offset
2865 //
2866 packed_key_pos = pack_toku_varbinary_from_desc(
2867 packed_key_pos,
2868 var_field_data_ptr + data_start_offset,
2869 //number of bytes to use to encode the length in to_tokudb
2870 key_length,
2871 //length of field
2872 data_size);
2873 } else {
2874 const uchar* data_start = NULL;
2875 uint32_t data_start_offset = 0;
2876 uint32_t data_size = 0;
2877
2878 if (col_fix_val == COL_FIX_FIELD) {
2879 data_start_offset = col_pack_val;
2880 data_size = key_length;
2881 data_start = fixed_field_ptr + data_start_offset;
2882 } else if (col_fix_val == COL_VAR_FIELD){
2883 get_var_field_info(
2884 &data_size,
2885 &data_start_offset,
2886 col_pack_val,
2887 var_field_offset_ptr,
2888 num_offset_bytes);
2889 data_start = var_field_data_ptr + data_start_offset;
2890 } else if (col_fix_val == COL_BLOB_FIELD) {
2891 uint32_t blob_index = col_pack_val;
2892 uint32_t blob_offset;
2893 const uchar* blob_ptr = NULL;
2894 uint32_t field_len;
2895 uint32_t field_len_bytes = blob_lengths[blob_index];
2896 get_blob_field_info(
2897 &blob_offset,
2898 mcp_info.len_of_offsets,
2899 var_field_data_ptr,
2900 num_offset_bytes);
2901 blob_ptr = var_field_data_ptr + blob_offset;
2902 assert_always(num_blobs > 0);
2903
2904 // skip over other blobs to get to the one we want to
2905 // make a key out of
2906 for (uint32_t i = 0; i < blob_index; i++) {
2907 blob_ptr = unpack_toku_field_blob(
2908 NULL,
2909 blob_ptr,
2910 blob_lengths[i],
2911 true);
2912 }
2913 // at this point, blob_ptr is pointing to the blob we
2914 // want to make a key from
2915 field_len = get_blob_field_len(blob_ptr, field_len_bytes);
2916 // now we set the variables to make the key
2917 data_start = blob_ptr + field_len_bytes;
2918 data_size = field_len;
2919 } else {
2920 assert_unreachable();
2921 }
2922
2923 packed_key_pos = pack_toku_varstring_from_desc(packed_key_pos,
2924 data_start,
2925 key_length,
2926 data_size,
2927 charset_num);
2928 }
2929 } else {
2930 // case where column is in pk key
2931 if (col_fix_val == COL_FIX_PK_OFFSET) {
2932 memcpy(packed_key_pos, &pk_data_ptr[col_pack_val], key_length);
2933 packed_key_pos += key_length;
2934 } else if (col_fix_val == COL_VAR_PK_OFFSET) {
2935 uchar* tmp_pk_data_ptr = pk_data_ptr;
2936 uint32_t index_in_pk = col_pack_val;
2937 //
2938 // skip along in pk to the right column
2939 //
2940 for (uint32_t i = 0; i < index_in_pk; i++) {
2941 if (pk_info[2*i] == COL_FIX_FIELD) {
2942 tmp_pk_data_ptr += pk_info[2*i + 1];
2943 } else if (pk_info[2*i] == COL_VAR_FIELD) {
2944 uint32_t len_bytes = pk_info[2*i + 1];
2945 uint32_t len;
2946 if (len_bytes == 1) {
2947 len = tmp_pk_data_ptr[0];
2948 tmp_pk_data_ptr++;
2949 } else if (len_bytes == 2) {
2950 len = uint2korr(tmp_pk_data_ptr);
2951 tmp_pk_data_ptr += 2;
2952 } else {
2953 assert_unreachable();
2954 }
2955 tmp_pk_data_ptr += len;
2956 } else {
2957 assert_unreachable();
2958 }
2959 }
2960 //
2961 // at this point, tmp_pk_data_ptr is pointing at the column
2962 //
2963 uint32_t is_fix_field = pk_info[2*index_in_pk];
2964 if (is_fix_field == COL_FIX_FIELD) {
2965 memcpy(packed_key_pos, tmp_pk_data_ptr, key_length);
2966 packed_key_pos += key_length;
2967 } else if (is_fix_field == COL_VAR_FIELD) {
2968 const uchar* data_start = NULL;
2969 uint32_t data_size = 0;
2970 uint32_t len_bytes = pk_info[2*index_in_pk + 1];
2971 if (len_bytes == 1) {
2972 data_size = tmp_pk_data_ptr[0];
2973 tmp_pk_data_ptr++;
2974 } else if (len_bytes == 2) {
2975 data_size = uint2korr(tmp_pk_data_ptr);
2976 tmp_pk_data_ptr += 2;
2977 } else {
2978 assert_unreachable();
2979 }
2980 data_start = tmp_pk_data_ptr;
2981
2982 if (has_charset == COL_HAS_CHARSET) {
2983 packed_key_pos = pack_toku_varstring_from_desc(
2984 packed_key_pos,
2985 data_start,
2986 key_length,
2987 data_size,
2988 charset_num);
2989 } else if (has_charset == COL_HAS_NO_CHARSET) {
2990 packed_key_pos = pack_toku_varbinary_from_desc(
2991 packed_key_pos,
2992 data_start,
2993 key_length,
2994 data_size);
2995 } else {
2996 assert_unreachable();
2997 }
2998 } else {
2999 assert_unreachable();
3000 }
3001 } else {
3002 assert_unreachable();
3003 }
3004 }
3005
3006 }
3007 assert_always( (uint32_t)(desc_pos - (uchar *)row_desc) == row_desc_size);
3008
3009 //
3010 // now append the primary key to the end of the key
3011 //
3012 if (hpk) {
3013 memcpy(packed_key_pos, pk_key->data, pk_key->size);
3014 packed_key_pos += pk_key->size;
3015 } else {
3016 memcpy(packed_key_pos, (uchar *)pk_key->data + 1, pk_key->size - 1);
3017 packed_key_pos += (pk_key->size - 1);
3018 }
3019
3020 return (uint32_t)(packed_key_pos - buf);
3021 }
3022
fields_have_same_name(Field * a,Field * b)3023 static bool fields_have_same_name(Field* a, Field* b) {
3024 return strcmp(a->field_name.str, b->field_name.str) == 0;
3025 }
3026
fields_are_same_type(Field * a,Field * b)3027 static bool fields_are_same_type(Field* a, Field* b) {
3028 bool retval = true;
3029 enum_field_types a_mysql_type = a->real_type();
3030 enum_field_types b_mysql_type = b->real_type();
3031 TOKU_TYPE a_toku_type = mysql_to_toku_type(a);
3032 TOKU_TYPE b_toku_type = mysql_to_toku_type(b);
3033 // make sure have same names
3034 // make sure have same types
3035 if (a_mysql_type != b_mysql_type) {
3036 retval = false;
3037 goto cleanup;
3038 }
3039 // Thanks to MariaDB 5.5, we can have two fields
3040 // be the same MySQL type but not the same toku type,
3041 // This is an issue introduced with MariaDB's fractional time
3042 // implementation
3043 if (a_toku_type != b_toku_type) {
3044 retval = false;
3045 goto cleanup;
3046 }
3047 // make sure that either both are nullable, or both not nullable
3048 if ((a->null_bit && !b->null_bit) || (!a->null_bit && b->null_bit)) {
3049 retval = false;
3050 goto cleanup;
3051 }
3052 switch (a_mysql_type) {
3053 case MYSQL_TYPE_TINY:
3054 case MYSQL_TYPE_SHORT:
3055 case MYSQL_TYPE_INT24:
3056 case MYSQL_TYPE_LONG:
3057 case MYSQL_TYPE_LONGLONG:
3058 // length, unsigned, auto increment
3059 if (a->pack_length() != b->pack_length() ||
3060 (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
3061 (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
3062 retval = false;
3063 goto cleanup;
3064 }
3065 break;
3066 case MYSQL_TYPE_DOUBLE:
3067 case MYSQL_TYPE_FLOAT:
3068 // length, unsigned, auto increment
3069 if (a->pack_length() != b->pack_length() ||
3070 (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
3071 (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
3072 retval = false;
3073 goto cleanup;
3074 }
3075 break;
3076 case MYSQL_TYPE_NEWDECIMAL:
3077 // length, unsigned
3078 if (a->pack_length() != b->pack_length() ||
3079 (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG)) {
3080 retval = false;
3081 goto cleanup;
3082 }
3083 break;
3084 case MYSQL_TYPE_ENUM: {
3085 Field_enum *a_enum = static_cast<Field_enum *>(a);
3086 if (!a_enum->eq_def(b)) {
3087 retval = false;
3088 goto cleanup;
3089 }
3090 break;
3091 }
3092 case MYSQL_TYPE_SET: {
3093 Field_set *a_set = static_cast<Field_set *>(a);
3094 if (!a_set->eq_def(b)) {
3095 retval = false;
3096 goto cleanup;
3097 }
3098 break;
3099 }
3100 case MYSQL_TYPE_BIT:
3101 // length
3102 if (a->pack_length() != b->pack_length()) {
3103 retval = false;
3104 goto cleanup;
3105 }
3106 break;
3107 case MYSQL_TYPE_DATE:
3108 case MYSQL_TYPE_DATETIME:
3109 case MYSQL_TYPE_YEAR:
3110 case MYSQL_TYPE_NEWDATE:
3111 case MYSQL_TYPE_TIME:
3112 case MYSQL_TYPE_TIMESTAMP:
3113 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
3114 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
3115 (100000 <= MYSQL_VERSION_ID)
3116 case MYSQL_TYPE_DATETIME2:
3117 case MYSQL_TYPE_TIMESTAMP2:
3118 case MYSQL_TYPE_TIME2:
3119 #endif
3120 // length
3121 if (a->pack_length() != b->pack_length()) {
3122 retval = false;
3123 goto cleanup;
3124 }
3125 break;
3126 case MYSQL_TYPE_TINY_BLOB:
3127 case MYSQL_TYPE_MEDIUM_BLOB:
3128 case MYSQL_TYPE_BLOB:
3129 case MYSQL_TYPE_LONG_BLOB:
3130 // test the charset
3131 if (a->charset()->number != b->charset()->number) {
3132 retval = false;
3133 goto cleanup;
3134 }
3135 if (a->row_pack_length() != b->row_pack_length()) {
3136 retval = false;
3137 goto cleanup;
3138 }
3139 break;
3140 case MYSQL_TYPE_STRING:
3141 if (a->pack_length() != b->pack_length()) {
3142 retval = false;
3143 goto cleanup;
3144 }
3145 // if both are binary, we know have same pack lengths,
3146 // so we can goto end
3147 if (a->binary() && b->binary()) {
3148 // nothing to do, we are good
3149 }
3150 else if (!a->binary() && !b->binary()) {
3151 // test the charset
3152 if (a->charset()->number != b->charset()->number) {
3153 retval = false;
3154 goto cleanup;
3155 }
3156 }
3157 else {
3158 // one is binary and the other is not, so not the same
3159 retval = false;
3160 goto cleanup;
3161 }
3162 break;
3163 case MYSQL_TYPE_VARCHAR:
3164 if (a->field_length != b->field_length) {
3165 retval = false;
3166 goto cleanup;
3167 }
3168 // if both are binary, we know have same pack lengths,
3169 // so we can goto end
3170 if (a->binary() && b->binary()) {
3171 // nothing to do, we are good
3172 }
3173 else if (!a->binary() && !b->binary()) {
3174 // test the charset
3175 if (a->charset()->number != b->charset()->number) {
3176 retval = false;
3177 goto cleanup;
3178 }
3179 }
3180 else {
3181 // one is binary and the other is not, so not the same
3182 retval = false;
3183 goto cleanup;
3184 }
3185 break;
3186 //
3187 // I believe these are old types that are no longer
3188 // in any 5.1 tables, so tokudb does not need
3189 // to worry about them
3190 // Putting in this assert in case I am wrong.
3191 // Do not support geometry yet.
3192 //
3193 case MYSQL_TYPE_GEOMETRY:
3194 case MYSQL_TYPE_DECIMAL:
3195 case MYSQL_TYPE_VAR_STRING:
3196 case MYSQL_TYPE_NULL:
3197 case MYSQL_TYPE_VARCHAR_COMPRESSED:
3198 case MYSQL_TYPE_BLOB_COMPRESSED:
3199 assert_unreachable();
3200 }
3201
3202 cleanup:
3203 return retval;
3204 }
3205
are_two_fields_same(Field * a,Field * b)3206 static bool are_two_fields_same(Field* a, Field* b) {
3207 return fields_have_same_name(a, b) && fields_are_same_type(a, b);
3208 }
3209
3210
3211