1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of TokuDB
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     TokuDB is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     TokuDB is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with TokuDB.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ======= */
23 
24 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
25 
26 #include "hatoku_cmp.h"
27 
28 #ifdef WORDS_BIGENDIAN
29 #error "WORDS_BIGENDIAN not supported"
30 #endif
31 
32 // returns true if the field is a valid field to be used
33 // in a TokuDB table. The non-valid fields are those
34 // that have been deprecated since before 5.1, and can
35 // only exist through upgrades of old versions of MySQL
field_valid_for_tokudb_table(Field * field)36 static bool field_valid_for_tokudb_table(Field* field) {
37     bool ret_val = false;
38     enum_field_types mysql_type = field->real_type();
39     switch (mysql_type) {
40     case MYSQL_TYPE_LONG:
41     case MYSQL_TYPE_LONGLONG:
42     case MYSQL_TYPE_TINY:
43     case MYSQL_TYPE_SHORT:
44     case MYSQL_TYPE_INT24:
45     case MYSQL_TYPE_DATE:
46     case MYSQL_TYPE_YEAR:
47     case MYSQL_TYPE_NEWDATE:
48     case MYSQL_TYPE_ENUM:
49     case MYSQL_TYPE_SET:
50     case MYSQL_TYPE_TIME:
51     case MYSQL_TYPE_DATETIME:
52     case MYSQL_TYPE_TIMESTAMP:
53     case MYSQL_TYPE_DOUBLE:
54     case MYSQL_TYPE_FLOAT:
55 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
56     (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
57     (100000 <= MYSQL_VERSION_ID)
58     case MYSQL_TYPE_DATETIME2:
59     case MYSQL_TYPE_TIMESTAMP2:
60     case MYSQL_TYPE_TIME2:
61 #endif
62     case MYSQL_TYPE_NEWDECIMAL:
63     case MYSQL_TYPE_BIT:
64     case MYSQL_TYPE_STRING:
65     case MYSQL_TYPE_VARCHAR:
66     case MYSQL_TYPE_TINY_BLOB:
67     case MYSQL_TYPE_MEDIUM_BLOB:
68     case MYSQL_TYPE_BLOB:
69     case MYSQL_TYPE_LONG_BLOB:
70         ret_val = true;
71         goto exit;
72     //
73     // I believe these are old types that are no longer
74     // in any 5.1 tables, so tokudb does not need
75     // to worry about them
76     // Putting in this assert in case I am wrong.
77     // Do not support geometry yet.
78     //
79     case MYSQL_TYPE_GEOMETRY:
80     case MYSQL_TYPE_DECIMAL:
81     case MYSQL_TYPE_VAR_STRING:
82     case MYSQL_TYPE_NULL:
83     case MYSQL_TYPE_VARCHAR_COMPRESSED:
84     case MYSQL_TYPE_BLOB_COMPRESSED:
85         ret_val = false;
86     }
87 exit:
88     return ret_val;
89 }
90 
get_var_field_info(uint32_t * field_len,uint32_t * start_offset,uint32_t var_field_index,const uchar * var_field_offset_ptr,uint32_t num_offset_bytes)91 static void get_var_field_info(
92     uint32_t* field_len, // output: length of field
93     uint32_t* start_offset, // output, length of offset where data starts
94     uint32_t var_field_index, //input, index of var field we want info on
95     const uchar* var_field_offset_ptr, //input, pointer to where offset information for all var fields begins
96     uint32_t num_offset_bytes //input, number of bytes used to store offsets starting at var_field_offset_ptr
97     )
98 {
99     uint32_t data_start_offset = 0;
100     uint32_t data_end_offset = 0;
101     switch (num_offset_bytes) {
102     case (1):
103         data_end_offset = (var_field_offset_ptr + var_field_index)[0];
104         break;
105     case (2):
106         data_end_offset = uint2korr(var_field_offset_ptr + 2*var_field_index);
107         break;
108     default:
109         assert_unreachable();
110     }
111 
112     if (var_field_index) {
113         switch (num_offset_bytes) {
114         case (1):
115             data_start_offset = (var_field_offset_ptr + var_field_index - 1)[0];
116             break;
117         case (2):
118             data_start_offset = uint2korr(var_field_offset_ptr + 2*(var_field_index-1));
119             break;
120         default:
121             assert_unreachable();
122         }
123     }
124     else {
125         data_start_offset = 0;
126     }
127 
128     *start_offset = data_start_offset;
129     assert_always(data_end_offset >= data_start_offset);
130     *field_len = data_end_offset - data_start_offset;
131 }
132 
get_blob_field_info(uint32_t * start_offset,uint32_t len_of_offsets,const uchar * var_field_data_ptr,uint32_t num_offset_bytes)133 static void get_blob_field_info(
134     uint32_t* start_offset,
135     uint32_t len_of_offsets,
136     const uchar* var_field_data_ptr,
137     uint32_t num_offset_bytes
138     )
139 {
140     uint32_t data_end_offset;
141     //
142     // need to set var_field_data_ptr to point to beginning of blobs, which
143     // is at the end of the var stuff (if they exist), if var stuff does not exist
144     // then the bottom variable will be 0, and var_field_data_ptr is already
145     // set correctly
146     //
147     if (len_of_offsets) {
148         switch (num_offset_bytes) {
149         case (1):
150             data_end_offset = (var_field_data_ptr - 1)[0];
151             break;
152         case (2):
153             data_end_offset = uint2korr(var_field_data_ptr - 2);
154             break;
155         default:
156             assert_unreachable();
157         }
158     }
159     else {
160         data_end_offset = 0;
161     }
162     *start_offset = data_end_offset;
163 }
164 
165 
166 // this function is pattern matched from
167 // InnoDB's get_innobase_type_from_mysql_type
mysql_to_toku_type(Field * field)168 static TOKU_TYPE mysql_to_toku_type (Field* field) {
169     TOKU_TYPE ret_val = toku_type_unknown;
170     enum_field_types mysql_type = field->real_type();
171     switch (mysql_type) {
172     case MYSQL_TYPE_LONG:
173     case MYSQL_TYPE_LONGLONG:
174     case MYSQL_TYPE_TINY:
175     case MYSQL_TYPE_SHORT:
176     case MYSQL_TYPE_INT24:
177     case MYSQL_TYPE_DATE:
178     case MYSQL_TYPE_YEAR:
179     case MYSQL_TYPE_NEWDATE:
180     case MYSQL_TYPE_ENUM:
181     case MYSQL_TYPE_SET:
182         ret_val = toku_type_int;
183         goto exit;
184     case MYSQL_TYPE_TIME:
185     case MYSQL_TYPE_DATETIME:
186     case MYSQL_TYPE_TIMESTAMP:
187 #ifdef MARIADB_BASE_VERSION
188         // case to handle fractional seconds in MariaDB
189         //
190         if (field->key_type() == HA_KEYTYPE_BINARY) {
191             ret_val = toku_type_fixbinary;
192             goto exit;
193         }
194 #endif
195         ret_val = toku_type_int;
196         goto exit;
197     case MYSQL_TYPE_DOUBLE:
198         ret_val = toku_type_double;
199         goto exit;
200     case MYSQL_TYPE_FLOAT:
201         ret_val = toku_type_float;
202         goto exit;
203 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
204     (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
205     (100000 <= MYSQL_VERSION_ID)
206     case MYSQL_TYPE_DATETIME2:
207     case MYSQL_TYPE_TIMESTAMP2:
208     case MYSQL_TYPE_TIME2:
209 #endif
210     case MYSQL_TYPE_NEWDECIMAL:
211     case MYSQL_TYPE_BIT:
212         ret_val = toku_type_fixbinary;
213         goto exit;
214     case MYSQL_TYPE_STRING:
215         if (field->binary()) {
216             ret_val = toku_type_fixbinary;
217         }
218         else {
219             ret_val = toku_type_fixstring;
220         }
221         goto exit;
222     case MYSQL_TYPE_VARCHAR:
223         if (field->binary()) {
224             ret_val = toku_type_varbinary;
225         }
226         else {
227             ret_val = toku_type_varstring;
228         }
229         goto exit;
230     case MYSQL_TYPE_TINY_BLOB:
231     case MYSQL_TYPE_MEDIUM_BLOB:
232     case MYSQL_TYPE_BLOB:
233     case MYSQL_TYPE_LONG_BLOB:
234         ret_val = toku_type_blob;
235         goto exit;
236     //
237     // I believe these are old types that are no longer
238     // in any 5.1 tables, so tokudb does not need
239     // to worry about them
240     // Putting in this assert in case I am wrong.
241     // Do not support geometry yet.
242     //
243     case MYSQL_TYPE_GEOMETRY:
244     case MYSQL_TYPE_DECIMAL:
245     case MYSQL_TYPE_VAR_STRING:
246     case MYSQL_TYPE_NULL:
247     case MYSQL_TYPE_VARCHAR_COMPRESSED:
248     case MYSQL_TYPE_BLOB_COMPRESSED:
249         assert_unreachable();
250     }
251 exit:
252     return ret_val;
253 }
254 
255 
get_charset_from_num(uint32_t charset_number)256 static inline CHARSET_INFO* get_charset_from_num (uint32_t charset_number) {
257     //
258     // patternmatched off of InnoDB, due to MySQL bug 42649
259     //
260     if (charset_number == default_charset_info->number) {
261         return default_charset_info;
262     }
263     else if (charset_number == my_charset_latin1.number) {
264         return &my_charset_latin1;
265     }
266     else {
267         return get_charset(charset_number, MYF(MY_WME));
268     }
269 }
270 
271 
272 
273 //
274 // used to read the length of a variable sized field in a tokudb key (buf).
275 //
get_length_from_var_tokudata(uchar * buf,uint32_t length_bytes)276 static inline uint32_t get_length_from_var_tokudata (uchar* buf, uint32_t length_bytes) {
277     uint32_t length = (uint32_t)(buf[0]);
278     if (length_bytes == 2) {
279         uint32_t rest_of_length = (uint32_t)buf[1];
280         length += rest_of_length<<8;
281     }
282     return length;
283 }
284 
//
// Deduces how many bytes are used to store the length of a
// varstring/varbinary in a key field stored in tokudb.
// Parameters:
//      [in]    max_num_bytes - maximum number of data bytes of the field
// Returns:
//      1 when max_num_bytes is at most 255, otherwise 2
//
static inline uint32_t get_length_bytes_from_max(uint32_t max_num_bytes) {
    if (max_num_bytes <= 255) {
        return 1;
    }
    return 2;
}
292 
293 
294 
295 //
296 // assuming MySQL in little endian, and we are storing in little endian
297 //
pack_toku_int(uchar * to_tokudb,uchar * from_mysql,uint32_t num_bytes)298 static inline uchar* pack_toku_int (uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
299     switch (num_bytes) {
300     case (1):
301         memcpy(to_tokudb, from_mysql, 1);
302         break;
303     case (2):
304         memcpy(to_tokudb, from_mysql, 2);
305         break;
306     case (3):
307         memcpy(to_tokudb, from_mysql, 3);
308         break;
309     case (4):
310         memcpy(to_tokudb, from_mysql, 4);
311         break;
312     case (8):
313         memcpy(to_tokudb, from_mysql, 8);
314         break;
315     default:
316         assert_unreachable();
317     }
318     return to_tokudb+num_bytes;
319 }
320 
321 //
322 // assuming MySQL in little endian, and we are unpacking to little endian
323 //
unpack_toku_int(uchar * to_mysql,uchar * from_tokudb,uint32_t num_bytes)324 static inline uchar* unpack_toku_int(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
325     switch (num_bytes) {
326     case (1):
327         memcpy(to_mysql, from_tokudb, 1);
328         break;
329     case (2):
330         memcpy(to_mysql, from_tokudb, 2);
331         break;
332     case (3):
333         memcpy(to_mysql, from_tokudb, 3);
334         break;
335     case (4):
336         memcpy(to_mysql, from_tokudb, 4);
337         break;
338     case (8):
339         memcpy(to_mysql, from_tokudb, 8);
340         break;
341     default:
342         assert_unreachable();
343     }
344     return from_tokudb+num_bytes;
345 }
346 
cmp_toku_int(uchar * a_buf,uchar * b_buf,bool is_unsigned,uint32_t num_bytes)347 static inline int cmp_toku_int (uchar* a_buf, uchar* b_buf, bool is_unsigned, uint32_t num_bytes) {
348     int ret_val = 0;
349     //
350     // case for unsigned integers
351     //
352     if (is_unsigned) {
353         uint32_t a_num, b_num = 0;
354         uint64_t a_big_num, b_big_num = 0;
355         switch (num_bytes) {
356         case (1):
357             a_num = *a_buf;
358             b_num = *b_buf;
359             ret_val = a_num-b_num;
360             goto exit;
361         case (2):
362             a_num = uint2korr(a_buf);
363             b_num = uint2korr(b_buf);
364             ret_val = a_num-b_num;
365             goto exit;
366         case (3):
367             a_num = tokudb_uint3korr(a_buf);
368             b_num = tokudb_uint3korr(b_buf);
369             ret_val = a_num-b_num;
370             goto exit;
371         case (4):
372             a_num = uint4korr(a_buf);
373             b_num = uint4korr(b_buf);
374             if (a_num < b_num) {
375                 ret_val = -1; goto exit;
376             }
377             if (a_num > b_num) {
378                 ret_val = 1; goto exit;
379             }
380             ret_val = 0;
381             goto exit;
382         case (8):
383             a_big_num = uint8korr(a_buf);
384             b_big_num = uint8korr(b_buf);
385             if (a_big_num < b_big_num) {
386                 ret_val = -1; goto exit;
387             }
388             else if (a_big_num > b_big_num) {
389                 ret_val = 1; goto exit;
390             }
391             ret_val = 0;
392             goto exit;
393         default:
394             assert_unreachable();
395         }
396     }
397     //
398     // case for signed integers
399     //
400     else {
401         int32_t a_num, b_num = 0;
402         int64_t a_big_num, b_big_num = 0;
403         switch (num_bytes) {
404         case (1):
405             a_num = *(signed char *)a_buf;
406             b_num = *(signed char *)b_buf;
407             ret_val = a_num-b_num;
408             goto exit;
409         case (2):
410             a_num = sint2korr(a_buf);
411             b_num = sint2korr(b_buf);
412             ret_val = a_num-b_num;
413             goto exit;
414         case (3):
415             a_num = sint3korr(a_buf);
416             b_num = sint3korr(b_buf);
417             ret_val = a_num - b_num;
418             goto exit;
419         case (4):
420             a_num = sint4korr(a_buf);
421             b_num = sint4korr(b_buf);
422             if (a_num < b_num) {
423                 ret_val = -1; goto exit;
424             }
425             if (a_num > b_num) {
426                 ret_val = 1; goto exit;
427             }
428             ret_val = 0;
429             goto exit;
430         case (8):
431             a_big_num = sint8korr(a_buf);
432             b_big_num = sint8korr(b_buf);
433             if (a_big_num < b_big_num) {
434                 ret_val = -1; goto exit;
435             }
436             else if (a_big_num > b_big_num) {
437                 ret_val = 1; goto exit;
438             }
439             ret_val = 0;
440             goto exit;
441         default:
442             assert_unreachable();
443         }
444     }
445     //
446     // if this is hit, indicates bug in writing of this function
447     //
448     assert_unreachable();
449 exit:
450     return ret_val;
451 }
452 
pack_toku_double(uchar * to_tokudb,uchar * from_mysql)453 static inline uchar* pack_toku_double (uchar* to_tokudb, uchar* from_mysql) {
454     memcpy(to_tokudb, from_mysql, sizeof(double));
455     return to_tokudb + sizeof(double);
456 }
457 
458 
unpack_toku_double(uchar * to_mysql,uchar * from_tokudb)459 static inline uchar* unpack_toku_double(uchar* to_mysql, uchar* from_tokudb) {
460     memcpy(to_mysql, from_tokudb, sizeof(double));
461     return from_tokudb + sizeof(double);
462 }
463 
cmp_toku_double(uchar * a_buf,uchar * b_buf)464 static inline int cmp_toku_double(uchar* a_buf, uchar* b_buf) {
465     int ret_val;
466     double a_num;
467     double b_num;
468     doubleget(a_num, a_buf);
469     doubleget(b_num, b_buf);
470     if (a_num < b_num) {
471         ret_val = -1;
472         goto exit;
473     }
474     else if (a_num > b_num) {
475         ret_val = 1;
476         goto exit;
477     }
478     ret_val = 0;
479 exit:
480     return ret_val;
481 }
482 
483 
pack_toku_float(uchar * to_tokudb,uchar * from_mysql)484 static inline uchar* pack_toku_float (uchar* to_tokudb, uchar* from_mysql) {
485     memcpy(to_tokudb, from_mysql, sizeof(float));
486     return to_tokudb + sizeof(float);
487 }
488 
489 
unpack_toku_float(uchar * to_mysql,uchar * from_tokudb)490 static inline uchar* unpack_toku_float(uchar* to_mysql, uchar* from_tokudb) {
491     memcpy(to_mysql, from_tokudb, sizeof(float));
492     return from_tokudb + sizeof(float);
493 }
494 
cmp_toku_float(uchar * a_buf,uchar * b_buf)495 static inline int cmp_toku_float(uchar* a_buf, uchar* b_buf) {
496     int ret_val;
497     float a_num;
498     float b_num;
499     //
500     // This is the way Field_float::cmp gets the floats from the buffers
501     //
502     memcpy(&a_num, a_buf, sizeof(float));
503     memcpy(&b_num, b_buf, sizeof(float));
504     if (a_num < b_num) {
505         ret_val = -1;
506         goto exit;
507     }
508     else if (a_num > b_num) {
509         ret_val = 1;
510         goto exit;
511     }
512     ret_val = 0;
513 exit:
514     return ret_val;
515 }
516 
517 
pack_toku_binary(uchar * to_tokudb,uchar * from_mysql,uint32_t num_bytes)518 static inline uchar* pack_toku_binary(uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
519     memcpy(to_tokudb, from_mysql, num_bytes);
520     return to_tokudb + num_bytes;
521 }
522 
unpack_toku_binary(uchar * to_mysql,uchar * from_tokudb,uint32_t num_bytes)523 static inline uchar* unpack_toku_binary(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
524     memcpy(to_mysql, from_tokudb, num_bytes);
525     return from_tokudb + num_bytes;
526 }
527 
528 
cmp_toku_binary(uchar * a_buf,uint32_t a_num_bytes,uchar * b_buf,uint32_t b_num_bytes)529 static inline int cmp_toku_binary(
530     uchar* a_buf,
531     uint32_t a_num_bytes,
532     uchar* b_buf,
533     uint32_t b_num_bytes
534     )
535 {
536     int ret_val = 0;
537     uint32_t num_bytes_to_cmp = (a_num_bytes < b_num_bytes) ? a_num_bytes : b_num_bytes;
538     ret_val = memcmp(a_buf, b_buf, num_bytes_to_cmp);
539     if ((ret_val != 0) || (a_num_bytes == b_num_bytes)) {
540         goto exit;
541     }
542     if (a_num_bytes < b_num_bytes) {
543         ret_val = -1;
544         goto exit;
545     }
546     else {
547         ret_val = 1;
548         goto exit;
549     }
550 exit:
551     return ret_val;
552 }
553 
554 //
555 // partially copied from below
556 //
pack_toku_varbinary_from_desc(uchar * to_tokudb,const uchar * from_desc,uint32_t key_part_length,uint32_t field_length)557 static uchar* pack_toku_varbinary_from_desc(
558     uchar* to_tokudb,
559     const uchar* from_desc,
560     uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
561     uint32_t field_length //length of field
562     )
563 {
564     uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
565     uint32_t length = field_length;
566     set_if_smaller(length, key_part_length);
567 
568     //
569     // copy the length bytes, assuming both are in little endian
570     //
571     to_tokudb[0] = (uchar)length & 255;
572     if (length_bytes_in_tokudb > 1) {
573         to_tokudb[1] = (uchar) (length >> 8);
574     }
575     //
576     // copy the string
577     //
578     memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
579     return to_tokudb + length + length_bytes_in_tokudb;
580 }
581 
pack_toku_varbinary(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes)582 static inline uchar* pack_toku_varbinary(
583     uchar* to_tokudb,
584     uchar* from_mysql,
585     uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
586     uint32_t max_num_bytes
587     )
588 {
589     uint32_t length = 0;
590     uint32_t length_bytes_in_tokudb;
591     switch (length_bytes_in_mysql) {
592     case (0):
593         length = max_num_bytes;
594         break;
595     case (1):
596         length = (uint32_t)(*from_mysql);
597         break;
598     case (2):
599         length = uint2korr(from_mysql);
600         break;
601     case (3):
602         length = tokudb_uint3korr(from_mysql);
603         break;
604     case (4):
605         length = uint4korr(from_mysql);
606         break;
607     }
608 
609     //
610     // from this point on, functionality equivalent to pack_toku_varbinary_from_desc
611     //
612     set_if_smaller(length,max_num_bytes);
613 
614     length_bytes_in_tokudb = get_length_bytes_from_max(max_num_bytes);
615     //
616     // copy the length bytes, assuming both are in little endian
617     //
618     to_tokudb[0] = (uchar)length & 255;
619     if (length_bytes_in_tokudb > 1) {
620         to_tokudb[1] = (uchar) (length >> 8);
621     }
622     //
623     // copy the string
624     //
625     memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
626     return to_tokudb + length + length_bytes_in_tokudb;
627 }
628 
unpack_toku_varbinary(uchar * to_mysql,uchar * from_tokudb,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql)629 static inline uchar* unpack_toku_varbinary(
630     uchar* to_mysql,
631     uchar* from_tokudb,
632     uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
633     uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
634     )
635 {
636     uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
637 
638     //
639     // copy the length into the mysql buffer
640     //
641     switch (length_bytes_in_mysql) {
642     case (0):
643         break;
644     case (1):
645         *to_mysql = (uchar) length;
646         break;
647     case (2):
648         int2store(to_mysql, length);
649         break;
650     case (3):
651         int3store(to_mysql, length);
652         break;
653     case (4):
654         int4store(to_mysql, length);
655         break;
656     default:
657         assert_unreachable();
658     }
659     //
660     // copy the binary data
661     //
662     memcpy(to_mysql + length_bytes_in_mysql, from_tokudb + length_bytes_in_tokudb, length);
663     return from_tokudb + length_bytes_in_tokudb+ length;
664 }
665 
cmp_toku_varbinary(uchar * a_buf,uchar * b_buf,uint32_t length_bytes,uint32_t * a_bytes_read,uint32_t * b_bytes_read)666 static inline int cmp_toku_varbinary(
667     uchar* a_buf,
668     uchar* b_buf,
669     uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
670     uint32_t* a_bytes_read,
671     uint32_t* b_bytes_read
672     )
673 {
674     int ret_val = 0;
675     uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
676     uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
677     ret_val = cmp_toku_binary(
678         a_buf + length_bytes,
679         a_len,
680         b_buf + length_bytes,
681         b_len
682         );
683     *a_bytes_read = a_len + length_bytes;
684     *b_bytes_read = b_len + length_bytes;
685     return ret_val;
686 }
687 
pack_toku_blob(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes,const CHARSET_INFO * charset)688 static inline uchar* pack_toku_blob(
689     uchar* to_tokudb,
690     uchar* from_mysql,
691     uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
692     uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
693     uint32_t max_num_bytes,
694 #if MYSQL_VERSION_ID >= 50600
695     const CHARSET_INFO* charset
696 #else
697     CHARSET_INFO* charset
698 #endif
699     )
700 {
701     uint32_t length = 0;
702     uint32_t local_char_length = 0;
703     uchar* blob_buf = NULL;
704 
705     switch (length_bytes_in_mysql) {
706     case (0):
707         length = max_num_bytes;
708         break;
709     case (1):
710         length = (uint32_t)(*from_mysql);
711         break;
712     case (2):
713         length = uint2korr(from_mysql);
714         break;
715     case (3):
716         length = tokudb_uint3korr(from_mysql);
717         break;
718     case (4):
719         length = uint4korr(from_mysql);
720         break;
721     }
722     set_if_smaller(length,max_num_bytes);
723 
724     memcpy(&blob_buf,from_mysql+length_bytes_in_mysql,sizeof(uchar *));
725 
726     local_char_length= ((charset->mbmaxlen > 1) ?
727                        max_num_bytes/charset->mbmaxlen : max_num_bytes);
728     if (length > local_char_length)
729     {
730       local_char_length= my_charpos(
731         charset,
732         blob_buf,
733         blob_buf+length,
734         local_char_length
735         );
736       set_if_smaller(length, local_char_length);
737     }
738 
739 
740     //
741     // copy the length bytes, assuming both are in little endian
742     //
743     to_tokudb[0] = (uchar)length & 255;
744     if (length_bytes_in_tokudb > 1) {
745         to_tokudb[1] = (uchar) (length >> 8);
746     }
747     //
748     // copy the string
749     //
750     memcpy(to_tokudb + length_bytes_in_tokudb, blob_buf, length);
751     return to_tokudb + length + length_bytes_in_tokudb;
752 }
753 
754 
unpack_toku_blob(uchar * to_mysql,uchar * from_tokudb,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql)755 static inline uchar* unpack_toku_blob(
756     uchar* to_mysql,
757     uchar* from_tokudb,
758     uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
759     uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
760     )
761 {
762     uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
763     uchar* blob_pos = NULL;
764     //
765     // copy the length into the mysql buffer
766     //
767     switch (length_bytes_in_mysql) {
768     case (0):
769         break;
770     case (1):
771         *to_mysql = (uchar) length;
772         break;
773     case (2):
774         int2store(to_mysql, length);
775         break;
776     case (3):
777         int3store(to_mysql, length);
778         break;
779     case (4):
780         int4store(to_mysql, length);
781         break;
782     default:
783         assert_unreachable();
784     }
785     //
786     // copy the binary data
787     //
788     blob_pos = from_tokudb + length_bytes_in_tokudb;
789     memcpy(to_mysql + length_bytes_in_mysql, &blob_pos, sizeof(uchar *));
790     return from_tokudb + length_bytes_in_tokudb+ length;
791 }
792 
793 
794 //
795 // partially copied from below
796 //
pack_toku_varstring_from_desc(uchar * to_tokudb,const uchar * from_desc,uint32_t key_part_length,uint32_t field_length,uint32_t charset_num)797 static uchar* pack_toku_varstring_from_desc(
798     uchar* to_tokudb,
799     const uchar* from_desc,
800     uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
801     uint32_t field_length,
802     uint32_t charset_num//length of field
803     )
804 {
805     CHARSET_INFO* charset = NULL;
806     uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
807     uint32_t length = field_length;
808     uint32_t local_char_length = 0;
809     set_if_smaller(length, key_part_length);
810 
811     charset = get_charset_from_num(charset_num);
812 
813     //
814     // copy the string
815     //
816     local_char_length= ((charset->mbmaxlen > 1) ?
817                        key_part_length/charset->mbmaxlen : key_part_length);
818     if (length > local_char_length)
819     {
820       local_char_length= my_charpos(
821         charset,
822         from_desc,
823         from_desc+length,
824         local_char_length
825         );
826       set_if_smaller(length, local_char_length);
827     }
828 
829 
830     //
831     // copy the length bytes, assuming both are in little endian
832     //
833     to_tokudb[0] = (uchar)length & 255;
834     if (length_bytes_in_tokudb > 1) {
835         to_tokudb[1] = (uchar) (length >> 8);
836     }
837     //
838     // copy the string
839     //
840     memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
841     return to_tokudb + length + length_bytes_in_tokudb;
842 }
843 
pack_toku_varstring(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes,const CHARSET_INFO * charset)844 static inline uchar* pack_toku_varstring(
845     uchar* to_tokudb,
846     uchar* from_mysql,
847     uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
848     uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
849     uint32_t max_num_bytes,
850 #if MYSQL_VERSION_ID >= 50600
851     const CHARSET_INFO *charset
852 #else
853     CHARSET_INFO* charset
854 #endif
855     )
856 {
857     uint32_t length = 0;
858     uint32_t local_char_length = 0;
859 
860     switch (length_bytes_in_mysql) {
861     case (0):
862         length = max_num_bytes;
863         break;
864     case (1):
865         length = (uint32_t)(*from_mysql);
866         break;
867     case (2):
868         length = uint2korr(from_mysql);
869         break;
870     case (3):
871         length = tokudb_uint3korr(from_mysql);
872         break;
873     case (4):
874         length = uint4korr(from_mysql);
875         break;
876     }
877     set_if_smaller(length,max_num_bytes);
878 
879     local_char_length= ((charset->mbmaxlen > 1) ?
880                        max_num_bytes/charset->mbmaxlen : max_num_bytes);
881     if (length > local_char_length)
882     {
883       local_char_length= my_charpos(
884         charset,
885         from_mysql+length_bytes_in_mysql,
886         from_mysql+length_bytes_in_mysql+length,
887         local_char_length
888         );
889       set_if_smaller(length, local_char_length);
890     }
891 
892 
893     //
894     // copy the length bytes, assuming both are in little endian
895     //
896     to_tokudb[0] = (uchar)length & 255;
897     if (length_bytes_in_tokudb > 1) {
898         to_tokudb[1] = (uchar) (length >> 8);
899     }
900     //
901     // copy the string
902     //
903     memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
904     return to_tokudb + length + length_bytes_in_tokudb;
905 }
906 
cmp_toku_string(uchar * a_buf,uint32_t a_num_bytes,uchar * b_buf,uint32_t b_num_bytes,uint32_t charset_number)907 static inline int cmp_toku_string(
908     uchar* a_buf,
909     uint32_t a_num_bytes,
910     uchar* b_buf,
911     uint32_t b_num_bytes,
912     uint32_t charset_number
913     )
914 {
915     int ret_val = 0;
916     CHARSET_INFO* charset = NULL;
917 
918     charset = get_charset_from_num(charset_number);
919 
920     ret_val = charset->coll->strnncollsp(
921         charset,
922         a_buf,
923         a_num_bytes,
924         b_buf,
925         b_num_bytes
926         );
927     return ret_val;
928 }
929 
cmp_toku_varstring(uchar * a_buf,uchar * b_buf,uint32_t length_bytes,uint32_t charset_num,uint32_t * a_bytes_read,uint32_t * b_bytes_read)930 static inline int cmp_toku_varstring(
931     uchar* a_buf,
932     uchar* b_buf,
933     uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
934     uint32_t charset_num,
935     uint32_t* a_bytes_read,
936     uint32_t* b_bytes_read
937     )
938 {
939     int ret_val = 0;
940     uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
941     uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
942     ret_val = cmp_toku_string(
943         a_buf + length_bytes,
944         a_len,
945         b_buf + length_bytes,
946         b_len,
947         charset_num
948         );
949     *a_bytes_read = a_len + length_bytes;
950     *b_bytes_read = b_len + length_bytes;
951     return ret_val;
952 }
953 
tokudb_compare_two_hidden_keys(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size)954 static inline int tokudb_compare_two_hidden_keys(
955     const void* new_key_data,
956     const uint32_t new_key_size,
957     const void*  saved_key_data,
958     const uint32_t saved_key_size
959     ) {
960     assert_always(new_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
961     assert_always(saved_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
962     ulonglong a = hpk_char_to_num((uchar *) new_key_data);
963     ulonglong b = hpk_char_to_num((uchar *) saved_key_data);
964     return a < b ? -1 : (a > b ? 1 : 0);
965 }
966 
967 //
968 // Returns number of bytes used for a given TOKU_TYPE
969 // in a key descriptor. The number of bytes returned
970 // here MUST match the number of bytes used for the encoding
971 // in create_toku_key_descriptor_for_key
972 // Parameters:
973 //      [in]    row_desc - buffer that contains portion of descriptor
974 //              created in create_toku_key_descriptor_for_key. The first
975 //              byte points to the TOKU_TYPE.
976 //
skip_field_in_descriptor(uchar * row_desc)977 static uint32_t skip_field_in_descriptor(uchar* row_desc) {
978     uchar* row_desc_pos = row_desc;
979     TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
980     row_desc_pos++;
981 
982     switch (toku_type) {
983     case (toku_type_hpk):
984     case (toku_type_double):
985     case (toku_type_float):
986         break;
987     case (toku_type_int):
988         row_desc_pos += 2;
989         break;
990     case (toku_type_fixbinary):
991     case (toku_type_varbinary):
992         row_desc_pos++;
993         break;
994     case (toku_type_fixstring):
995     case (toku_type_varstring):
996     case (toku_type_blob):
997         row_desc_pos++;
998         row_desc_pos += sizeof(uint32_t);
999         break;
1000     default:
1001         assert_unreachable();
1002     }
1003     return (uint32_t)(row_desc_pos - row_desc);
1004 }
1005 
1006 //
1007 // outputs a descriptor for key into buf. Returns number of bytes used in buf
1008 // to store the descriptor. Number of bytes used MUST match number of bytes
1009 // we would skip in skip_field_in_descriptor
1010 //
create_toku_key_descriptor_for_key(KEY * key,uchar * buf)1011 static int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) {
1012     uchar* pos = buf;
1013     uint32_t num_bytes_in_field = 0;
1014     uint32_t charset_num = 0;
1015     for (uint i = 0; i < key->user_defined_key_parts; i++) {
1016         Field* field = key->key_part[i].field;
1017         //
1018         // The first byte states if there is a null byte
1019         // 0 means no null byte, non-zer means there
1020         // is one
1021         //
1022         *pos = field->null_bit;
1023         pos++;
1024 
1025         //
1026         // The second byte for each field is the type
1027         //
1028         TOKU_TYPE type = mysql_to_toku_type(field);
1029         assert_always((int)type < 256);
1030         *pos = (uchar)(type & 255);
1031         pos++;
1032 
1033         //
1034         // based on the type, extra data follows afterwards
1035         //
1036         switch (type) {
1037         //
1038         // two bytes follow for ints, first one states how many
1039         // bytes the int is (1 , 2, 3, 4 or 8)
1040         // next one states if it is signed or not
1041         //
1042         case (toku_type_int):
1043             num_bytes_in_field = field->pack_length();
1044             assert_always (num_bytes_in_field < 256);
1045             *pos = (uchar)(num_bytes_in_field & 255);
1046             pos++;
1047             *pos = (field->flags & UNSIGNED_FLAG) ? 1 : 0;
1048             pos++;
1049             break;
1050         //
1051         // nothing follows floats and doubles
1052         //
1053         case (toku_type_double):
1054         case (toku_type_float):
1055             break;
1056         //
1057         // one byte follow stating the length of the field
1058         //
1059         case (toku_type_fixbinary):
1060             num_bytes_in_field = field->pack_length();
1061             set_if_smaller(num_bytes_in_field, key->key_part[i].length);
1062             assert_always(num_bytes_in_field < 256);
1063             pos[0] = (uchar)(num_bytes_in_field & 255);
1064             pos++;
1065             break;
1066         //
1067         // one byte follows: the number of bytes used to encode the length
1068         //
1069         case (toku_type_varbinary):
1070             *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
1071             pos++;
1072             break;
1073         //
1074         // five bytes follow: one for the number of bytes to encode the length,
1075         //                           four for the charset number
1076         //
1077         case (toku_type_fixstring):
1078         case (toku_type_varstring):
1079         case (toku_type_blob):
1080             *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
1081             pos++;
1082             charset_num = field->charset()->number;
1083             pos[0] = (uchar)(charset_num & 255);
1084             pos[1] = (uchar)((charset_num >> 8) & 255);
1085             pos[2] = (uchar)((charset_num >> 16) & 255);
1086             pos[3] = (uchar)((charset_num >> 24) & 255);
1087             pos += 4;
1088             break;
1089         default:
1090             assert_unreachable();
1091         }
1092     }
1093     return pos - buf;
1094 }
1095 
1096 
1097 //
1098 // Creates a descriptor for a DB. That contains all information necessary
1099 // to do both key comparisons and data comparisons (for dup-sort databases).
1100 //
1101 // There are two types of descriptors we care about:
1102 // 1) Primary key, (in a no-dup database)
1103 // 2) secondary keys, which are a secondary key followed by a primary key,
1104 //      but in a no-dup database.
1105 //
1106 // I realize this may be confusing, but here is how it works.
1107 // All DB's have a key compare.
1108 // The format of the descriptor must be able to handle both.
1109 //
1110 // The first four bytes store an offset into the descriptor to the second piece
1111 // used for data comparisons. So, if in the future we want to append something
1112 // to the descriptor, we can.
1113 //
1114 //
create_toku_key_descriptor(uchar * buf,bool is_first_hpk,KEY * first_key,bool is_second_hpk,KEY * second_key)1115 static int create_toku_key_descriptor(
1116     uchar* buf,
1117     bool is_first_hpk,
1118     KEY* first_key,
1119     bool is_second_hpk,
1120     KEY* second_key
1121     )
1122 {
1123     //
1124     // The first four bytes always contain the offset of where the first key
1125     // ends.
1126     //
1127     uchar* pos = buf + 4;
1128     uint32_t num_bytes = 0;
1129     uint32_t offset = 0;
1130 
1131 
1132     if (is_first_hpk) {
1133         pos[0] = 0; //say there is NO infinity byte
1134         pos[1] = 0; //field cannot be NULL, stating it
1135         pos[2] = toku_type_hpk;
1136         pos += 3;
1137     }
1138     else {
1139         //
1140         // first key is NOT a hidden primary key, so we now pack first_key
1141         //
1142         pos[0] = 1; //say there is an infinity byte
1143         pos++;
1144         num_bytes = create_toku_key_descriptor_for_key(first_key, pos);
1145         pos += num_bytes;
1146     }
1147 
1148     //
1149     // if we do not have a second key, we can jump to exit right now
1150     // we do not have a second key if it is not a hidden primary key
1151     // and if second_key is NULL
1152     //
1153     if (is_first_hpk || (!is_second_hpk && (second_key == NULL)) ) {
1154         goto exit;
1155     }
1156 
1157     //
1158     // if we have a second key, and it is an hpk, we need to pack it, and
1159     // write in the offset to this position in the first four bytes
1160     //
1161     if (is_second_hpk) {
1162         pos[0] = 0; //field cannot be NULL, stating it
1163         pos[1] = toku_type_hpk;
1164         pos += 2;
1165     }
1166     else {
1167         //
1168         // second key is NOT a hidden primary key, so we now pack second_key
1169         //
1170         num_bytes = create_toku_key_descriptor_for_key(second_key, pos);
1171         pos += num_bytes;
1172     }
1173 
1174 
1175 exit:
1176     offset = pos - buf;
1177     buf[0] = (uchar)(offset & 255);
1178     buf[1] = (uchar)((offset >> 8) & 255);
1179     buf[2] = (uchar)((offset >> 16) & 255);
1180     buf[3] = (uchar)((offset >> 24) & 255);
1181 
1182     return pos - buf;
1183 }
1184 
1185 
compare_toku_field(uchar * a_buf,uchar * b_buf,uchar * row_desc,uint32_t * a_bytes_read,uint32_t * b_bytes_read,uint32_t * row_desc_bytes_read,bool * read_string)1186 static inline int compare_toku_field(
1187     uchar* a_buf,
1188     uchar* b_buf,
1189     uchar* row_desc,
1190     uint32_t* a_bytes_read,
1191     uint32_t* b_bytes_read,
1192     uint32_t* row_desc_bytes_read,
1193     bool* read_string
1194     )
1195 {
1196     int ret_val = 0;
1197     uchar* row_desc_pos = row_desc;
1198     uint32_t num_bytes = 0;
1199     uint32_t length_bytes = 0;
1200     uint32_t charset_num = 0;
1201     bool is_unsigned = false;
1202 
1203     TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
1204     row_desc_pos++;
1205 
1206     switch (toku_type) {
1207     case (toku_type_hpk):
1208         ret_val = tokudb_compare_two_hidden_keys(
1209             a_buf,
1210             TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH,
1211             b_buf,
1212             TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH
1213             );
1214         *a_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
1215         *b_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
1216         break;
1217     case (toku_type_int):
1218         num_bytes = row_desc_pos[0];
1219         is_unsigned = row_desc_pos[1];
1220         ret_val = cmp_toku_int(
1221             a_buf,
1222             b_buf,
1223             is_unsigned,
1224             num_bytes
1225             );
1226         *a_bytes_read = num_bytes;
1227         *b_bytes_read = num_bytes;
1228         row_desc_pos += 2;
1229         break;
1230     case (toku_type_double):
1231         ret_val = cmp_toku_double(a_buf, b_buf);
1232         *a_bytes_read = sizeof(double);
1233         *b_bytes_read = sizeof(double);
1234         break;
1235     case (toku_type_float):
1236         ret_val = cmp_toku_float(a_buf, b_buf);
1237         *a_bytes_read = sizeof(float);
1238         *b_bytes_read = sizeof(float);
1239         break;
1240     case (toku_type_fixbinary):
1241         num_bytes = row_desc_pos[0];
1242         ret_val = cmp_toku_binary(a_buf, num_bytes, b_buf,num_bytes);
1243         *a_bytes_read = num_bytes;
1244         *b_bytes_read = num_bytes;
1245         row_desc_pos++;
1246         break;
1247     case (toku_type_varbinary):
1248         length_bytes = row_desc_pos[0];
1249         ret_val = cmp_toku_varbinary(
1250             a_buf,
1251             b_buf,
1252             length_bytes,
1253             a_bytes_read,
1254             b_bytes_read
1255             );
1256         row_desc_pos++;
1257         break;
1258     case (toku_type_fixstring):
1259     case (toku_type_varstring):
1260     case (toku_type_blob):
1261         length_bytes = row_desc_pos[0];
1262         row_desc_pos++;
1263         //
1264         // not sure we want to read charset_num like this
1265         //
1266         charset_num = *(uint32_t *)row_desc_pos;
1267         row_desc_pos += sizeof(uint32_t);
1268         ret_val = cmp_toku_varstring(
1269             a_buf,
1270             b_buf,
1271             length_bytes,
1272             charset_num,
1273             a_bytes_read,
1274             b_bytes_read
1275             );
1276         *read_string = true;
1277         break;
1278     default:
1279         assert_unreachable();
1280     }
1281 
1282     *row_desc_bytes_read = row_desc_pos - row_desc;
1283     return ret_val;
1284 }
1285 
1286 //
1287 // packs a field from a  MySQL buffer into a tokudb buffer.
1288 // Used for inserts/updates
1289 //
pack_toku_key_field(uchar * to_tokudb,uchar * from_mysql,Field * field,uint32_t key_part_length)1290 static uchar* pack_toku_key_field(
1291     uchar* to_tokudb,
1292     uchar* from_mysql,
1293     Field* field,
1294     uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
1295     )
1296 {
1297     uchar* new_pos = NULL;
1298     uint32_t num_bytes = 0;
1299     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1300     switch(toku_type) {
1301     case (toku_type_int):
1302         assert_always(key_part_length == field->pack_length());
1303         new_pos = pack_toku_int(
1304             to_tokudb,
1305             from_mysql,
1306             field->pack_length()
1307             );
1308         goto exit;
1309     case (toku_type_double):
1310         assert_always(field->pack_length() == sizeof(double));
1311         assert_always(key_part_length == sizeof(double));
1312         new_pos = pack_toku_double(to_tokudb, from_mysql);
1313         goto exit;
1314     case (toku_type_float):
1315         assert_always(field->pack_length() == sizeof(float));
1316         assert_always(key_part_length == sizeof(float));
1317         new_pos = pack_toku_float(to_tokudb, from_mysql);
1318         goto exit;
1319     case (toku_type_fixbinary):
1320         num_bytes = field->pack_length();
1321         set_if_smaller(num_bytes, key_part_length);
1322         new_pos = pack_toku_binary(
1323             to_tokudb,
1324             from_mysql,
1325             num_bytes
1326             );
1327         goto exit;
1328     case (toku_type_fixstring):
1329         num_bytes = field->pack_length();
1330         set_if_smaller(num_bytes, key_part_length);
1331         new_pos = pack_toku_varstring(
1332             to_tokudb,
1333             from_mysql,
1334             get_length_bytes_from_max(key_part_length),
1335             0,
1336             num_bytes,
1337             field->charset()
1338             );
1339         goto exit;
1340     case (toku_type_varbinary):
1341         new_pos = pack_toku_varbinary(
1342             to_tokudb,
1343             from_mysql,
1344             ((Field_varstring *)field)->length_bytes,
1345             key_part_length
1346             );
1347         goto exit;
1348     case (toku_type_varstring):
1349         new_pos = pack_toku_varstring(
1350             to_tokudb,
1351             from_mysql,
1352             get_length_bytes_from_max(key_part_length),
1353             ((Field_varstring *)field)->length_bytes,
1354             key_part_length,
1355             field->charset()
1356             );
1357         goto exit;
1358     case (toku_type_blob):
1359         new_pos = pack_toku_blob(
1360             to_tokudb,
1361             from_mysql,
1362             get_length_bytes_from_max(key_part_length),
1363             ((Field_blob *)field)->row_pack_length(), //only calling this because packlength is returned
1364             key_part_length,
1365             field->charset()
1366             );
1367         goto exit;
1368     default:
1369         assert_unreachable();
1370     }
1371     assert_unreachable();
1372 exit:
1373     return new_pos;
1374 }
1375 
1376 //
1377 // packs a field from a  MySQL buffer into a tokudb buffer.
1378 // Used for queries. The only difference between this function
1379 // and pack_toku_key_field is that all variable sized columns
1380 // use 2 bytes to encode the length, regardless of the field
1381 // So varchar(4) will still use 2 bytes to encode the field
1382 //
pack_key_toku_key_field(uchar * to_tokudb,uchar * from_mysql,Field * field,uint32_t key_part_length)1383 static uchar* pack_key_toku_key_field(
1384     uchar* to_tokudb,
1385     uchar* from_mysql,
1386     Field* field,
1387     uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
1388     )
1389 {
1390     uchar* new_pos = NULL;
1391     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1392     switch(toku_type) {
1393     case (toku_type_int):
1394     case (toku_type_double):
1395     case (toku_type_float):
1396     case (toku_type_fixbinary):
1397     case (toku_type_fixstring):
1398         new_pos = pack_toku_key_field(to_tokudb, from_mysql, field, key_part_length);
1399         goto exit;
1400     case (toku_type_varbinary):
1401         new_pos = pack_toku_varbinary(
1402             to_tokudb,
1403             from_mysql,
1404             2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
1405             key_part_length
1406             );
1407         goto exit;
1408     case (toku_type_varstring):
1409     case (toku_type_blob):
1410         new_pos = pack_toku_varstring(
1411             to_tokudb,
1412             from_mysql,
1413             get_length_bytes_from_max(key_part_length),
1414             2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
1415             key_part_length,
1416             field->charset()
1417             );
1418         goto exit;
1419     default:
1420         assert_unreachable();
1421     }
1422 
1423     assert_unreachable();
1424 exit:
1425     return new_pos;
1426 }
1427 
1428 
unpack_toku_key_field(uchar * to_mysql,uchar * from_tokudb,Field * field,uint32_t key_part_length)1429 uchar* unpack_toku_key_field(
1430     uchar* to_mysql,
1431     uchar* from_tokudb,
1432     Field* field,
1433     uint32_t key_part_length) {
1434 
1435     uchar* new_pos = NULL;
1436     uint32_t num_bytes = 0;
1437     uint32_t num_bytes_copied;
1438     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1439     switch(toku_type) {
1440     case (toku_type_int):
1441         assert_always(key_part_length == field->pack_length());
1442         new_pos = unpack_toku_int(
1443             to_mysql,
1444             from_tokudb,
1445             field->pack_length()
1446             );
1447         goto exit;
1448     case (toku_type_double):
1449         assert_always(field->pack_length() == sizeof(double));
1450         assert_always(key_part_length == sizeof(double));
1451         new_pos = unpack_toku_double(to_mysql, from_tokudb);
1452         goto exit;
1453     case (toku_type_float):
1454         assert_always(field->pack_length() == sizeof(float));
1455         assert_always(key_part_length == sizeof(float));
1456         new_pos = unpack_toku_float(to_mysql, from_tokudb);
1457         goto exit;
1458     case (toku_type_fixbinary):
1459         num_bytes = field->pack_length();
1460         set_if_smaller(num_bytes, key_part_length);
1461         new_pos = unpack_toku_binary(
1462             to_mysql,
1463             from_tokudb,
1464             num_bytes);
1465         goto exit;
1466     case (toku_type_fixstring):
1467         num_bytes = field->pack_length();
1468         new_pos = unpack_toku_varbinary(
1469             to_mysql,
1470             from_tokudb,
1471             get_length_bytes_from_max(key_part_length),
1472             0);
1473         num_bytes_copied =
1474             new_pos -
1475             (from_tokudb + get_length_bytes_from_max(key_part_length));
1476         assert_always(num_bytes_copied <= num_bytes);
1477         memset(
1478             to_mysql + num_bytes_copied,
1479             field->charset()->pad_char,
1480             num_bytes - num_bytes_copied);
1481         goto exit;
1482     case (toku_type_varbinary):
1483     case (toku_type_varstring):
1484         new_pos = unpack_toku_varbinary(
1485             to_mysql,
1486             from_tokudb,
1487             get_length_bytes_from_max(key_part_length),
1488             ((Field_varstring*)field)->length_bytes);
1489         goto exit;
1490     case (toku_type_blob):
1491         new_pos = unpack_toku_blob(
1492             to_mysql,
1493             from_tokudb,
1494             get_length_bytes_from_max(key_part_length),
1495             //only calling this because packlength is returned
1496             ((Field_blob *)field)->row_pack_length());
1497         goto exit;
1498     default:
1499         assert_unreachable();
1500     }
1501     assert_unreachable();
1502 exit:
1503     return new_pos;
1504 }
1505 
1506 
tokudb_compare_two_keys(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size,const void * row_desc,const uint32_t row_desc_size,bool cmp_prefix,bool * read_string)1507 static int tokudb_compare_two_keys(
1508     const void* new_key_data,
1509     const uint32_t new_key_size,
1510     const void*  saved_key_data,
1511     const uint32_t saved_key_size,
1512     const void*  row_desc,
1513     const uint32_t row_desc_size,
1514     bool cmp_prefix,
1515     bool* read_string) {
1516 
1517     int ret_val = 0;
1518     int8_t new_key_inf_val = COL_NEG_INF;
1519     int8_t saved_key_inf_val = COL_NEG_INF;
1520 
1521     uchar* row_desc_ptr = (uchar *)row_desc;
1522     uchar *new_key_ptr = (uchar *)new_key_data;
1523     uchar *saved_key_ptr = (uchar *)saved_key_data;
1524 
1525     uint32_t new_key_bytes_left = new_key_size;
1526     uint32_t saved_key_bytes_left = saved_key_size;
1527 
1528     //
1529     // if the keys have an infinity byte, set it
1530     //
1531     if (row_desc_ptr[0]) {
1532         new_key_inf_val = (int8_t)new_key_ptr[0];
1533         saved_key_inf_val = (int8_t)saved_key_ptr[0];
1534         new_key_ptr++;
1535         saved_key_ptr++;
1536     }
1537     row_desc_ptr++;
1538 
1539     while ((uint32_t)(new_key_ptr - (uchar*)new_key_data) < new_key_size &&
1540            (uint32_t)(saved_key_ptr - (uchar*)saved_key_data) < saved_key_size &&
1541            (uint32_t)(row_desc_ptr - (uchar*)row_desc) < row_desc_size) {
1542         uint32_t new_key_field_length;
1543         uint32_t saved_key_field_length;
1544         uint32_t row_desc_field_length;
1545         //
1546         // if there is a null byte at this point in the key
1547         //
1548         if (row_desc_ptr[0]) {
1549             //
1550             // compare null bytes. If different, return
1551             //
1552             if (new_key_ptr[0] != saved_key_ptr[0]) {
1553                 ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
1554                 goto exit;
1555             }
1556             saved_key_ptr++;
1557             //
1558             // in case we just read the fact that new_key_ptr and saved_key_ptr
1559             // have NULL as their next field
1560             //
1561             if (!*new_key_ptr++) {
1562                 //
1563                 // skip row_desc_ptr[0] read in if clause
1564                 //
1565                 row_desc_ptr++;
1566                 //
1567                 // skip data that describes rest of field
1568                 //
1569                 row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
1570                 continue;
1571             }
1572         }
1573         row_desc_ptr++;
1574 
1575         ret_val = compare_toku_field(
1576             new_key_ptr,
1577             saved_key_ptr,
1578             row_desc_ptr,
1579             &new_key_field_length,
1580             &saved_key_field_length,
1581             &row_desc_field_length,
1582             read_string);
1583         new_key_ptr += new_key_field_length;
1584         saved_key_ptr += saved_key_field_length;
1585         row_desc_ptr += row_desc_field_length;
1586         if (ret_val) {
1587             goto exit;
1588         }
1589 
1590         assert_always(
1591             (uint32_t)(new_key_ptr - (uchar*)new_key_data) <= new_key_size);
1592         assert_always(
1593             (uint32_t)(saved_key_ptr - (uchar*)saved_key_data) <= saved_key_size);
1594         assert_always(
1595             (uint32_t)(row_desc_ptr - (uchar*)row_desc) <= row_desc_size);
1596     }
1597     new_key_bytes_left =
1598         new_key_size - ((uint32_t)(new_key_ptr - (uchar*)new_key_data));
1599     saved_key_bytes_left =
1600         saved_key_size - ((uint32_t)(saved_key_ptr - (uchar*)saved_key_data));
1601     if (cmp_prefix) {
1602         ret_val = 0;
1603     } else if (new_key_bytes_left== 0 && saved_key_bytes_left== 0) {
1604         // in this case, read both keys to completion, now read infinity byte
1605         ret_val = new_key_inf_val - saved_key_inf_val;
1606     } else if (new_key_bytes_left == 0 && saved_key_bytes_left > 0) {
1607         // at this point, one SHOULD be 0
1608         ret_val = (new_key_inf_val == COL_POS_INF ) ? 1 : -1;
1609     } else if (new_key_bytes_left > 0 && saved_key_bytes_left == 0) {
1610         ret_val = (saved_key_inf_val == COL_POS_INF ) ? -1 : 1;
1611     } else {
1612         // this should never happen, perhaps we should assert(false)
1613         assert_unreachable();
1614         ret_val = new_key_bytes_left - saved_key_bytes_left;
1615     }
1616 exit:
1617     return ret_val;
1618 }
1619 
simple_memcmp(const DBT * keya,const DBT * keyb)1620 static int simple_memcmp(const DBT *keya, const DBT *keyb) {
1621     int cmp;
1622     int num_bytes_cmp = keya->size < keyb->size ?
1623         keya->size : keyb->size;
1624     cmp = memcmp(keya->data,keyb->data,num_bytes_cmp);
1625     if (cmp == 0 && (keya->size != keyb->size)) {
1626         cmp = keya->size < keyb->size ? -1 : 1;
1627     }
1628     return cmp;
1629 }
1630 
1631 // comparison function to be used by the fractal trees.
tokudb_cmp_dbt_key(DB * file,const DBT * keya,const DBT * keyb)1632 static int tokudb_cmp_dbt_key(DB* file, const DBT *keya, const DBT *keyb) {
1633     int cmp;
1634     if (file->cmp_descriptor->dbt.size == 0) {
1635         cmp = simple_memcmp(keya, keyb);
1636     }
1637     else {
1638         bool read_string = false;
1639         cmp = tokudb_compare_two_keys(
1640             keya->data,
1641             keya->size,
1642             keyb->data,
1643             keyb->size,
1644             (uchar *)file->cmp_descriptor->dbt.data + 4,
1645             (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
1646             false,
1647             &read_string
1648             );
1649         // comparison above may be case-insensitive, but fractal tree
1650         // needs to distinguish between different data, so we do this
1651         // additional check here
1652         if (read_string && (cmp == 0)) {
1653             cmp = simple_memcmp(keya, keyb);
1654         }
1655     }
1656     return cmp;
1657 }
1658 
1659 //TODO: QQQ Only do one direction for prefix.
tokudb_prefix_cmp_dbt_key(DB * file,const DBT * keya,const DBT * keyb)1660 static int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb) {
1661     // calls to this function are done by the handlerton, and are
1662     // comparing just the keys as MySQL would compare them.
1663     bool read_string = false;
1664     int cmp = tokudb_compare_two_keys(
1665         keya->data,
1666         keya->size,
1667         keyb->data,
1668         keyb->size,
1669         (uchar *)file->cmp_descriptor->dbt.data + 4,
1670         *(uint32_t *)file->cmp_descriptor->dbt.data - 4,
1671         true,
1672         &read_string
1673         );
1674     return cmp;
1675 }
1676 
tokudb_compare_two_key_parts(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size,const void * row_desc,const uint32_t row_desc_size,uint max_parts)1677 static int tokudb_compare_two_key_parts(
1678     const void* new_key_data,
1679     const uint32_t new_key_size,
1680     const void*  saved_key_data,
1681     const uint32_t saved_key_size,
1682     const void*  row_desc,
1683     const uint32_t row_desc_size,
1684     uint max_parts
1685     )
1686 {
1687     int ret_val = 0;
1688 
1689     uchar* row_desc_ptr = (uchar *)row_desc;
1690     uchar *new_key_ptr = (uchar *)new_key_data;
1691     uchar *saved_key_ptr = (uchar *)saved_key_data;
1692 
1693     //
1694     // if the keys have an infinity byte, set it
1695     //
1696     if (row_desc_ptr[0]) {
1697         // new_key_inf_val = (int8_t)new_key_ptr[0];
1698         // saved_key_inf_val = (int8_t)saved_key_ptr[0];
1699         new_key_ptr++;
1700         saved_key_ptr++;
1701     }
1702     row_desc_ptr++;
1703 
1704     for (uint i = 0; i < max_parts; i++) {
1705         if (!((uint32_t)(new_key_ptr - (uchar *)new_key_data) < new_key_size &&
1706                (uint32_t)(saved_key_ptr - (uchar *)saved_key_data) < saved_key_size &&
1707                (uint32_t)(row_desc_ptr - (uchar *)row_desc) < row_desc_size))
1708             break;
1709         uint32_t new_key_field_length;
1710         uint32_t saved_key_field_length;
1711         uint32_t row_desc_field_length;
1712         //
1713         // if there is a null byte at this point in the key
1714         //
1715         if (row_desc_ptr[0]) {
1716             //
1717             // compare null bytes. If different, return
1718             //
1719             if (new_key_ptr[0] != saved_key_ptr[0]) {
1720                 ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
1721                 goto exit;
1722             }
1723             saved_key_ptr++;
1724             //
1725             // in case we just read the fact that new_key_ptr and saved_key_ptr
1726             // have NULL as their next field
1727             //
1728             if (!*new_key_ptr++) {
1729                 //
1730                 // skip row_desc_ptr[0] read in if clause
1731                 //
1732                 row_desc_ptr++;
1733                 //
1734                 // skip data that describes rest of field
1735                 //
1736                 row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
1737                 continue;
1738             }
1739         }
1740         row_desc_ptr++;
1741         bool read_string = false;
1742         ret_val = compare_toku_field(
1743             new_key_ptr,
1744             saved_key_ptr,
1745             row_desc_ptr,
1746             &new_key_field_length,
1747             &saved_key_field_length,
1748             &row_desc_field_length,
1749             &read_string
1750             );
1751         new_key_ptr += new_key_field_length;
1752         saved_key_ptr += saved_key_field_length;
1753         row_desc_ptr += row_desc_field_length;
1754         if (ret_val) {
1755             goto exit;
1756         }
1757 
1758         assert_always((uint32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size);
1759         assert_always((uint32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size);
1760         assert_always((uint32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size);
1761     }
1762 
1763     ret_val = 0;
1764 exit:
1765     return ret_val;
1766 }
1767 
tokudb_cmp_dbt_key_parts(DB * file,const DBT * keya,const DBT * keyb,uint max_parts)1768 static int tokudb_cmp_dbt_key_parts(DB *file, const DBT *keya, const DBT *keyb, uint max_parts) {
1769     assert_always(file->cmp_descriptor->dbt.size);
1770     return tokudb_compare_two_key_parts(
1771             keya->data,
1772             keya->size,
1773             keyb->data,
1774             keyb->size,
1775             (uchar *)file->cmp_descriptor->dbt.data + 4,
1776             (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
1777             max_parts);
1778 }
1779 
create_toku_main_key_pack_descriptor(uchar * buf)1780 static uint32_t create_toku_main_key_pack_descriptor (
1781     uchar* buf
1782     )
1783 {
1784     //
1785     // The first four bytes always contain the offset of where the first key
1786     // ends.
1787     //
1788     uchar* pos = buf + 4;
1789     uint32_t offset = 0;
1790     //
1791     // one byte states if this is the main dictionary
1792     //
1793     pos[0] = 1;
1794     pos++;
1795     goto exit;
1796 
1797 
1798 exit:
1799     offset = pos - buf;
1800     buf[0] = (uchar)(offset & 255);
1801     buf[1] = (uchar)((offset >> 8) & 255);
1802     buf[2] = (uchar)((offset >> 16) & 255);
1803     buf[3] = (uchar)((offset >> 24) & 255);
1804 
1805     return pos - buf;
1806 }
1807 
// tag bytes written by pack_desc_char_info: states whether a four byte
// charset number follows the tag
#define COL_HAS_NO_CHARSET 0x44
#define COL_HAS_CHARSET 0x55

// tag bytes written by pack_desc_pk_offset_info: the four bytes after the tag
// are either a byte offset (FIX) or the index of the key part within the
// primary key (VAR)
#define COL_FIX_PK_OFFSET 0x66
#define COL_VAR_PK_OFFSET 0x77

// tag bytes that open a range in the clustering val pack descriptor; each tag
// is followed by a four byte start and a four byte end
#define CK_FIX_RANGE 0x88
#define CK_VAR_RANGE 0x99

//
// Copies kc_info->cp_info[pk_index][field_index].col_pack_val (four bytes)
// to pos and advances pos past it. Expects pos, kc_info, pk_index and
// field_index to be in scope at the point of use.
// Wrapped in do { } while (0) so that the expansion is a single statement
// and "COPY_OFFSET_TO_BUF;" is safe as the body of an unbraced if/else.
//
#define COPY_OFFSET_TO_BUF do { \
    memcpy ( \
        pos, \
        &kc_info->cp_info[pk_index][field_index].col_pack_val, \
        sizeof(uint32_t) \
        ); \
    pos += sizeof(uint32_t); \
} while (0)
1823 
1824 
pack_desc_pk_info(uchar * buf,KEY_AND_COL_INFO * kc_info,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1825 static uint32_t pack_desc_pk_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1826     uchar* pos = buf;
1827     uint16 field_index = key_part->field->field_index;
1828     Field* field = table_share->field[field_index];
1829     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1830     uint32_t key_part_length = key_part->length;
1831     uint32_t field_length;
1832     uchar len_bytes = 0;
1833 
1834     switch(toku_type) {
1835     case (toku_type_int):
1836     case (toku_type_double):
1837     case (toku_type_float):
1838         pos[0] = COL_FIX_FIELD;
1839         pos++;
1840         assert_always(kc_info->field_lengths[field_index] < 256);
1841         pos[0] = kc_info->field_lengths[field_index];
1842         pos++;
1843         break;
1844     case (toku_type_fixbinary):
1845         pos[0] = COL_FIX_FIELD;
1846         pos++;
1847         field_length = field->pack_length();
1848         set_if_smaller(key_part_length, field_length);
1849         assert_always(key_part_length < 256);
1850         pos[0] = (uchar)key_part_length;
1851         pos++;
1852         break;
1853     case (toku_type_fixstring):
1854     case (toku_type_varbinary):
1855     case (toku_type_varstring):
1856     case (toku_type_blob):
1857         pos[0] = COL_VAR_FIELD;
1858         pos++;
1859         len_bytes = (key_part_length > 255) ? 2 : 1;
1860         pos[0] = len_bytes;
1861         pos++;
1862         break;
1863     default:
1864         assert_unreachable();
1865     }
1866 
1867     return pos - buf;
1868 }
1869 
pack_desc_pk_offset_info(uchar * buf,KEY_PART_INFO * key_part,KEY * prim_key,uchar * pk_info)1870 static uint32_t pack_desc_pk_offset_info(uchar* buf,
1871                                          KEY_PART_INFO* key_part,
1872                                          KEY* prim_key,
1873                                          uchar* pk_info) {
1874     uchar* pos = buf;
1875     uint16 field_index = key_part->field->field_index;
1876     bool found_col_in_pk = false;
1877     uint32_t index_in_pk;
1878 
1879     bool is_constant_offset = true;
1880     uint32_t offset = 0;
1881     for (uint i = 0; i < prim_key->user_defined_key_parts; i++) {
1882         KEY_PART_INFO curr = prim_key->key_part[i];
1883         uint16 curr_field_index = curr.field->field_index;
1884 
1885         if (pk_info[2*i] == COL_VAR_FIELD) {
1886             is_constant_offset = false;
1887         }
1888 
1889         if (curr_field_index == field_index) {
1890             found_col_in_pk = true;
1891             index_in_pk = i;
1892             break;
1893         }
1894         offset += pk_info[2*i + 1];
1895     }
1896     assert_always(found_col_in_pk);
1897     if (is_constant_offset) {
1898         pos[0] = COL_FIX_PK_OFFSET;
1899         pos++;
1900 
1901         memcpy (pos, &offset, sizeof(offset));
1902         pos += sizeof(offset);
1903     }
1904     else {
1905         pos[0] = COL_VAR_PK_OFFSET;
1906         pos++;
1907 
1908         memcpy(pos, &index_in_pk, sizeof(index_in_pk));
1909         pos += sizeof(index_in_pk);
1910     }
1911     return pos - buf;
1912 }
1913 
pack_desc_offset_info(uchar * buf,KEY_AND_COL_INFO * kc_info,uint pk_index,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1914 static uint32_t pack_desc_offset_info(uchar* buf, KEY_AND_COL_INFO* kc_info, uint pk_index, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1915     uchar* pos = buf;
1916     uint16 field_index = key_part->field->field_index;
1917     Field* field = table_share->field[field_index];
1918     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1919     bool found_index = false;
1920 
1921     switch(toku_type) {
1922     case (toku_type_int):
1923     case (toku_type_double):
1924     case (toku_type_float):
1925     case (toku_type_fixbinary):
1926     case (toku_type_fixstring):
1927         pos[0] = COL_FIX_FIELD;
1928         pos++;
1929 
1930         // copy the offset
1931         COPY_OFFSET_TO_BUF;
1932         break;
1933     case (toku_type_varbinary):
1934     case (toku_type_varstring):
1935         pos[0] = COL_VAR_FIELD;
1936         pos++;
1937 
1938         // copy the offset
1939         COPY_OFFSET_TO_BUF;
1940         break;
1941     case (toku_type_blob):
1942         pos[0] = COL_BLOB_FIELD;
1943         pos++;
1944         for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
1945             uint32_t blob_index = kc_info->blob_fields[i];
1946             if (blob_index == field_index) {
1947                 uint32_t val = i;
1948                 memcpy(pos, &val, sizeof(uint32_t));
1949                 pos += sizeof(uint32_t);
1950                 found_index = true;
1951                 break;
1952             }
1953         }
1954         assert_always(found_index);
1955         break;
1956     default:
1957         assert_unreachable();
1958     }
1959 
1960     return pos - buf;
1961 }
1962 
pack_desc_key_length_info(uchar * buf,KEY_AND_COL_INFO * kc_info,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1963 static uint32_t pack_desc_key_length_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1964     uchar* pos = buf;
1965     uint16 field_index = key_part->field->field_index;
1966     Field* field = table_share->field[field_index];
1967     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1968     uint32_t key_part_length = key_part->length;
1969     uint32_t field_length;
1970 
1971     switch(toku_type) {
1972     case (toku_type_int):
1973     case (toku_type_double):
1974     case (toku_type_float):
1975         // copy the key_part length
1976         field_length = kc_info->field_lengths[field_index];
1977         memcpy(pos, &field_length, sizeof(field_length));
1978         pos += sizeof(key_part_length);
1979         break;
1980     case (toku_type_fixbinary):
1981     case (toku_type_fixstring):
1982         field_length = field->pack_length();
1983         set_if_smaller(key_part_length, field_length);
1984         // fallthrough
1985     case (toku_type_varbinary):
1986     case (toku_type_varstring):
1987     case (toku_type_blob):
1988         // copy the key_part length
1989         memcpy(pos, &key_part_length, sizeof(key_part_length));
1990         pos += sizeof(key_part_length);
1991         break;
1992     default:
1993         assert_unreachable();
1994     }
1995 
1996     return pos - buf;
1997 }
1998 
pack_desc_char_info(uchar * buf,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1999 static uint32_t pack_desc_char_info(uchar* buf,
2000                                     TABLE_SHARE* table_share,
2001                                     KEY_PART_INFO* key_part) {
2002     uchar* pos = buf;
2003     uint16 field_index = key_part->field->field_index;
2004     Field* field = table_share->field[field_index];
2005     TOKU_TYPE toku_type = mysql_to_toku_type(field);
2006     uint32_t charset_num = 0;
2007 
2008     switch(toku_type) {
2009     case (toku_type_int):
2010     case (toku_type_double):
2011     case (toku_type_float):
2012     case (toku_type_fixbinary):
2013     case (toku_type_varbinary):
2014         pos[0] = COL_HAS_NO_CHARSET;
2015         pos++;
2016         break;
2017     case (toku_type_fixstring):
2018     case (toku_type_varstring):
2019     case (toku_type_blob):
2020         pos[0] = COL_HAS_CHARSET;
2021         pos++;
2022 
2023         // copy the charset
2024         charset_num = field->charset()->number;
2025         pos[0] = (uchar)(charset_num & 255);
2026         pos[1] = (uchar)((charset_num >> 8) & 255);
2027         pos[2] = (uchar)((charset_num >> 16) & 255);
2028         pos[3] = (uchar)((charset_num >> 24) & 255);
2029         pos += 4;
2030         break;
2031     default:
2032         assert_unreachable();
2033     }
2034 
2035     return pos - buf;
2036 }
2037 
pack_some_row_info(uchar * buf,uint pk_index,TABLE_SHARE * table_share,KEY_AND_COL_INFO * kc_info)2038 static uint32_t pack_some_row_info (
2039     uchar* buf,
2040     uint pk_index,
2041     TABLE_SHARE* table_share,
2042     KEY_AND_COL_INFO* kc_info
2043     )
2044 {
2045     uchar* pos = buf;
2046     uint32_t num_null_bytes = 0;
2047     //
2048     // four bytes stating number of null bytes
2049     //
2050     num_null_bytes = table_share->null_bytes;
2051     memcpy(pos, &num_null_bytes, sizeof(num_null_bytes));
2052     pos += sizeof(num_null_bytes);
2053     //
2054     // eight bytes stating mcp_info
2055     //
2056     memcpy(pos, &kc_info->mcp_info[pk_index], sizeof(MULTI_COL_PACK_INFO));
2057     pos += sizeof(MULTI_COL_PACK_INFO);
2058     //
2059     // one byte for the number of offset bytes
2060     //
2061     pos[0] = (uchar)kc_info->num_offset_bytes;
2062     pos++;
2063 
2064     return pos - buf;
2065 }
2066 
get_max_clustering_val_pack_desc_size(TABLE_SHARE * table_share)2067 static uint32_t get_max_clustering_val_pack_desc_size(
2068     TABLE_SHARE* table_share
2069     )
2070 {
2071     uint32_t ret_val = 0;
2072     //
2073     // the fixed stuff:
2074     //  first the things in pack_some_row_info
2075     //  second another mcp_info
2076     //  third a byte that states if blobs exist
2077     ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
2078     ret_val += sizeof(MULTI_COL_PACK_INFO);
2079     ret_val++;
2080     //
2081     // now the variable stuff
2082     //  an upper bound is, for each field, byte stating if it is fixed or var, followed
2083     // by 8 bytes for endpoints
2084     //
2085     ret_val += (table_share->fields)*(1 + 2*sizeof(uint32_t));
2086     //
2087     // four bytes storing the length of this portion
2088     //
2089     ret_val += 4;
2090 
2091     return ret_val;
2092 }
2093 
create_toku_clustering_val_pack_descriptor(uchar * buf,uint pk_index,TABLE_SHARE * table_share,KEY_AND_COL_INFO * kc_info,uint32_t keynr,bool is_clustering)2094 static uint32_t create_toku_clustering_val_pack_descriptor (
2095     uchar* buf,
2096     uint pk_index,
2097     TABLE_SHARE* table_share,
2098     KEY_AND_COL_INFO* kc_info,
2099     uint32_t keynr,
2100     bool is_clustering
2101     )
2102 {
2103     uchar* pos = buf + 4;
2104     uint32_t offset = 0;
2105     bool start_range_set = false;
2106     uint32_t last_col = 0;
2107     //
2108     // do not need to write anything if the key is not clustering
2109     //
2110     if (!is_clustering) {
2111         goto exit;
2112     }
2113 
2114     pos += pack_some_row_info(
2115         pos,
2116         pk_index,
2117         table_share,
2118         kc_info
2119         );
2120 
2121     //
2122     // eight bytes stating mcp_info of clustering key
2123     //
2124     memcpy(pos, &kc_info->mcp_info[keynr], sizeof(MULTI_COL_PACK_INFO));
2125     pos += sizeof(MULTI_COL_PACK_INFO);
2126 
2127     //
2128     // store bit that states if blobs exist
2129     //
2130     pos[0] = (kc_info->num_blobs) ? 1 : 0;
2131     pos++;
2132 
2133     //
2134     // descriptor assumes that all fields filtered from pk are
2135     // also filtered from clustering key val. Doing check here to
2136     // make sure something unexpected does not happen
2137     //
2138     for (uint i = 0; i < table_share->fields; i++) {
2139         bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2140         bool col_filtered_in_pk = bitmap_is_set(&kc_info->key_filters[pk_index],i);
2141         if (col_filtered_in_pk) {
2142             assert_always(col_filtered);
2143         }
2144     }
2145 
2146     //
2147     // first handle the fixed fields
2148     //
2149     start_range_set = false;
2150     last_col = 0;
2151     for (uint i = 0; i < table_share->fields; i++) {
2152         bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2153         if (!is_fixed_field(kc_info, i)) {
2154             //
2155             // not a fixed field, continue
2156             //
2157             continue;
2158         }
2159         if (col_filtered && start_range_set) {
2160             //
2161             // need to set the end range
2162             //
2163             start_range_set = false;
2164             uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val + kc_info->field_lengths[last_col];
2165             memcpy(pos, &end_offset, sizeof(end_offset));
2166             pos += sizeof(end_offset);
2167         }
2168         else if (!col_filtered) {
2169             if (!start_range_set) {
2170                 pos[0] = CK_FIX_RANGE;
2171                 pos++;
2172                 start_range_set = true;
2173                 uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
2174                 memcpy(pos, &start_offset , sizeof(start_offset));
2175                 pos += sizeof(start_offset);
2176             }
2177             last_col = i;
2178         }
2179         else {
2180             continue;
2181         }
2182     }
2183     if (start_range_set) {
2184         //
2185         // need to set the end range
2186         //
2187         start_range_set = false;
2188         uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val+ kc_info->field_lengths[last_col];
2189         memcpy(pos, &end_offset, sizeof(end_offset));
2190         pos += sizeof(end_offset);
2191     }
2192 
2193     //
2194     // now handle the var fields
2195     //
2196     start_range_set = false;
2197     last_col = 0;
2198     for (uint i = 0; i < table_share->fields; i++) {
2199         bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2200         if (!is_variable_field(kc_info, i)) {
2201             //
2202             // not a var field, continue
2203             //
2204             continue;
2205         }
2206         if (col_filtered && start_range_set) {
2207             //
2208             // need to set the end range
2209             //
2210             start_range_set = false;
2211             uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
2212             memcpy(pos, &end_offset, sizeof(end_offset));
2213             pos += sizeof(end_offset);
2214         }
2215         else if (!col_filtered) {
2216             if (!start_range_set) {
2217                 pos[0] = CK_VAR_RANGE;
2218                 pos++;
2219 
2220                 start_range_set = true;
2221                 uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
2222                 memcpy(pos, &start_offset , sizeof(start_offset));
2223                 pos += sizeof(start_offset);
2224             }
2225             last_col = i;
2226         }
2227         else {
2228             continue;
2229         }
2230     }
2231     if (start_range_set) {
2232         start_range_set = false;
2233         uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
2234         memcpy(pos, &end_offset, sizeof(end_offset));
2235         pos += sizeof(end_offset);
2236     }
2237 
2238 exit:
2239     offset = pos - buf;
2240     buf[0] = (uchar)(offset & 255);
2241     buf[1] = (uchar)((offset >> 8) & 255);
2242     buf[2] = (uchar)((offset >> 16) & 255);
2243     buf[3] = (uchar)((offset >> 24) & 255);
2244 
2245     return pos - buf;
2246 }
2247 
pack_clustering_val_from_desc(uchar * buf,void * row_desc,uint32_t row_desc_size,const DBT * pk_val)2248 static uint32_t pack_clustering_val_from_desc(
2249     uchar* buf,
2250     void* row_desc,
2251     uint32_t row_desc_size,
2252     const DBT* pk_val
2253     )
2254 {
2255     uchar* null_bytes_src_ptr = NULL;
2256     uchar* fixed_src_ptr = NULL;
2257     uchar* var_src_offset_ptr = NULL;
2258     uchar* var_src_data_ptr = NULL;
2259     uchar* fixed_dest_ptr = NULL;
2260     uchar* var_dest_offset_ptr = NULL;
2261     uchar* var_dest_data_ptr = NULL;
2262     uchar* orig_var_dest_data_ptr = NULL;
2263     uchar* desc_pos = (uchar *)row_desc;
2264     uint32_t num_null_bytes = 0;
2265     uint32_t num_offset_bytes;
2266     MULTI_COL_PACK_INFO src_mcp_info, dest_mcp_info;
2267     uchar has_blobs;
2268 
2269     memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
2270     desc_pos += sizeof(num_null_bytes);
2271 
2272     memcpy(&src_mcp_info, desc_pos, sizeof(src_mcp_info));
2273     desc_pos += sizeof(src_mcp_info);
2274 
2275     num_offset_bytes = desc_pos[0];
2276     desc_pos++;
2277 
2278     memcpy(&dest_mcp_info, desc_pos, sizeof(dest_mcp_info));
2279     desc_pos += sizeof(dest_mcp_info);
2280 
2281     has_blobs = desc_pos[0];
2282     desc_pos++;
2283 
2284     //
2285     //set the variables
2286     //
2287     null_bytes_src_ptr = (uchar *)pk_val->data;
2288     fixed_src_ptr = null_bytes_src_ptr + num_null_bytes;
2289     var_src_offset_ptr = fixed_src_ptr + src_mcp_info.fixed_field_size;
2290     var_src_data_ptr = var_src_offset_ptr + src_mcp_info.len_of_offsets;
2291 
2292     fixed_dest_ptr = buf + num_null_bytes;
2293     var_dest_offset_ptr = fixed_dest_ptr + dest_mcp_info.fixed_field_size;
2294     var_dest_data_ptr = var_dest_offset_ptr + dest_mcp_info.len_of_offsets;
2295     orig_var_dest_data_ptr = var_dest_data_ptr;
2296 
2297     //
2298     // copy the null bytes
2299     //
2300     memcpy(buf, null_bytes_src_ptr, num_null_bytes);
2301     while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
2302         uint32_t start, end, length;
2303         uchar curr = desc_pos[0];
2304         desc_pos++;
2305 
2306         memcpy(&start, desc_pos, sizeof(start));
2307         desc_pos += sizeof(start);
2308 
2309         memcpy(&end, desc_pos, sizeof(end));
2310         desc_pos += sizeof(end);
2311 
2312         assert_always (start <= end);
2313 
2314         if (curr == CK_FIX_RANGE) {
2315             length = end - start;
2316 
2317             memcpy(fixed_dest_ptr, fixed_src_ptr + start, length);
2318             fixed_dest_ptr += length;
2319         }
2320         else if (curr == CK_VAR_RANGE) {
2321             uint32_t start_data_size;
2322             uint32_t start_data_offset;
2323             uint32_t end_data_size;
2324             uint32_t end_data_offset;
2325             uint32_t offset_diffs;
2326 
2327             get_var_field_info(
2328                 &start_data_size,
2329                 &start_data_offset,
2330                 start,
2331                 var_src_offset_ptr,
2332                 num_offset_bytes
2333                 );
2334             get_var_field_info(
2335                 &end_data_size,
2336                 &end_data_offset,
2337                 end,
2338                 var_src_offset_ptr,
2339                 num_offset_bytes
2340                 );
2341             length = end_data_offset + end_data_size - start_data_offset;
2342             //
2343             // copy the data
2344             //
2345             memcpy(
2346                 var_dest_data_ptr,
2347                 var_src_data_ptr + start_data_offset,
2348                 length
2349                 );
2350             var_dest_data_ptr += length;
2351 
2352             //
2353             // put in offset info
2354             //
2355             offset_diffs = (end_data_offset + end_data_size) - (uint32_t)(var_dest_data_ptr - orig_var_dest_data_ptr);
2356             for (uint32_t i = start; i <= end; i++) {
2357                 if ( num_offset_bytes == 1 ) {
2358                     assert_always(offset_diffs < 256);
2359                     var_dest_offset_ptr[0] = var_src_offset_ptr[i] - (uchar)offset_diffs;
2360                     var_dest_offset_ptr++;
2361                 } else if ( num_offset_bytes == 2 ) {
2362                     uint32_t tmp = uint2korr(var_src_offset_ptr + 2*i);
2363                     uint32_t new_offset = tmp - offset_diffs;
2364                     assert_always(new_offset < 1<<16);
2365                     int2store(var_dest_offset_ptr,new_offset);
2366                     var_dest_offset_ptr += 2;
2367                 } else {
2368                     assert_unreachable();
2369                 }
2370             }
2371         } else {
2372             assert_unreachable();
2373         }
2374     }
2375     //
2376     // copy blobs
2377     // at this point, var_dest_data_ptr is pointing to the end, where blobs should be located
2378     // so, we put the blobs at var_dest_data_ptr
2379     //
2380     if (has_blobs) {
2381         uint32_t num_blob_bytes;
2382         uint32_t start_offset;
2383         uchar* src_blob_ptr = NULL;
2384         get_blob_field_info(
2385             &start_offset,
2386             src_mcp_info.len_of_offsets,
2387             var_src_data_ptr,
2388             num_offset_bytes
2389             );
2390         src_blob_ptr = var_src_data_ptr + start_offset;
2391         num_blob_bytes = pk_val->size - (start_offset + (var_src_data_ptr - null_bytes_src_ptr));
2392         memcpy(var_dest_data_ptr, src_blob_ptr, num_blob_bytes);
2393         var_dest_data_ptr += num_blob_bytes;
2394     }
2395     return var_dest_data_ptr - buf;
2396 }
2397 
2398 
get_max_secondary_key_pack_desc_size(KEY_AND_COL_INFO * kc_info)2399 static uint32_t get_max_secondary_key_pack_desc_size(
2400     KEY_AND_COL_INFO* kc_info
2401     )
2402 {
2403     uint32_t ret_val = 0;
2404     //
2405     // the fixed stuff:
2406     //  byte that states if main dictionary
2407     //  byte that states if hpk
2408     //  the things in pack_some_row_info
2409     ret_val++;
2410     ret_val++;
2411     ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
2412     //
2413     // now variable sized stuff
2414     //
2415 
2416     //  first the blobs
2417     ret_val += sizeof(kc_info->num_blobs);
2418     ret_val+= kc_info->num_blobs;
2419 
2420     // then the pk
2421     // one byte for num key parts
2422     // two bytes for each key part
2423     ret_val++;
2424     ret_val += MAX_REF_PARTS*2;
2425 
2426     // then the key
2427     // null bit, then null byte,
2428     // then 1 byte stating what it is, then 4 for offset, 4 for key length,
2429     //      1 for if charset exists, and 4 for charset
2430     ret_val += MAX_REF_PARTS*(1 + sizeof(uint32_t) + 1 + 3*sizeof(uint32_t) + 1);
2431     //
2432     // four bytes storing the length of this portion
2433     //
2434     ret_val += 4;
2435     return ret_val;
2436 }
2437 
create_toku_secondary_key_pack_descriptor(uchar * buf,bool has_hpk,uint pk_index,TABLE_SHARE * table_share,TABLE * table,KEY_AND_COL_INFO * kc_info,KEY * key_info,KEY * prim_key)2438 static uint32_t create_toku_secondary_key_pack_descriptor (
2439     uchar* buf,
2440     bool has_hpk,
2441     uint pk_index,
2442     TABLE_SHARE* table_share,
2443     TABLE* table,
2444     KEY_AND_COL_INFO* kc_info,
2445     KEY* key_info,
2446     KEY* prim_key
2447     )
2448 {
2449     //
2450     // The first four bytes always contain the offset of where the first key
2451     // ends.
2452     //
2453     uchar* pk_info = NULL;
2454     uchar* pos = buf + 4;
2455     uint32_t offset = 0;
2456 
2457     //
2458     // first byte states that it is NOT main dictionary
2459     //
2460     pos[0] = 0;
2461     pos++;
2462 
2463     //
2464     // one byte states if main dictionary has an hpk or not
2465     //
2466     if (has_hpk) {
2467         pos[0] = 1;
2468     }
2469     else {
2470         pos[0] = 0;
2471     }
2472     pos++;
2473 
2474     pos += pack_some_row_info(
2475         pos,
2476         pk_index,
2477         table_share,
2478         kc_info
2479         );
2480 
2481     //
2482     // store blob information
2483     //
2484     memcpy(pos, &kc_info->num_blobs, sizeof(kc_info->num_blobs));
2485     pos += sizeof(uint32_t);
2486     for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
2487         //
2488         // store length bytes for each blob
2489         //
2490         Field* field = table_share->field[kc_info->blob_fields[i]];
2491         pos[0] = (uchar)field->row_pack_length();
2492         pos++;
2493     }
2494 
2495     //
2496     // store the pk information
2497     //
2498     if (has_hpk) {
2499         pos[0] = 0;
2500         pos++;
2501     }
2502     else {
2503         //
2504         // store number of parts
2505         //
2506         assert_always(prim_key->user_defined_key_parts < 128);
2507         pos[0] = 2 * prim_key->user_defined_key_parts;
2508         pos++;
2509         //
2510         // for each part, store if it is a fixed field or var field
2511         // if fixed, store number of bytes, if var, store
2512         // number of length bytes
2513         // total should be two bytes per key part stored
2514         //
2515         pk_info = pos;
2516         uchar* tmp = pos;
2517         for (uint i = 0; i < prim_key->user_defined_key_parts; i++) {
2518             tmp += pack_desc_pk_info(
2519                 tmp,
2520                 kc_info,
2521                 table_share,
2522                 &prim_key->key_part[i]
2523                 );
2524         }
2525         //
2526         // asserting that we moved forward as much as we think we have
2527         //
2528         assert_always(tmp - pos == (2 * prim_key->user_defined_key_parts));
2529         pos = tmp;
2530     }
2531 
2532     for (uint i = 0; i < key_info->user_defined_key_parts; i++) {
2533         KEY_PART_INFO curr_kpi = key_info->key_part[i];
2534         uint16 field_index = curr_kpi.field->field_index;
2535         Field* field = table_share->field[field_index];
2536         bool is_col_in_pk = false;
2537 
2538         if (bitmap_is_set(&kc_info->key_filters[pk_index],field_index)) {
2539             assert_always(!has_hpk);
2540             assert_always(prim_key != nullptr);
2541             is_col_in_pk = true;
2542         }
2543         else {
2544             is_col_in_pk = false;
2545         }
2546 
2547         pos[0] = field->null_bit;
2548         pos++;
2549 
2550         if (is_col_in_pk) {
2551             //
2552             // assert that columns in pk do not have a null bit
2553             // because in MySQL, pk columns cannot be null
2554             //
2555             assert_always(!field->null_bit);
2556         }
2557 
2558         if (field->null_bit) {
2559             uint32_t null_offset = get_null_offset(table,table->field[field_index]);
2560             memcpy(pos, &null_offset, sizeof(uint32_t));
2561             pos += sizeof(uint32_t);
2562         }
2563         if (is_col_in_pk) {
2564             pos += pack_desc_pk_offset_info(pos, &curr_kpi, prim_key, pk_info);
2565         }
2566         else {
2567             pos += pack_desc_offset_info(
2568                 pos,
2569                 kc_info,
2570                 pk_index,
2571                 table_share,
2572                 &curr_kpi
2573                 );
2574         }
2575         pos += pack_desc_key_length_info(
2576             pos,
2577             kc_info,
2578             table_share,
2579             &curr_kpi
2580             );
2581         pos += pack_desc_char_info(pos, table_share, &curr_kpi);
2582     }
2583 
2584     offset = pos - buf;
2585     buf[0] = (uchar)(offset & 255);
2586     buf[1] = (uchar)((offset >> 8) & 255);
2587     buf[2] = (uchar)((offset >> 16) & 255);
2588     buf[3] = (uchar)((offset >> 24) & 255);
2589 
2590     return pos - buf;
2591 }
2592 
skip_key_in_desc(uchar * row_desc)2593 static uint32_t skip_key_in_desc(
2594     uchar* row_desc
2595     )
2596 {
2597     uchar* pos = row_desc;
2598     uchar col_bin_or_char;
2599     //
2600     // skip the byte that states if it is a fix field or var field, we do not care
2601     //
2602     pos++;
2603 
2604     //
2605     // skip the offset information
2606     //
2607     pos += sizeof(uint32_t);
2608 
2609     //
2610     // skip the key_part_length info
2611     //
2612     pos += sizeof(uint32_t);
2613     col_bin_or_char = pos[0];
2614     pos++;
2615     if (col_bin_or_char == COL_HAS_NO_CHARSET) {
2616         goto exit;
2617     }
2618     //
2619     // skip the charset info
2620     //
2621     pos += 4;
2622 
2623 
2624 exit:
2625     return (uint32_t)(pos-row_desc);
2626 }
2627 
2628 
max_key_size_from_desc(void * row_desc,uint32_t row_desc_size)2629 static uint32_t max_key_size_from_desc(
2630     void* row_desc,
2631     uint32_t row_desc_size
2632     )
2633 {
2634     uchar* desc_pos = (uchar *)row_desc;
2635     uint32_t num_blobs;
2636     uint32_t num_pk_columns;
2637     //
2638     // start at 1 for the infinity byte
2639     //
2640     uint32_t max_size = 1;
2641 
2642     // skip byte that states if main dictionary
2643     bool is_main_dictionary = desc_pos[0];
2644     desc_pos++;
2645     assert_always(!is_main_dictionary);
2646 
2647     // skip hpk byte
2648     desc_pos++;
2649 
2650     // skip num_null_bytes
2651     desc_pos += sizeof(uint32_t);
2652 
2653     // skip mcp_info
2654     desc_pos += sizeof(MULTI_COL_PACK_INFO);
2655 
2656     // skip offset_bytes
2657     desc_pos++;
2658 
2659     // skip over blobs
2660     memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
2661     desc_pos += sizeof(num_blobs);
2662     desc_pos += num_blobs;
2663 
2664     // skip over pk info
2665     num_pk_columns = desc_pos[0]/2;
2666     desc_pos++;
2667     desc_pos += 2*num_pk_columns;
2668 
2669     while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
2670         uchar has_charset;
2671         uint32_t key_length = 0;
2672 
2673         uchar null_bit = desc_pos[0];
2674         desc_pos++;
2675 
2676         if (null_bit) {
2677             //
2678             // column is NULLable, skip null_offset, and add a null byte
2679             //
2680             max_size++;
2681             desc_pos += sizeof(uint32_t);
2682         }
2683         //
2684         // skip over byte that states if fix or var
2685         //
2686         desc_pos++;
2687 
2688         // skip over offset
2689         desc_pos += sizeof(uint32_t);
2690 
2691         //
2692         // get the key length and add it to return value
2693         //
2694         memcpy(&key_length, desc_pos, sizeof(key_length));
2695         desc_pos += sizeof(key_length);
2696         max_size += key_length;
2697         max_size += 2; // 2 bytes for a potential length bytes, we are upperbounding, does not need to be super tight
2698 
2699         has_charset = desc_pos[0];
2700         desc_pos++;
2701 
2702         uint32_t charset_num;
2703         if (has_charset == COL_HAS_CHARSET) {
2704             // skip over charsent num
2705             desc_pos += sizeof(charset_num);
2706         }
2707         else {
2708             assert_always(has_charset == COL_HAS_NO_CHARSET);
2709         }
2710     }
2711     return max_size;
2712 }
2713 
//
// Packs a key for a non-main dictionary into buf, using the row descriptor
// to locate each key column inside the primary key (pk_key) or the packed
// primary row value (pk_val), and then appends the primary key itself.
//
// Descriptor layout, as consumed by this function:
//   1 byte   is_main_dictionary (asserted to be 0)
//   1 byte   hpk flag
//   uint32   num_null_bytes
//   MULTI_COL_PACK_INFO
//   1 byte   num_offset_bytes
//   uint32   num_blobs, followed by num_blobs bytes (one length-size per blob)
//   1 byte   2*num_pk_columns, followed by 2 bytes per pk column
//   then, repeated until row_desc_size bytes have been consumed, one entry
//   per key column:
//     1 byte   null_bit (0 means the column is not nullable)
//     uint32   null_offset (present only when null_bit != 0)
//     1 byte   col_fix_val (COL_FIX_FIELD, COL_VAR_FIELD, COL_BLOB_FIELD,
//              COL_FIX_PK_OFFSET or COL_VAR_PK_OFFSET)
//     uint32   col_pack_val (offset or index, meaning depends on col_fix_val)
//     uint32   key_length
//     1 byte   has_charset (COL_HAS_CHARSET or COL_HAS_NO_CHARSET)
//     uint32   charset_num (present only when has_charset == COL_HAS_CHARSET)
//
// Parameters:
//   buf           [out] destination for the packed key
//   row_desc      [in]  descriptor bytes laid out as above
//   row_desc_size [in]  total size of row_desc in bytes
//   pk_key        [in]  primary key; when hpk is false its first byte is
//                       skipped as an infinity byte
//   pk_val        [in]  packed primary row: null bytes, fixed fields,
//                       var-field offsets, then var-field/blob data
//
// Returns the number of bytes written to buf.
//
// NOTE(review): nothing here bounds-checks writes to buf or reads from
// row_desc / pk_key / pk_val; the caller presumably sizes buf with an
// upper bound computed from the same descriptor -- confirm at call sites.
//
static uint32_t pack_key_from_desc(
    uchar* buf,
    void* row_desc,
    uint32_t row_desc_size,
    const DBT* pk_key,
    const DBT* pk_val) {

    MULTI_COL_PACK_INFO mcp_info;
    uint32_t num_null_bytes;
    uint32_t num_blobs;
    uint32_t num_pk_columns;
    uchar* blob_lengths = NULL;
    uchar* pk_info = NULL;
    uchar* pk_data_ptr = NULL;
    uchar* null_bytes_ptr = NULL;
    uchar* fixed_field_ptr = NULL;
    uchar* var_field_offset_ptr = NULL;
    const uchar* var_field_data_ptr = NULL;
    uint32_t num_offset_bytes;
    uchar* packed_key_pos = buf;
    uchar* desc_pos = (uchar *)row_desc;

    // this routine only handles descriptors of non-main dictionaries
    bool is_main_dictionary = desc_pos[0];
    desc_pos++;
    assert_always(!is_main_dictionary);

    //
    // get the constant info out of descriptor
    //
    // hpk: presumably "hidden primary key"; it controls whether pk_key
    // carries a leading infinity byte (see below)
    bool hpk = desc_pos[0];
    desc_pos++;

    // multi-byte values are memcpy'd out because desc_pos has no
    // alignment guarantee
    memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
    desc_pos += sizeof(num_null_bytes);

    memcpy(&mcp_info, desc_pos, sizeof(mcp_info));
    desc_pos += sizeof(mcp_info);

    num_offset_bytes = desc_pos[0];
    desc_pos++;

    memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
    desc_pos += sizeof(num_blobs);

    // one byte per blob: the number of bytes used to store that blob's length
    blob_lengths = desc_pos;
    desc_pos += num_blobs;

    // the descriptor stores 2*num_pk_columns; each pk column then has a
    // 2-byte record: [kind (COL_FIX_FIELD/COL_VAR_FIELD), size info]
    num_pk_columns = desc_pos[0]/2;
    desc_pos++;
    pk_info = desc_pos;
    desc_pos += 2*num_pk_columns;

    //
    // now start packing the key
    //

    //
    // pack the infinity byte
    //
    packed_key_pos[0] = COL_ZERO;
    packed_key_pos++;
    //
    // now start packing each column of the key, as described in descriptor
    //
    // NOTE(review): pk_data_ptr stays NULL when hpk is set, so the
    // COL_*_PK_OFFSET branches below rely on never being reached in that
    // case -- confirm against the descriptor writer
    if (!hpk) {
        // +1 for the infinity byte
        pk_data_ptr = (uchar *)pk_key->data + 1;
    }
    // carve pk_val into its consecutive regions
    null_bytes_ptr = (uchar *)pk_val->data;
    fixed_field_ptr = null_bytes_ptr + num_null_bytes;
    var_field_offset_ptr = fixed_field_ptr + mcp_info.fixed_field_size;
    var_field_data_ptr = var_field_offset_ptr + mcp_info.len_of_offsets;
    // one iteration per key-column entry in the descriptor
    while ((uint32_t)(desc_pos - (uchar*)row_desc) < row_desc_size) {
        uchar col_fix_val;
        uchar has_charset;
        uint32_t col_pack_val = 0;
        uint32_t key_length = 0;

        // nonzero null_bit is the mask to test within the row's null bytes
        uchar null_bit = desc_pos[0];
        desc_pos++;

        if (null_bit) {
            //
            // column is NULLable, need to check the null bytes to see if it is NULL
            //
            uint32_t null_offset = 0;
            bool is_field_null;
            memcpy(&null_offset, desc_pos, sizeof(null_offset));
            desc_pos += sizeof(null_offset);

            is_field_null = (null_bytes_ptr[null_offset] & null_bit) ? true: false;
            if (is_field_null) {
                // NULL columns contribute only the marker byte; the rest of
                // this descriptor entry is skipped (skip_key_in_desc
                // presumably returns its remaining length -- confirm)
                packed_key_pos[0] = NULL_COL_VAL;
                packed_key_pos++;
                desc_pos += skip_key_in_desc(desc_pos);
                continue;
            } else {
                packed_key_pos[0] = NONNULL_COL_VAL;
                packed_key_pos++;
            }
        }
        //
        // now pack the column (unless it was NULL, and we continued)
        //
        // where the column lives and how it is stored
        col_fix_val = desc_pos[0];
        desc_pos++;

        // offset or index whose meaning depends on col_fix_val
        memcpy(&col_pack_val, desc_pos, sizeof(col_pack_val));
        desc_pos += sizeof(col_pack_val);

        memcpy(&key_length, desc_pos, sizeof(key_length));
        desc_pos += sizeof(key_length);

        has_charset = desc_pos[0];
        desc_pos++;

        uint32_t charset_num = 0;
        if (has_charset == COL_HAS_CHARSET) {
            memcpy(&charset_num, desc_pos, sizeof(charset_num));
            desc_pos += sizeof(charset_num);
        } else {
            assert_always(has_charset == COL_HAS_NO_CHARSET);
        }
        //
        // case where column is in pk val
        //
        if (col_fix_val == COL_FIX_FIELD ||
            col_fix_val == COL_VAR_FIELD ||
            col_fix_val == COL_BLOB_FIELD) {
            if (col_fix_val == COL_FIX_FIELD &&
                has_charset == COL_HAS_NO_CHARSET) {
                // fixed-size field without charset: raw copy of key_length
                // bytes from offset col_pack_val in the fixed-field region
                memcpy(
                    packed_key_pos,
                    &fixed_field_ptr[col_pack_val],
                    key_length);
                packed_key_pos += key_length;
            } else if (col_fix_val == COL_VAR_FIELD &&
                       has_charset == COL_HAS_NO_CHARSET) {
                // variable-size field without charset: packed as varbinary;
                // col_pack_val is passed to get_var_field_info to look the
                // field up in the offsets region
                uint32_t data_start_offset = 0;

                uint32_t data_size = 0;
                get_var_field_info(
                    &data_size,
                    &data_start_offset,
                    col_pack_val,
                    var_field_offset_ptr,
                    num_offset_bytes);

                //
                // length of this field in this row is data_size
                // data is located beginning at var_field_data_ptr + data_start_offset
                //
                packed_key_pos = pack_toku_varbinary_from_desc(
                    packed_key_pos,
                    var_field_data_ptr + data_start_offset,
                    //number of bytes to use to encode the length in to_tokudb
                    key_length,
                    //length of field
                    data_size);
            } else {
                // remaining pk-val cases (fixed or var field with a charset,
                // or any blob): locate the data, then pack it as a varstring
                const uchar* data_start = NULL;
                uint32_t data_start_offset = 0;
                uint32_t data_size = 0;

                if (col_fix_val == COL_FIX_FIELD) {
                    data_start_offset = col_pack_val;
                    data_size = key_length;
                    data_start = fixed_field_ptr + data_start_offset;
                } else if (col_fix_val == COL_VAR_FIELD){
                    get_var_field_info(
                        &data_size,
                        &data_start_offset,
                        col_pack_val,
                        var_field_offset_ptr,
                        num_offset_bytes);
                    data_start = var_field_data_ptr + data_start_offset;
                } else if (col_fix_val == COL_BLOB_FIELD) {
                    // for blobs, col_pack_val is the blob's index
                    uint32_t blob_index = col_pack_val;
                    uint32_t blob_offset;
                    const uchar* blob_ptr = NULL;
                    uint32_t field_len;
                    // NOTE(review): blob_lengths is indexed here before the
                    // num_blobs > 0 assertion below runs
                    uint32_t field_len_bytes = blob_lengths[blob_index];
                    // blob_offset is then used as the offset of the first
                    // blob within the var-field data region
                    get_blob_field_info(
                        &blob_offset,
                        mcp_info.len_of_offsets,
                        var_field_data_ptr,
                        num_offset_bytes);
                    blob_ptr = var_field_data_ptr + blob_offset;
                    assert_always(num_blobs > 0);

                    // skip over other blobs to get to the one we want to
                    // make a key out of
                    for (uint32_t i = 0; i < blob_index; i++) {
                        blob_ptr = unpack_toku_field_blob(
                            NULL,
                            blob_ptr,
                            blob_lengths[i],
                            true);
                    }
                    // at this point, blob_ptr is pointing to the blob we
                    // want to make a key from
                    field_len = get_blob_field_len(blob_ptr, field_len_bytes);
                    // now we set the variables to make the key
                    data_start = blob_ptr + field_len_bytes;
                    data_size = field_len;
                } else {
                    assert_unreachable();
                }

                packed_key_pos = pack_toku_varstring_from_desc(packed_key_pos,
                    data_start,
                    key_length,
                    data_size,
                    charset_num);
            }
        } else {
            // case where column is in pk key
            if (col_fix_val == COL_FIX_PK_OFFSET) {
                // col_pack_val is a byte offset into the pk data (past the
                // infinity byte); copy key_length bytes verbatim
                memcpy(packed_key_pos, &pk_data_ptr[col_pack_val], key_length);
                packed_key_pos += key_length;
            } else if (col_fix_val == COL_VAR_PK_OFFSET) {
                // col_pack_val is the column's index within the pk, so the
                // preceding pk columns have to be walked to find its start
                uchar* tmp_pk_data_ptr = pk_data_ptr;
                uint32_t index_in_pk = col_pack_val;
                //
                // skip along in pk to the right column
                //
                for (uint32_t i = 0; i < index_in_pk; i++) {
                    if (pk_info[2*i] == COL_FIX_FIELD) {
                        // second byte is the fixed field's size in bytes
                        tmp_pk_data_ptr += pk_info[2*i + 1];
                    } else if (pk_info[2*i] == COL_VAR_FIELD) {
                        // second byte is the size (1 or 2) of the length
                        // prefix stored in front of the data
                        uint32_t len_bytes = pk_info[2*i + 1];
                        uint32_t len;
                        if (len_bytes == 1) {
                            len = tmp_pk_data_ptr[0];
                            tmp_pk_data_ptr++;
                        } else if (len_bytes == 2) {
                            len = uint2korr(tmp_pk_data_ptr);
                            tmp_pk_data_ptr += 2;
                        } else {
                            assert_unreachable();
                        }
                        tmp_pk_data_ptr += len;
                    } else {
                        assert_unreachable();
                    }
                }
                //
                // at this point, tmp_pk_data_ptr is pointing at the column
                //
                uint32_t is_fix_field = pk_info[2*index_in_pk];
                if (is_fix_field == COL_FIX_FIELD) {
                    memcpy(packed_key_pos, tmp_pk_data_ptr, key_length);
                    packed_key_pos += key_length;
                } else if (is_fix_field == COL_VAR_FIELD) {
                    const uchar* data_start = NULL;
                    uint32_t data_size = 0;
                    uint32_t len_bytes = pk_info[2*index_in_pk + 1];
                    // read the 1- or 2-byte length prefix, then step past it
                    if (len_bytes == 1) {
                        data_size = tmp_pk_data_ptr[0];
                        tmp_pk_data_ptr++;
                    } else if (len_bytes == 2) {
                        data_size = uint2korr(tmp_pk_data_ptr);
                        tmp_pk_data_ptr += 2;
                    } else {
                        assert_unreachable();
                    }
                    data_start = tmp_pk_data_ptr;

                    // charset columns are packed as varstring, the rest as
                    // varbinary
                    if (has_charset == COL_HAS_CHARSET) {
                        packed_key_pos = pack_toku_varstring_from_desc(
                            packed_key_pos,
                            data_start,
                            key_length,
                            data_size,
                            charset_num);
                    } else if (has_charset == COL_HAS_NO_CHARSET) {
                        packed_key_pos = pack_toku_varbinary_from_desc(
                            packed_key_pos,
                            data_start,
                            key_length,
                            data_size);
                    } else {
                        assert_unreachable();
                    }
                } else {
                    assert_unreachable();
                }
            } else {
                assert_unreachable();
            }
        }

    }
    // the entries must have consumed the descriptor exactly
    assert_always( (uint32_t)(desc_pos - (uchar *)row_desc) == row_desc_size);

    //
    // now append the primary key to the end of the key
    //
    if (hpk) {
        memcpy(packed_key_pos, pk_key->data, pk_key->size);
        packed_key_pos += pk_key->size;
    } else {
        // drop the pk's own leading infinity byte
        memcpy(packed_key_pos, (uchar *)pk_key->data + 1, pk_key->size - 1);
        packed_key_pos += (pk_key->size - 1);
    }

    return (uint32_t)(packed_key_pos - buf);
}
3022 
fields_have_same_name(Field * a,Field * b)3023 static bool fields_have_same_name(Field* a, Field* b) {
3024     return strcmp(a->field_name.str, b->field_name.str) == 0;
3025 }
3026 
fields_are_same_type(Field * a,Field * b)3027 static bool fields_are_same_type(Field* a, Field* b) {
3028     bool retval = true;
3029     enum_field_types a_mysql_type = a->real_type();
3030     enum_field_types b_mysql_type = b->real_type();
3031     TOKU_TYPE a_toku_type = mysql_to_toku_type(a);
3032     TOKU_TYPE b_toku_type = mysql_to_toku_type(b);
3033     // make sure have same names
3034     // make sure have same types
3035     if (a_mysql_type != b_mysql_type) {
3036         retval = false;
3037         goto cleanup;
3038     }
3039     // Thanks to MariaDB 5.5, we can have two fields
3040     // be the same MySQL type but not the same toku type,
3041     // This is an issue introduced with MariaDB's fractional time
3042     // implementation
3043     if (a_toku_type != b_toku_type) {
3044         retval = false;
3045         goto cleanup;
3046     }
3047     // make sure that either both are nullable, or both not nullable
3048     if ((a->null_bit && !b->null_bit) || (!a->null_bit && b->null_bit)) {
3049         retval = false;
3050         goto cleanup;
3051     }
3052     switch (a_mysql_type) {
3053     case MYSQL_TYPE_TINY:
3054     case MYSQL_TYPE_SHORT:
3055     case MYSQL_TYPE_INT24:
3056     case MYSQL_TYPE_LONG:
3057     case MYSQL_TYPE_LONGLONG:
3058         // length, unsigned, auto increment
3059         if (a->pack_length() != b->pack_length() ||
3060             (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
3061             (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
3062             retval = false;
3063             goto cleanup;
3064         }
3065         break;
3066     case MYSQL_TYPE_DOUBLE:
3067     case MYSQL_TYPE_FLOAT:
3068         // length, unsigned, auto increment
3069         if (a->pack_length() != b->pack_length() ||
3070             (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
3071             (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
3072             retval = false;
3073             goto cleanup;
3074         }
3075         break;
3076     case MYSQL_TYPE_NEWDECIMAL:
3077         // length, unsigned
3078         if (a->pack_length() != b->pack_length() ||
3079             (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG)) {
3080             retval = false;
3081             goto cleanup;
3082         }
3083         break;
3084     case MYSQL_TYPE_ENUM: {
3085         Field_enum *a_enum = static_cast<Field_enum *>(a);
3086         if (!a_enum->eq_def(b)) {
3087             retval = false;
3088             goto cleanup;
3089         }
3090         break;
3091     }
3092     case MYSQL_TYPE_SET: {
3093         Field_set *a_set = static_cast<Field_set *>(a);
3094         if (!a_set->eq_def(b)) {
3095             retval = false;
3096             goto cleanup;
3097         }
3098         break;
3099     }
3100     case MYSQL_TYPE_BIT:
3101         // length
3102         if (a->pack_length() != b->pack_length()) {
3103             retval = false;
3104             goto cleanup;
3105         }
3106         break;
3107     case MYSQL_TYPE_DATE:
3108     case MYSQL_TYPE_DATETIME:
3109     case MYSQL_TYPE_YEAR:
3110     case MYSQL_TYPE_NEWDATE:
3111     case MYSQL_TYPE_TIME:
3112     case MYSQL_TYPE_TIMESTAMP:
3113 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
3114     (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
3115     (100000 <= MYSQL_VERSION_ID)
3116     case MYSQL_TYPE_DATETIME2:
3117     case MYSQL_TYPE_TIMESTAMP2:
3118     case MYSQL_TYPE_TIME2:
3119 #endif
3120         // length
3121         if (a->pack_length() != b->pack_length()) {
3122             retval = false;
3123             goto cleanup;
3124         }
3125         break;
3126     case MYSQL_TYPE_TINY_BLOB:
3127     case MYSQL_TYPE_MEDIUM_BLOB:
3128     case MYSQL_TYPE_BLOB:
3129     case MYSQL_TYPE_LONG_BLOB:
3130         // test the charset
3131         if (a->charset()->number != b->charset()->number) {
3132             retval = false;
3133             goto cleanup;
3134         }
3135         if (a->row_pack_length() != b->row_pack_length()) {
3136             retval = false;
3137             goto cleanup;
3138         }
3139         break;
3140     case MYSQL_TYPE_STRING:
3141         if (a->pack_length() != b->pack_length()) {
3142             retval = false;
3143             goto cleanup;
3144         }
3145         // if both are binary, we know have same pack lengths,
3146         // so we can goto end
3147         if (a->binary() && b->binary()) {
3148             // nothing to do, we are good
3149         }
3150         else if (!a->binary() && !b->binary()) {
3151             // test the charset
3152             if (a->charset()->number != b->charset()->number) {
3153                 retval = false;
3154                 goto cleanup;
3155             }
3156         }
3157         else {
3158             // one is binary and the other is not, so not the same
3159             retval = false;
3160             goto cleanup;
3161         }
3162         break;
3163     case MYSQL_TYPE_VARCHAR:
3164         if (a->field_length != b->field_length) {
3165             retval = false;
3166             goto cleanup;
3167         }
3168         // if both are binary, we know have same pack lengths,
3169         // so we can goto end
3170         if (a->binary() && b->binary()) {
3171             // nothing to do, we are good
3172         }
3173         else if (!a->binary() && !b->binary()) {
3174             // test the charset
3175             if (a->charset()->number != b->charset()->number) {
3176                 retval = false;
3177                 goto cleanup;
3178             }
3179         }
3180         else {
3181             // one is binary and the other is not, so not the same
3182             retval = false;
3183             goto cleanup;
3184         }
3185         break;
3186     //
3187     // I believe these are old types that are no longer
3188     // in any 5.1 tables, so tokudb does not need
3189     // to worry about them
3190     // Putting in this assert in case I am wrong.
3191     // Do not support geometry yet.
3192     //
3193     case MYSQL_TYPE_GEOMETRY:
3194     case MYSQL_TYPE_DECIMAL:
3195     case MYSQL_TYPE_VAR_STRING:
3196     case MYSQL_TYPE_NULL:
3197     case MYSQL_TYPE_VARCHAR_COMPRESSED:
3198     case MYSQL_TYPE_BLOB_COMPRESSED:
3199         assert_unreachable();
3200     }
3201 
3202 cleanup:
3203     return retval;
3204 }
3205 
are_two_fields_same(Field * a,Field * b)3206 static bool are_two_fields_same(Field* a, Field* b) {
3207     return fields_have_same_name(a, b) && fields_are_same_type(a, b);
3208 }
3209 
3210 
3211