1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of TokuDB
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 Copyright (c) 2020, MariaDB Corporation.
10 
    TokuDB is free software: you can redistribute it and/or modify
12     it under the terms of the GNU General Public License, version 2,
13     as published by the Free Software Foundation.
14 
15     TokuDB is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18     GNU General Public License for more details.
19 
20     You should have received a copy of the GNU General Public License
21     along with TokuDB.  If not, see <http://www.gnu.org/licenses/>.
22 
23 ======= */
24 
25 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
26 
27 #include "hatoku_cmp.h"
28 
29 #ifdef WORDS_BIGENDIAN
30 #error "WORDS_BIGENDIAN not supported"
31 #endif
32 
33 // returns true if the field is a valid field to be used
34 // in a TokuDB table. The non-valid fields are those
35 // that have been deprecated since before 5.1, and can
36 // only exist through upgrades of old versions of MySQL
field_valid_for_tokudb_table(Field * field)37 static bool field_valid_for_tokudb_table(Field* field) {
38     bool ret_val = false;
39     enum_field_types mysql_type = field->real_type();
40     switch (mysql_type) {
41     case MYSQL_TYPE_LONG:
42     case MYSQL_TYPE_LONGLONG:
43     case MYSQL_TYPE_TINY:
44     case MYSQL_TYPE_SHORT:
45     case MYSQL_TYPE_INT24:
46     case MYSQL_TYPE_DATE:
47     case MYSQL_TYPE_YEAR:
48     case MYSQL_TYPE_NEWDATE:
49     case MYSQL_TYPE_ENUM:
50     case MYSQL_TYPE_SET:
51     case MYSQL_TYPE_TIME:
52     case MYSQL_TYPE_DATETIME:
53     case MYSQL_TYPE_TIMESTAMP:
54     case MYSQL_TYPE_DOUBLE:
55     case MYSQL_TYPE_FLOAT:
56 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
57     (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
58     (100000 <= MYSQL_VERSION_ID)
59     case MYSQL_TYPE_DATETIME2:
60     case MYSQL_TYPE_TIMESTAMP2:
61     case MYSQL_TYPE_TIME2:
62 #endif
63     case MYSQL_TYPE_NEWDECIMAL:
64     case MYSQL_TYPE_BIT:
65     case MYSQL_TYPE_STRING:
66     case MYSQL_TYPE_VARCHAR:
67     case MYSQL_TYPE_TINY_BLOB:
68     case MYSQL_TYPE_MEDIUM_BLOB:
69     case MYSQL_TYPE_BLOB:
70     case MYSQL_TYPE_LONG_BLOB:
71         ret_val = true;
72         goto exit;
73     //
74     // I believe these are old types that are no longer
75     // in any 5.1 tables, so tokudb does not need
76     // to worry about them
77     // Putting in this assert in case I am wrong.
78     // Do not support geometry yet.
79     //
80     case MYSQL_TYPE_GEOMETRY:
81     case MYSQL_TYPE_DECIMAL:
82     case MYSQL_TYPE_VAR_STRING:
83     case MYSQL_TYPE_NULL:
84     case MYSQL_TYPE_VARCHAR_COMPRESSED:
85     case MYSQL_TYPE_BLOB_COMPRESSED:
86         ret_val = false;
87     }
88 exit:
89     return ret_val;
90 }
91 
get_var_field_info(uint32_t * field_len,uint32_t * start_offset,uint32_t var_field_index,const uchar * var_field_offset_ptr,uint32_t num_offset_bytes)92 static void get_var_field_info(
93     uint32_t* field_len, // output: length of field
94     uint32_t* start_offset, // output, length of offset where data starts
95     uint32_t var_field_index, //input, index of var field we want info on
96     const uchar* var_field_offset_ptr, //input, pointer to where offset information for all var fields begins
97     uint32_t num_offset_bytes //input, number of bytes used to store offsets starting at var_field_offset_ptr
98     )
99 {
100     uint32_t data_start_offset = 0;
101     uint32_t data_end_offset = 0;
102     switch (num_offset_bytes) {
103     case (1):
104         data_end_offset = (var_field_offset_ptr + var_field_index)[0];
105         break;
106     case (2):
107         data_end_offset = uint2korr(var_field_offset_ptr + 2*var_field_index);
108         break;
109     default:
110         assert_unreachable();
111     }
112 
113     if (var_field_index) {
114         switch (num_offset_bytes) {
115         case (1):
116             data_start_offset = (var_field_offset_ptr + var_field_index - 1)[0];
117             break;
118         case (2):
119             data_start_offset = uint2korr(var_field_offset_ptr + 2*(var_field_index-1));
120             break;
121         default:
122             assert_unreachable();
123         }
124     }
125     else {
126         data_start_offset = 0;
127     }
128 
129     *start_offset = data_start_offset;
130     assert_always(data_end_offset >= data_start_offset);
131     *field_len = data_end_offset - data_start_offset;
132 }
133 
get_blob_field_info(uint32_t * start_offset,uint32_t len_of_offsets,const uchar * var_field_data_ptr,uint32_t num_offset_bytes)134 static void get_blob_field_info(
135     uint32_t* start_offset,
136     uint32_t len_of_offsets,
137     const uchar* var_field_data_ptr,
138     uint32_t num_offset_bytes
139     )
140 {
141     uint32_t data_end_offset;
142     //
143     // need to set var_field_data_ptr to point to beginning of blobs, which
144     // is at the end of the var stuff (if they exist), if var stuff does not exist
145     // then the bottom variable will be 0, and var_field_data_ptr is already
146     // set correctly
147     //
148     if (len_of_offsets) {
149         switch (num_offset_bytes) {
150         case (1):
151             data_end_offset = (var_field_data_ptr - 1)[0];
152             break;
153         case (2):
154             data_end_offset = uint2korr(var_field_data_ptr - 2);
155             break;
156         default:
157             assert_unreachable();
158         }
159     }
160     else {
161         data_end_offset = 0;
162     }
163     *start_offset = data_end_offset;
164 }
165 
166 
167 // this function is pattern matched from
168 // InnoDB's get_innobase_type_from_mysql_type
mysql_to_toku_type(Field * field)169 static TOKU_TYPE mysql_to_toku_type (Field* field) {
170     TOKU_TYPE ret_val = toku_type_unknown;
171     enum_field_types mysql_type = field->real_type();
172     switch (mysql_type) {
173     case MYSQL_TYPE_LONG:
174     case MYSQL_TYPE_LONGLONG:
175     case MYSQL_TYPE_TINY:
176     case MYSQL_TYPE_SHORT:
177     case MYSQL_TYPE_INT24:
178     case MYSQL_TYPE_DATE:
179     case MYSQL_TYPE_YEAR:
180     case MYSQL_TYPE_NEWDATE:
181     case MYSQL_TYPE_ENUM:
182     case MYSQL_TYPE_SET:
183         ret_val = toku_type_int;
184         goto exit;
185     case MYSQL_TYPE_TIME:
186     case MYSQL_TYPE_DATETIME:
187     case MYSQL_TYPE_TIMESTAMP:
188 #ifdef MARIADB_BASE_VERSION
189         // case to handle fractional seconds in MariaDB
190         //
191         if (field->key_type() == HA_KEYTYPE_BINARY) {
192             ret_val = toku_type_fixbinary;
193             goto exit;
194         }
195 #endif
196         ret_val = toku_type_int;
197         goto exit;
198     case MYSQL_TYPE_DOUBLE:
199         ret_val = toku_type_double;
200         goto exit;
201     case MYSQL_TYPE_FLOAT:
202         ret_val = toku_type_float;
203         goto exit;
204 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
205     (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
206     (100000 <= MYSQL_VERSION_ID)
207     case MYSQL_TYPE_DATETIME2:
208     case MYSQL_TYPE_TIMESTAMP2:
209     case MYSQL_TYPE_TIME2:
210 #endif
211     case MYSQL_TYPE_NEWDECIMAL:
212     case MYSQL_TYPE_BIT:
213         ret_val = toku_type_fixbinary;
214         goto exit;
215     case MYSQL_TYPE_STRING:
216         if (field->binary()) {
217             ret_val = toku_type_fixbinary;
218         }
219         else {
220             ret_val = toku_type_fixstring;
221         }
222         goto exit;
223     case MYSQL_TYPE_VARCHAR:
224         if (field->binary()) {
225             ret_val = toku_type_varbinary;
226         }
227         else {
228             ret_val = toku_type_varstring;
229         }
230         goto exit;
231     case MYSQL_TYPE_TINY_BLOB:
232     case MYSQL_TYPE_MEDIUM_BLOB:
233     case MYSQL_TYPE_BLOB:
234     case MYSQL_TYPE_LONG_BLOB:
235         ret_val = toku_type_blob;
236         goto exit;
237     //
238     // I believe these are old types that are no longer
239     // in any 5.1 tables, so tokudb does not need
240     // to worry about them
241     // Putting in this assert in case I am wrong.
242     // Do not support geometry yet.
243     //
244     case MYSQL_TYPE_GEOMETRY:
245     case MYSQL_TYPE_DECIMAL:
246     case MYSQL_TYPE_VAR_STRING:
247     case MYSQL_TYPE_NULL:
248     case MYSQL_TYPE_VARCHAR_COMPRESSED:
249     case MYSQL_TYPE_BLOB_COMPRESSED:
250         assert_unreachable();
251     }
252 exit:
253     return ret_val;
254 }
255 
256 
get_charset_from_num(uint32_t charset_number)257 static inline CHARSET_INFO* get_charset_from_num (uint32_t charset_number) {
258     //
259     // patternmatched off of InnoDB, due to MySQL bug 42649
260     //
261     if (charset_number == default_charset_info->number) {
262         return default_charset_info;
263     }
264     else if (charset_number == my_charset_latin1.number) {
265         return &my_charset_latin1;
266     }
267     else {
268         return get_charset(charset_number, MYF(MY_WME));
269     }
270 }
271 
272 
273 
274 //
275 // used to read the length of a variable sized field in a tokudb key (buf).
276 //
get_length_from_var_tokudata(uchar * buf,uint32_t length_bytes)277 static inline uint32_t get_length_from_var_tokudata (uchar* buf, uint32_t length_bytes) {
278     uint32_t length = (uint32_t)(buf[0]);
279     if (length_bytes == 2) {
280         uint32_t rest_of_length = (uint32_t)buf[1];
281         length += rest_of_length<<8;
282     }
283     return length;
284 }
285 
//
// Deduces how many bytes are used to store the length of a
// varstring/varbinary in a key field stored in tokudb.
// Parameters:
//      [in]    max_num_bytes - largest length that must be representable
// Returns:
//      1 when max_num_bytes is at most 255, otherwise 2
//
static inline uint32_t get_length_bytes_from_max(uint32_t max_num_bytes) {
    if (max_num_bytes <= 255) {
        return 1;
    }
    return 2;
}
293 
294 
295 
296 //
297 // assuming MySQL in little endian, and we are storing in little endian
298 //
pack_toku_int(uchar * to_tokudb,uchar * from_mysql,uint32_t num_bytes)299 static inline uchar* pack_toku_int (uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
300     switch (num_bytes) {
301     case (1):
302         memcpy(to_tokudb, from_mysql, 1);
303         break;
304     case (2):
305         memcpy(to_tokudb, from_mysql, 2);
306         break;
307     case (3):
308         memcpy(to_tokudb, from_mysql, 3);
309         break;
310     case (4):
311         memcpy(to_tokudb, from_mysql, 4);
312         break;
313     case (8):
314         memcpy(to_tokudb, from_mysql, 8);
315         break;
316     default:
317         assert_unreachable();
318     }
319     return to_tokudb+num_bytes;
320 }
321 
322 //
323 // assuming MySQL in little endian, and we are unpacking to little endian
324 //
unpack_toku_int(uchar * to_mysql,uchar * from_tokudb,uint32_t num_bytes)325 static inline uchar* unpack_toku_int(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
326     switch (num_bytes) {
327     case (1):
328         memcpy(to_mysql, from_tokudb, 1);
329         break;
330     case (2):
331         memcpy(to_mysql, from_tokudb, 2);
332         break;
333     case (3):
334         memcpy(to_mysql, from_tokudb, 3);
335         break;
336     case (4):
337         memcpy(to_mysql, from_tokudb, 4);
338         break;
339     case (8):
340         memcpy(to_mysql, from_tokudb, 8);
341         break;
342     default:
343         assert_unreachable();
344     }
345     return from_tokudb+num_bytes;
346 }
347 
cmp_toku_int(uchar * a_buf,uchar * b_buf,bool is_unsigned,uint32_t num_bytes)348 static inline int cmp_toku_int (uchar* a_buf, uchar* b_buf, bool is_unsigned, uint32_t num_bytes) {
349     int ret_val = 0;
350     //
351     // case for unsigned integers
352     //
353     if (is_unsigned) {
354         uint32_t a_num, b_num = 0;
355         uint64_t a_big_num, b_big_num = 0;
356         switch (num_bytes) {
357         case (1):
358             a_num = *a_buf;
359             b_num = *b_buf;
360             ret_val = a_num-b_num;
361             goto exit;
362         case (2):
363             a_num = uint2korr(a_buf);
364             b_num = uint2korr(b_buf);
365             ret_val = a_num-b_num;
366             goto exit;
367         case (3):
368             a_num = tokudb_uint3korr(a_buf);
369             b_num = tokudb_uint3korr(b_buf);
370             ret_val = a_num-b_num;
371             goto exit;
372         case (4):
373             a_num = uint4korr(a_buf);
374             b_num = uint4korr(b_buf);
375             if (a_num < b_num) {
376                 ret_val = -1; goto exit;
377             }
378             if (a_num > b_num) {
379                 ret_val = 1; goto exit;
380             }
381             ret_val = 0;
382             goto exit;
383         case (8):
384             a_big_num = uint8korr(a_buf);
385             b_big_num = uint8korr(b_buf);
386             if (a_big_num < b_big_num) {
387                 ret_val = -1; goto exit;
388             }
389             else if (a_big_num > b_big_num) {
390                 ret_val = 1; goto exit;
391             }
392             ret_val = 0;
393             goto exit;
394         default:
395             assert_unreachable();
396         }
397     }
398     //
399     // case for signed integers
400     //
401     else {
402         int32_t a_num, b_num = 0;
403         int64_t a_big_num, b_big_num = 0;
404         switch (num_bytes) {
405         case (1):
406             a_num = *(signed char *)a_buf;
407             b_num = *(signed char *)b_buf;
408             ret_val = a_num-b_num;
409             goto exit;
410         case (2):
411             a_num = sint2korr(a_buf);
412             b_num = sint2korr(b_buf);
413             ret_val = a_num-b_num;
414             goto exit;
415         case (3):
416             a_num = sint3korr(a_buf);
417             b_num = sint3korr(b_buf);
418             ret_val = a_num - b_num;
419             goto exit;
420         case (4):
421             a_num = sint4korr(a_buf);
422             b_num = sint4korr(b_buf);
423             if (a_num < b_num) {
424                 ret_val = -1; goto exit;
425             }
426             if (a_num > b_num) {
427                 ret_val = 1; goto exit;
428             }
429             ret_val = 0;
430             goto exit;
431         case (8):
432             a_big_num = sint8korr(a_buf);
433             b_big_num = sint8korr(b_buf);
434             if (a_big_num < b_big_num) {
435                 ret_val = -1; goto exit;
436             }
437             else if (a_big_num > b_big_num) {
438                 ret_val = 1; goto exit;
439             }
440             ret_val = 0;
441             goto exit;
442         default:
443             assert_unreachable();
444         }
445     }
446     //
447     // if this is hit, indicates bug in writing of this function
448     //
449     assert_unreachable();
450 exit:
451     return ret_val;
452 }
453 
pack_toku_double(uchar * to_tokudb,uchar * from_mysql)454 static inline uchar* pack_toku_double (uchar* to_tokudb, uchar* from_mysql) {
455     memcpy(to_tokudb, from_mysql, sizeof(double));
456     return to_tokudb + sizeof(double);
457 }
458 
459 
unpack_toku_double(uchar * to_mysql,uchar * from_tokudb)460 static inline uchar* unpack_toku_double(uchar* to_mysql, uchar* from_tokudb) {
461     memcpy(to_mysql, from_tokudb, sizeof(double));
462     return from_tokudb + sizeof(double);
463 }
464 
cmp_toku_double(uchar * a_buf,uchar * b_buf)465 static inline int cmp_toku_double(uchar* a_buf, uchar* b_buf) {
466     int ret_val;
467     double a_num;
468     double b_num;
469     doubleget(a_num, a_buf);
470     doubleget(b_num, b_buf);
471     if (a_num < b_num) {
472         ret_val = -1;
473         goto exit;
474     }
475     else if (a_num > b_num) {
476         ret_val = 1;
477         goto exit;
478     }
479     ret_val = 0;
480 exit:
481     return ret_val;
482 }
483 
484 
pack_toku_float(uchar * to_tokudb,uchar * from_mysql)485 static inline uchar* pack_toku_float (uchar* to_tokudb, uchar* from_mysql) {
486     memcpy(to_tokudb, from_mysql, sizeof(float));
487     return to_tokudb + sizeof(float);
488 }
489 
490 
unpack_toku_float(uchar * to_mysql,uchar * from_tokudb)491 static inline uchar* unpack_toku_float(uchar* to_mysql, uchar* from_tokudb) {
492     memcpy(to_mysql, from_tokudb, sizeof(float));
493     return from_tokudb + sizeof(float);
494 }
495 
cmp_toku_float(uchar * a_buf,uchar * b_buf)496 static inline int cmp_toku_float(uchar* a_buf, uchar* b_buf) {
497     int ret_val;
498     float a_num;
499     float b_num;
500     //
501     // This is the way Field_float::cmp gets the floats from the buffers
502     //
503     memcpy(&a_num, a_buf, sizeof(float));
504     memcpy(&b_num, b_buf, sizeof(float));
505     if (a_num < b_num) {
506         ret_val = -1;
507         goto exit;
508     }
509     else if (a_num > b_num) {
510         ret_val = 1;
511         goto exit;
512     }
513     ret_val = 0;
514 exit:
515     return ret_val;
516 }
517 
518 
pack_toku_binary(uchar * to_tokudb,uchar * from_mysql,uint32_t num_bytes)519 static inline uchar* pack_toku_binary(uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
520     memcpy(to_tokudb, from_mysql, num_bytes);
521     return to_tokudb + num_bytes;
522 }
523 
unpack_toku_binary(uchar * to_mysql,uchar * from_tokudb,uint32_t num_bytes)524 static inline uchar* unpack_toku_binary(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
525     memcpy(to_mysql, from_tokudb, num_bytes);
526     return from_tokudb + num_bytes;
527 }
528 
529 
cmp_toku_binary(uchar * a_buf,uint32_t a_num_bytes,uchar * b_buf,uint32_t b_num_bytes)530 static inline int cmp_toku_binary(
531     uchar* a_buf,
532     uint32_t a_num_bytes,
533     uchar* b_buf,
534     uint32_t b_num_bytes
535     )
536 {
537     int ret_val = 0;
538     uint32_t num_bytes_to_cmp = (a_num_bytes < b_num_bytes) ? a_num_bytes : b_num_bytes;
539     ret_val = memcmp(a_buf, b_buf, num_bytes_to_cmp);
540     if ((ret_val != 0) || (a_num_bytes == b_num_bytes)) {
541         goto exit;
542     }
543     if (a_num_bytes < b_num_bytes) {
544         ret_val = -1;
545         goto exit;
546     }
547     else {
548         ret_val = 1;
549         goto exit;
550     }
551 exit:
552     return ret_val;
553 }
554 
555 //
556 // partially copied from below
557 //
pack_toku_varbinary_from_desc(uchar * to_tokudb,const uchar * from_desc,uint32_t key_part_length,uint32_t field_length)558 static uchar* pack_toku_varbinary_from_desc(
559     uchar* to_tokudb,
560     const uchar* from_desc,
561     uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
562     uint32_t field_length //length of field
563     )
564 {
565     uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
566     uint32_t length = field_length;
567     set_if_smaller(length, key_part_length);
568 
569     //
570     // copy the length bytes, assuming both are in little endian
571     //
572     to_tokudb[0] = (uchar)length & 255;
573     if (length_bytes_in_tokudb > 1) {
574         to_tokudb[1] = (uchar) (length >> 8);
575     }
576     //
577     // copy the string
578     //
579     memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
580     return to_tokudb + length + length_bytes_in_tokudb;
581 }
582 
pack_toku_varbinary(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes)583 static inline uchar* pack_toku_varbinary(
584     uchar* to_tokudb,
585     uchar* from_mysql,
586     uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
587     uint32_t max_num_bytes
588     )
589 {
590     uint32_t length = 0;
591     uint32_t length_bytes_in_tokudb;
592     switch (length_bytes_in_mysql) {
593     case (0):
594         length = max_num_bytes;
595         break;
596     case (1):
597         length = (uint32_t)(*from_mysql);
598         break;
599     case (2):
600         length = uint2korr(from_mysql);
601         break;
602     case (3):
603         length = tokudb_uint3korr(from_mysql);
604         break;
605     case (4):
606         length = uint4korr(from_mysql);
607         break;
608     }
609 
610     //
611     // from this point on, functionality equivalent to pack_toku_varbinary_from_desc
612     //
613     set_if_smaller(length,max_num_bytes);
614 
615     length_bytes_in_tokudb = get_length_bytes_from_max(max_num_bytes);
616     //
617     // copy the length bytes, assuming both are in little endian
618     //
619     to_tokudb[0] = (uchar)length & 255;
620     if (length_bytes_in_tokudb > 1) {
621         to_tokudb[1] = (uchar) (length >> 8);
622     }
623     //
624     // copy the string
625     //
626     memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
627     return to_tokudb + length + length_bytes_in_tokudb;
628 }
629 
unpack_toku_varbinary(uchar * to_mysql,uchar * from_tokudb,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql)630 static inline uchar* unpack_toku_varbinary(
631     uchar* to_mysql,
632     uchar* from_tokudb,
633     uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
634     uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
635     )
636 {
637     uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
638 
639     //
640     // copy the length into the mysql buffer
641     //
642     switch (length_bytes_in_mysql) {
643     case (0):
644         break;
645     case (1):
646         *to_mysql = (uchar) length;
647         break;
648     case (2):
649         int2store(to_mysql, length);
650         break;
651     case (3):
652         int3store(to_mysql, length);
653         break;
654     case (4):
655         int4store(to_mysql, length);
656         break;
657     default:
658         assert_unreachable();
659     }
660     //
661     // copy the binary data
662     //
663     memcpy(to_mysql + length_bytes_in_mysql, from_tokudb + length_bytes_in_tokudb, length);
664     return from_tokudb + length_bytes_in_tokudb+ length;
665 }
666 
cmp_toku_varbinary(uchar * a_buf,uchar * b_buf,uint32_t length_bytes,uint32_t * a_bytes_read,uint32_t * b_bytes_read)667 static inline int cmp_toku_varbinary(
668     uchar* a_buf,
669     uchar* b_buf,
670     uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
671     uint32_t* a_bytes_read,
672     uint32_t* b_bytes_read
673     )
674 {
675     int ret_val = 0;
676     uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
677     uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
678     ret_val = cmp_toku_binary(
679         a_buf + length_bytes,
680         a_len,
681         b_buf + length_bytes,
682         b_len
683         );
684     *a_bytes_read = a_len + length_bytes;
685     *b_bytes_read = b_len + length_bytes;
686     return ret_val;
687 }
688 
pack_toku_blob(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes,const CHARSET_INFO * charset)689 static inline uchar* pack_toku_blob(
690     uchar* to_tokudb,
691     uchar* from_mysql,
692     uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
693     uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
694     uint32_t max_num_bytes,
695 #if MYSQL_VERSION_ID >= 50600
696     const CHARSET_INFO* charset
697 #else
698     CHARSET_INFO* charset
699 #endif
700     )
701 {
702     uint32_t length = 0;
703     uint32_t local_char_length = 0;
704     uchar* blob_buf = NULL;
705 
706     switch (length_bytes_in_mysql) {
707     case (0):
708         length = max_num_bytes;
709         break;
710     case (1):
711         length = (uint32_t)(*from_mysql);
712         break;
713     case (2):
714         length = uint2korr(from_mysql);
715         break;
716     case (3):
717         length = tokudb_uint3korr(from_mysql);
718         break;
719     case (4):
720         length = uint4korr(from_mysql);
721         break;
722     }
723     set_if_smaller(length,max_num_bytes);
724 
725     memcpy(&blob_buf,from_mysql+length_bytes_in_mysql,sizeof(uchar *));
726 
727     local_char_length= ((charset->mbmaxlen > 1) ?
728                        max_num_bytes/charset->mbmaxlen : max_num_bytes);
729     if (length > local_char_length)
730     {
731       local_char_length= charset->charpos(
732         blob_buf,
733         blob_buf+length,
734         local_char_length
735         );
736       set_if_smaller(length, local_char_length);
737     }
738 
739 
740     //
741     // copy the length bytes, assuming both are in little endian
742     //
743     to_tokudb[0] = (uchar)length & 255;
744     if (length_bytes_in_tokudb > 1) {
745         to_tokudb[1] = (uchar) (length >> 8);
746     }
747     //
748     // copy the string
749     //
750     memcpy(to_tokudb + length_bytes_in_tokudb, blob_buf, length);
751     return to_tokudb + length + length_bytes_in_tokudb;
752 }
753 
754 
unpack_toku_blob(uchar * to_mysql,uchar * from_tokudb,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql)755 static inline uchar* unpack_toku_blob(
756     uchar* to_mysql,
757     uchar* from_tokudb,
758     uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
759     uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
760     )
761 {
762     uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
763     uchar* blob_pos = NULL;
764     //
765     // copy the length into the mysql buffer
766     //
767     switch (length_bytes_in_mysql) {
768     case (0):
769         break;
770     case (1):
771         *to_mysql = (uchar) length;
772         break;
773     case (2):
774         int2store(to_mysql, length);
775         break;
776     case (3):
777         int3store(to_mysql, length);
778         break;
779     case (4):
780         int4store(to_mysql, length);
781         break;
782     default:
783         assert_unreachable();
784     }
785     //
786     // copy the binary data
787     //
788     blob_pos = from_tokudb + length_bytes_in_tokudb;
789     memcpy(to_mysql + length_bytes_in_mysql, &blob_pos, sizeof(uchar *));
790     return from_tokudb + length_bytes_in_tokudb+ length;
791 }
792 
793 
794 //
795 // partially copied from below
796 //
pack_toku_varstring_from_desc(uchar * to_tokudb,const uchar * from_desc,uint32_t key_part_length,uint32_t field_length,uint32_t charset_num)797 static uchar* pack_toku_varstring_from_desc(
798     uchar* to_tokudb,
799     const uchar* from_desc,
800     uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
801     uint32_t field_length,
802     uint32_t charset_num//length of field
803     )
804 {
805     CHARSET_INFO* charset = NULL;
806     uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
807     uint32_t length = field_length;
808     uint32_t local_char_length = 0;
809     set_if_smaller(length, key_part_length);
810 
811     charset = get_charset_from_num(charset_num);
812 
813     //
814     // copy the string
815     //
816     local_char_length= ((charset->mbmaxlen > 1) ?
817                        key_part_length/charset->mbmaxlen : key_part_length);
818     if (length > local_char_length)
819     {
820       local_char_length= charset->charpos(
821         from_desc,
822         from_desc+length,
823         local_char_length
824         );
825       set_if_smaller(length, local_char_length);
826     }
827 
828 
829     //
830     // copy the length bytes, assuming both are in little endian
831     //
832     to_tokudb[0] = (uchar)length & 255;
833     if (length_bytes_in_tokudb > 1) {
834         to_tokudb[1] = (uchar) (length >> 8);
835     }
836     //
837     // copy the string
838     //
839     memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
840     return to_tokudb + length + length_bytes_in_tokudb;
841 }
842 
pack_toku_varstring(uchar * to_tokudb,uchar * from_mysql,uint32_t length_bytes_in_tokudb,uint32_t length_bytes_in_mysql,uint32_t max_num_bytes,const CHARSET_INFO * charset)843 static inline uchar* pack_toku_varstring(
844     uchar* to_tokudb,
845     uchar* from_mysql,
846     uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
847     uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
848     uint32_t max_num_bytes,
849 #if MYSQL_VERSION_ID >= 50600
850     const CHARSET_INFO *charset
851 #else
852     CHARSET_INFO* charset
853 #endif
854     )
855 {
856     uint32_t length = 0;
857     uint32_t local_char_length = 0;
858 
859     switch (length_bytes_in_mysql) {
860     case (0):
861         length = max_num_bytes;
862         break;
863     case (1):
864         length = (uint32_t)(*from_mysql);
865         break;
866     case (2):
867         length = uint2korr(from_mysql);
868         break;
869     case (3):
870         length = tokudb_uint3korr(from_mysql);
871         break;
872     case (4):
873         length = uint4korr(from_mysql);
874         break;
875     }
876     set_if_smaller(length,max_num_bytes);
877 
878     local_char_length= ((charset->mbmaxlen > 1) ?
879                        max_num_bytes/charset->mbmaxlen : max_num_bytes);
880     if (length > local_char_length)
881     {
882       local_char_length= charset->charpos(
883         from_mysql+length_bytes_in_mysql,
884         from_mysql+length_bytes_in_mysql+length,
885         local_char_length
886         );
887       set_if_smaller(length, local_char_length);
888     }
889 
890 
891     //
892     // copy the length bytes, assuming both are in little endian
893     //
894     to_tokudb[0] = (uchar)length & 255;
895     if (length_bytes_in_tokudb > 1) {
896         to_tokudb[1] = (uchar) (length >> 8);
897     }
898     //
899     // copy the string
900     //
901     memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
902     return to_tokudb + length + length_bytes_in_tokudb;
903 }
904 
cmp_toku_string(uchar * a_buf,uint32_t a_num_bytes,uchar * b_buf,uint32_t b_num_bytes,uint32_t charset_number)905 static inline int cmp_toku_string(
906     uchar* a_buf,
907     uint32_t a_num_bytes,
908     uchar* b_buf,
909     uint32_t b_num_bytes,
910     uint32_t charset_number
911     )
912 {
913     int ret_val = 0;
914     CHARSET_INFO* charset = NULL;
915 
916     charset = get_charset_from_num(charset_number);
917 
918     ret_val = charset->strnncollsp(
919         a_buf,
920         a_num_bytes,
921         b_buf,
922         b_num_bytes
923         );
924     return ret_val;
925 }
926 
cmp_toku_varstring(uchar * a_buf,uchar * b_buf,uint32_t length_bytes,uint32_t charset_num,uint32_t * a_bytes_read,uint32_t * b_bytes_read)927 static inline int cmp_toku_varstring(
928     uchar* a_buf,
929     uchar* b_buf,
930     uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
931     uint32_t charset_num,
932     uint32_t* a_bytes_read,
933     uint32_t* b_bytes_read
934     )
935 {
936     int ret_val = 0;
937     uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
938     uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
939     ret_val = cmp_toku_string(
940         a_buf + length_bytes,
941         a_len,
942         b_buf + length_bytes,
943         b_len,
944         charset_num
945         );
946     *a_bytes_read = a_len + length_bytes;
947     *b_bytes_read = b_len + length_bytes;
948     return ret_val;
949 }
950 
tokudb_compare_two_hidden_keys(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size)951 static inline int tokudb_compare_two_hidden_keys(
952     const void* new_key_data,
953     const uint32_t new_key_size,
954     const void*  saved_key_data,
955     const uint32_t saved_key_size
956     ) {
957     assert_always(new_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
958     assert_always(saved_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
959     ulonglong a = hpk_char_to_num((uchar *) new_key_data);
960     ulonglong b = hpk_char_to_num((uchar *) saved_key_data);
961     return a < b ? -1 : (a > b ? 1 : 0);
962 }
963 
964 //
965 // Returns number of bytes used for a given TOKU_TYPE
966 // in a key descriptor. The number of bytes returned
967 // here MUST match the number of bytes used for the encoding
968 // in create_toku_key_descriptor_for_key
969 // Parameters:
970 //      [in]    row_desc - buffer that contains portion of descriptor
971 //              created in create_toku_key_descriptor_for_key. The first
972 //              byte points to the TOKU_TYPE.
973 //
skip_field_in_descriptor(uchar * row_desc)974 static uint32_t skip_field_in_descriptor(uchar* row_desc) {
975     uchar* row_desc_pos = row_desc;
976     TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
977     row_desc_pos++;
978 
979     switch (toku_type) {
980     case (toku_type_hpk):
981     case (toku_type_double):
982     case (toku_type_float):
983         break;
984     case (toku_type_int):
985         row_desc_pos += 2;
986         break;
987     case (toku_type_fixbinary):
988     case (toku_type_varbinary):
989         row_desc_pos++;
990         break;
991     case (toku_type_fixstring):
992     case (toku_type_varstring):
993     case (toku_type_blob):
994         row_desc_pos++;
995         row_desc_pos += sizeof(uint32_t);
996         break;
997     default:
998         assert_unreachable();
999     }
1000     return (uint32_t)(row_desc_pos - row_desc);
1001 }
1002 
1003 //
1004 // outputs a descriptor for key into buf. Returns number of bytes used in buf
1005 // to store the descriptor. Number of bytes used MUST match number of bytes
1006 // we would skip in skip_field_in_descriptor
1007 //
create_toku_key_descriptor_for_key(KEY * key,uchar * buf)1008 static int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) {
1009     uchar* pos = buf;
1010     uint32_t num_bytes_in_field = 0;
1011     uint32_t charset_num = 0;
1012     for (uint i = 0; i < key->user_defined_key_parts; i++) {
1013         Field* field = key->key_part[i].field;
1014         //
1015         // The first byte states if there is a null byte
1016         // 0 means no null byte, non-zer means there
1017         // is one
1018         //
1019         *pos = field->null_bit;
1020         pos++;
1021 
1022         //
1023         // The second byte for each field is the type
1024         //
1025         TOKU_TYPE type = mysql_to_toku_type(field);
1026         assert_always((int)type < 256);
1027         *pos = (uchar)(type & 255);
1028         pos++;
1029 
1030         //
1031         // based on the type, extra data follows afterwards
1032         //
1033         switch (type) {
1034         //
1035         // two bytes follow for ints, first one states how many
1036         // bytes the int is (1 , 2, 3, 4 or 8)
1037         // next one states if it is signed or not
1038         //
1039         case (toku_type_int):
1040             num_bytes_in_field = field->pack_length();
1041             assert_always (num_bytes_in_field < 256);
1042             *pos = (uchar)(num_bytes_in_field & 255);
1043             pos++;
1044             *pos = (field->flags & UNSIGNED_FLAG) ? 1 : 0;
1045             pos++;
1046             break;
1047         //
1048         // nothing follows floats and doubles
1049         //
1050         case (toku_type_double):
1051         case (toku_type_float):
1052             break;
1053         //
1054         // one byte follow stating the length of the field
1055         //
1056         case (toku_type_fixbinary):
1057             num_bytes_in_field = field->pack_length();
1058             set_if_smaller(num_bytes_in_field, key->key_part[i].length);
1059             assert_always(num_bytes_in_field < 256);
1060             pos[0] = (uchar)(num_bytes_in_field & 255);
1061             pos++;
1062             break;
1063         //
1064         // one byte follows: the number of bytes used to encode the length
1065         //
1066         case (toku_type_varbinary):
1067             *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
1068             pos++;
1069             break;
1070         //
1071         // five bytes follow: one for the number of bytes to encode the length,
1072         //                           four for the charset number
1073         //
1074         case (toku_type_fixstring):
1075         case (toku_type_varstring):
1076         case (toku_type_blob):
1077             *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
1078             pos++;
1079             charset_num = field->charset()->number;
1080             pos[0] = (uchar)(charset_num & 255);
1081             pos[1] = (uchar)((charset_num >> 8) & 255);
1082             pos[2] = (uchar)((charset_num >> 16) & 255);
1083             pos[3] = (uchar)((charset_num >> 24) & 255);
1084             pos += 4;
1085             break;
1086         default:
1087             assert_unreachable();
1088         }
1089     }
1090     return pos - buf;
1091 }
1092 
1093 
1094 //
1095 // Creates a descriptor for a DB. That contains all information necessary
1096 // to do both key comparisons and data comparisons (for dup-sort databases).
1097 //
1098 // There are two types of descriptors we care about:
1099 // 1) Primary key, (in a no-dup database)
1100 // 2) secondary keys, which are a secondary key followed by a primary key,
1101 //      but in a no-dup database.
1102 //
1103 // I realize this may be confusing, but here is how it works.
1104 // All DB's have a key compare.
1105 // The format of the descriptor must be able to handle both.
1106 //
1107 // The first four bytes store an offset into the descriptor to the second piece
1108 // used for data comparisons. So, if in the future we want to append something
1109 // to the descriptor, we can.
1110 //
1111 //
create_toku_key_descriptor(uchar * buf,bool is_first_hpk,KEY * first_key,bool is_second_hpk,KEY * second_key)1112 static int create_toku_key_descriptor(
1113     uchar* buf,
1114     bool is_first_hpk,
1115     KEY* first_key,
1116     bool is_second_hpk,
1117     KEY* second_key
1118     )
1119 {
1120     //
1121     // The first four bytes always contain the offset of where the first key
1122     // ends.
1123     //
1124     uchar* pos = buf + 4;
1125     uint32_t num_bytes = 0;
1126     uint32_t offset = 0;
1127 
1128 
1129     if (is_first_hpk) {
1130         pos[0] = 0; //say there is NO infinity byte
1131         pos[1] = 0; //field cannot be NULL, stating it
1132         pos[2] = toku_type_hpk;
1133         pos += 3;
1134     }
1135     else {
1136         //
1137         // first key is NOT a hidden primary key, so we now pack first_key
1138         //
1139         pos[0] = 1; //say there is an infinity byte
1140         pos++;
1141         num_bytes = create_toku_key_descriptor_for_key(first_key, pos);
1142         pos += num_bytes;
1143     }
1144 
1145     //
1146     // if we do not have a second key, we can jump to exit right now
1147     // we do not have a second key if it is not a hidden primary key
1148     // and if second_key is NULL
1149     //
1150     if (is_first_hpk || (!is_second_hpk && (second_key == NULL)) ) {
1151         goto exit;
1152     }
1153 
1154     //
1155     // if we have a second key, and it is an hpk, we need to pack it, and
1156     // write in the offset to this position in the first four bytes
1157     //
1158     if (is_second_hpk) {
1159         pos[0] = 0; //field cannot be NULL, stating it
1160         pos[1] = toku_type_hpk;
1161         pos += 2;
1162     }
1163     else {
1164         //
1165         // second key is NOT a hidden primary key, so we now pack second_key
1166         //
1167         num_bytes = create_toku_key_descriptor_for_key(second_key, pos);
1168         pos += num_bytes;
1169     }
1170 
1171 
1172 exit:
1173     offset = pos - buf;
1174     buf[0] = (uchar)(offset & 255);
1175     buf[1] = (uchar)((offset >> 8) & 255);
1176     buf[2] = (uchar)((offset >> 16) & 255);
1177     buf[3] = (uchar)((offset >> 24) & 255);
1178 
1179     return pos - buf;
1180 }
1181 
1182 
compare_toku_field(uchar * a_buf,uchar * b_buf,uchar * row_desc,uint32_t * a_bytes_read,uint32_t * b_bytes_read,uint32_t * row_desc_bytes_read,bool * read_string)1183 static inline int compare_toku_field(
1184     uchar* a_buf,
1185     uchar* b_buf,
1186     uchar* row_desc,
1187     uint32_t* a_bytes_read,
1188     uint32_t* b_bytes_read,
1189     uint32_t* row_desc_bytes_read,
1190     bool* read_string
1191     )
1192 {
1193     int ret_val = 0;
1194     uchar* row_desc_pos = row_desc;
1195     uint32_t num_bytes = 0;
1196     uint32_t length_bytes = 0;
1197     uint32_t charset_num = 0;
1198     bool is_unsigned = false;
1199 
1200     TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
1201     row_desc_pos++;
1202 
1203     switch (toku_type) {
1204     case (toku_type_hpk):
1205         ret_val = tokudb_compare_two_hidden_keys(
1206             a_buf,
1207             TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH,
1208             b_buf,
1209             TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH
1210             );
1211         *a_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
1212         *b_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
1213         break;
1214     case (toku_type_int):
1215         num_bytes = row_desc_pos[0];
1216         is_unsigned = row_desc_pos[1];
1217         ret_val = cmp_toku_int(
1218             a_buf,
1219             b_buf,
1220             is_unsigned,
1221             num_bytes
1222             );
1223         *a_bytes_read = num_bytes;
1224         *b_bytes_read = num_bytes;
1225         row_desc_pos += 2;
1226         break;
1227     case (toku_type_double):
1228         ret_val = cmp_toku_double(a_buf, b_buf);
1229         *a_bytes_read = sizeof(double);
1230         *b_bytes_read = sizeof(double);
1231         break;
1232     case (toku_type_float):
1233         ret_val = cmp_toku_float(a_buf, b_buf);
1234         *a_bytes_read = sizeof(float);
1235         *b_bytes_read = sizeof(float);
1236         break;
1237     case (toku_type_fixbinary):
1238         num_bytes = row_desc_pos[0];
1239         ret_val = cmp_toku_binary(a_buf, num_bytes, b_buf,num_bytes);
1240         *a_bytes_read = num_bytes;
1241         *b_bytes_read = num_bytes;
1242         row_desc_pos++;
1243         break;
1244     case (toku_type_varbinary):
1245         length_bytes = row_desc_pos[0];
1246         ret_val = cmp_toku_varbinary(
1247             a_buf,
1248             b_buf,
1249             length_bytes,
1250             a_bytes_read,
1251             b_bytes_read
1252             );
1253         row_desc_pos++;
1254         break;
1255     case (toku_type_fixstring):
1256     case (toku_type_varstring):
1257     case (toku_type_blob):
1258         length_bytes = row_desc_pos[0];
1259         row_desc_pos++;
1260         //
1261         // not sure we want to read charset_num like this
1262         //
1263         charset_num = *(uint32_t *)row_desc_pos;
1264         row_desc_pos += sizeof(uint32_t);
1265         ret_val = cmp_toku_varstring(
1266             a_buf,
1267             b_buf,
1268             length_bytes,
1269             charset_num,
1270             a_bytes_read,
1271             b_bytes_read
1272             );
1273         *read_string = true;
1274         break;
1275     default:
1276         assert_unreachable();
1277     }
1278 
1279     *row_desc_bytes_read = row_desc_pos - row_desc;
1280     return ret_val;
1281 }
1282 
1283 //
1284 // packs a field from a  MySQL buffer into a tokudb buffer.
1285 // Used for inserts/updates
1286 //
pack_toku_key_field(uchar * to_tokudb,uchar * from_mysql,Field * field,uint32_t key_part_length)1287 static uchar* pack_toku_key_field(
1288     uchar* to_tokudb,
1289     uchar* from_mysql,
1290     Field* field,
1291     uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
1292     )
1293 {
1294     uchar* new_pos = NULL;
1295     uint32_t num_bytes = 0;
1296     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1297     switch(toku_type) {
1298     case (toku_type_int):
1299         assert_always(key_part_length == field->pack_length());
1300         new_pos = pack_toku_int(
1301             to_tokudb,
1302             from_mysql,
1303             field->pack_length()
1304             );
1305         goto exit;
1306     case (toku_type_double):
1307         assert_always(field->pack_length() == sizeof(double));
1308         assert_always(key_part_length == sizeof(double));
1309         new_pos = pack_toku_double(to_tokudb, from_mysql);
1310         goto exit;
1311     case (toku_type_float):
1312         assert_always(field->pack_length() == sizeof(float));
1313         assert_always(key_part_length == sizeof(float));
1314         new_pos = pack_toku_float(to_tokudb, from_mysql);
1315         goto exit;
1316     case (toku_type_fixbinary):
1317         num_bytes = field->pack_length();
1318         set_if_smaller(num_bytes, key_part_length);
1319         new_pos = pack_toku_binary(
1320             to_tokudb,
1321             from_mysql,
1322             num_bytes
1323             );
1324         goto exit;
1325     case (toku_type_fixstring):
1326         num_bytes = field->pack_length();
1327         set_if_smaller(num_bytes, key_part_length);
1328         new_pos = pack_toku_varstring(
1329             to_tokudb,
1330             from_mysql,
1331             get_length_bytes_from_max(key_part_length),
1332             0,
1333             num_bytes,
1334             field->charset()
1335             );
1336         goto exit;
1337     case (toku_type_varbinary):
1338         new_pos = pack_toku_varbinary(
1339             to_tokudb,
1340             from_mysql,
1341             ((Field_varstring *)field)->length_bytes,
1342             key_part_length
1343             );
1344         goto exit;
1345     case (toku_type_varstring):
1346         new_pos = pack_toku_varstring(
1347             to_tokudb,
1348             from_mysql,
1349             get_length_bytes_from_max(key_part_length),
1350             ((Field_varstring *)field)->length_bytes,
1351             key_part_length,
1352             field->charset()
1353             );
1354         goto exit;
1355     case (toku_type_blob):
1356         new_pos = pack_toku_blob(
1357             to_tokudb,
1358             from_mysql,
1359             get_length_bytes_from_max(key_part_length),
1360             ((Field_blob *)field)->row_pack_length(), //only calling this because packlength is returned
1361             key_part_length,
1362             field->charset()
1363             );
1364         goto exit;
1365     default:
1366         assert_unreachable();
1367     }
1368     assert_unreachable();
1369 exit:
1370     return new_pos;
1371 }
1372 
1373 //
1374 // packs a field from a  MySQL buffer into a tokudb buffer.
1375 // Used for queries. The only difference between this function
1376 // and pack_toku_key_field is that all variable sized columns
1377 // use 2 bytes to encode the length, regardless of the field
1378 // So varchar(4) will still use 2 bytes to encode the field
1379 //
pack_key_toku_key_field(uchar * to_tokudb,uchar * from_mysql,Field * field,uint32_t key_part_length)1380 static uchar* pack_key_toku_key_field(
1381     uchar* to_tokudb,
1382     uchar* from_mysql,
1383     Field* field,
1384     uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
1385     )
1386 {
1387     uchar* new_pos = NULL;
1388     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1389     switch(toku_type) {
1390     case (toku_type_int):
1391     case (toku_type_double):
1392     case (toku_type_float):
1393     case (toku_type_fixbinary):
1394     case (toku_type_fixstring):
1395         new_pos = pack_toku_key_field(to_tokudb, from_mysql, field, key_part_length);
1396         goto exit;
1397     case (toku_type_varbinary):
1398         new_pos = pack_toku_varbinary(
1399             to_tokudb,
1400             from_mysql,
1401             2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
1402             key_part_length
1403             );
1404         goto exit;
1405     case (toku_type_varstring):
1406     case (toku_type_blob):
1407         new_pos = pack_toku_varstring(
1408             to_tokudb,
1409             from_mysql,
1410             get_length_bytes_from_max(key_part_length),
1411             2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
1412             key_part_length,
1413             field->charset()
1414             );
1415         goto exit;
1416     default:
1417         assert_unreachable();
1418     }
1419 
1420     assert_unreachable();
1421 exit:
1422     return new_pos;
1423 }
1424 
1425 
unpack_toku_key_field(uchar * to_mysql,uchar * from_tokudb,Field * field,uint32_t key_part_length)1426 uchar* unpack_toku_key_field(
1427     uchar* to_mysql,
1428     uchar* from_tokudb,
1429     Field* field,
1430     uint32_t key_part_length) {
1431 
1432     uchar* new_pos = NULL;
1433     uint32_t num_bytes = 0;
1434     uint32_t num_bytes_copied;
1435     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1436     switch(toku_type) {
1437     case (toku_type_int):
1438         assert_always(key_part_length == field->pack_length());
1439         new_pos = unpack_toku_int(
1440             to_mysql,
1441             from_tokudb,
1442             field->pack_length()
1443             );
1444         goto exit;
1445     case (toku_type_double):
1446         assert_always(field->pack_length() == sizeof(double));
1447         assert_always(key_part_length == sizeof(double));
1448         new_pos = unpack_toku_double(to_mysql, from_tokudb);
1449         goto exit;
1450     case (toku_type_float):
1451         assert_always(field->pack_length() == sizeof(float));
1452         assert_always(key_part_length == sizeof(float));
1453         new_pos = unpack_toku_float(to_mysql, from_tokudb);
1454         goto exit;
1455     case (toku_type_fixbinary):
1456         num_bytes = field->pack_length();
1457         set_if_smaller(num_bytes, key_part_length);
1458         new_pos = unpack_toku_binary(
1459             to_mysql,
1460             from_tokudb,
1461             num_bytes);
1462         goto exit;
1463     case (toku_type_fixstring):
1464         num_bytes = field->pack_length();
1465         new_pos = unpack_toku_varbinary(
1466             to_mysql,
1467             from_tokudb,
1468             get_length_bytes_from_max(key_part_length),
1469             0);
1470         num_bytes_copied =
1471             new_pos -
1472             (from_tokudb + get_length_bytes_from_max(key_part_length));
1473         assert_always(num_bytes_copied <= num_bytes);
1474         memset(
1475             to_mysql + num_bytes_copied,
1476             field->charset()->pad_char,
1477             num_bytes - num_bytes_copied);
1478         goto exit;
1479     case (toku_type_varbinary):
1480     case (toku_type_varstring):
1481         new_pos = unpack_toku_varbinary(
1482             to_mysql,
1483             from_tokudb,
1484             get_length_bytes_from_max(key_part_length),
1485             ((Field_varstring*)field)->length_bytes);
1486         goto exit;
1487     case (toku_type_blob):
1488         new_pos = unpack_toku_blob(
1489             to_mysql,
1490             from_tokudb,
1491             get_length_bytes_from_max(key_part_length),
1492             //only calling this because packlength is returned
1493             ((Field_blob *)field)->row_pack_length());
1494         goto exit;
1495     default:
1496         assert_unreachable();
1497     }
1498     assert_unreachable();
1499 exit:
1500     return new_pos;
1501 }
1502 
1503 
tokudb_compare_two_keys(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size,const void * row_desc,const uint32_t row_desc_size,bool cmp_prefix,bool * read_string)1504 static int tokudb_compare_two_keys(
1505     const void* new_key_data,
1506     const uint32_t new_key_size,
1507     const void*  saved_key_data,
1508     const uint32_t saved_key_size,
1509     const void*  row_desc,
1510     const uint32_t row_desc_size,
1511     bool cmp_prefix,
1512     bool* read_string) {
1513 
1514     int ret_val = 0;
1515     int8_t new_key_inf_val = COL_NEG_INF;
1516     int8_t saved_key_inf_val = COL_NEG_INF;
1517 
1518     uchar* row_desc_ptr = (uchar *)row_desc;
1519     uchar *new_key_ptr = (uchar *)new_key_data;
1520     uchar *saved_key_ptr = (uchar *)saved_key_data;
1521 
1522     uint32_t new_key_bytes_left = new_key_size;
1523     uint32_t saved_key_bytes_left = saved_key_size;
1524 
1525     //
1526     // if the keys have an infinity byte, set it
1527     //
1528     if (row_desc_ptr[0]) {
1529         new_key_inf_val = (int8_t)new_key_ptr[0];
1530         saved_key_inf_val = (int8_t)saved_key_ptr[0];
1531         new_key_ptr++;
1532         saved_key_ptr++;
1533     }
1534     row_desc_ptr++;
1535 
1536     while ((uint32_t)(new_key_ptr - (uchar*)new_key_data) < new_key_size &&
1537            (uint32_t)(saved_key_ptr - (uchar*)saved_key_data) < saved_key_size &&
1538            (uint32_t)(row_desc_ptr - (uchar*)row_desc) < row_desc_size) {
1539         uint32_t new_key_field_length;
1540         uint32_t saved_key_field_length;
1541         uint32_t row_desc_field_length;
1542         //
1543         // if there is a null byte at this point in the key
1544         //
1545         if (row_desc_ptr[0]) {
1546             //
1547             // compare null bytes. If different, return
1548             //
1549             if (new_key_ptr[0] != saved_key_ptr[0]) {
1550                 ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
1551                 goto exit;
1552             }
1553             saved_key_ptr++;
1554             //
1555             // in case we just read the fact that new_key_ptr and saved_key_ptr
1556             // have NULL as their next field
1557             //
1558             if (!*new_key_ptr++) {
1559                 //
1560                 // skip row_desc_ptr[0] read in if clause
1561                 //
1562                 row_desc_ptr++;
1563                 //
1564                 // skip data that describes rest of field
1565                 //
1566                 row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
1567                 continue;
1568             }
1569         }
1570         row_desc_ptr++;
1571 
1572         ret_val = compare_toku_field(
1573             new_key_ptr,
1574             saved_key_ptr,
1575             row_desc_ptr,
1576             &new_key_field_length,
1577             &saved_key_field_length,
1578             &row_desc_field_length,
1579             read_string);
1580         new_key_ptr += new_key_field_length;
1581         saved_key_ptr += saved_key_field_length;
1582         row_desc_ptr += row_desc_field_length;
1583         if (ret_val) {
1584             goto exit;
1585         }
1586 
1587         assert_always(
1588             (uint32_t)(new_key_ptr - (uchar*)new_key_data) <= new_key_size);
1589         assert_always(
1590             (uint32_t)(saved_key_ptr - (uchar*)saved_key_data) <= saved_key_size);
1591         assert_always(
1592             (uint32_t)(row_desc_ptr - (uchar*)row_desc) <= row_desc_size);
1593     }
1594     new_key_bytes_left =
1595         new_key_size - ((uint32_t)(new_key_ptr - (uchar*)new_key_data));
1596     saved_key_bytes_left =
1597         saved_key_size - ((uint32_t)(saved_key_ptr - (uchar*)saved_key_data));
1598     if (cmp_prefix) {
1599         ret_val = 0;
1600     } else if (new_key_bytes_left== 0 && saved_key_bytes_left== 0) {
1601         // in this case, read both keys to completion, now read infinity byte
1602         ret_val = new_key_inf_val - saved_key_inf_val;
1603     } else if (new_key_bytes_left == 0 && saved_key_bytes_left > 0) {
1604         // at this point, one SHOULD be 0
1605         ret_val = (new_key_inf_val == COL_POS_INF ) ? 1 : -1;
1606     } else if (new_key_bytes_left > 0 && saved_key_bytes_left == 0) {
1607         ret_val = (saved_key_inf_val == COL_POS_INF ) ? -1 : 1;
1608     } else {
1609         // this should never happen, perhaps we should assert(false)
1610         assert_unreachable();
1611         ret_val = new_key_bytes_left - saved_key_bytes_left;
1612     }
1613 exit:
1614     return ret_val;
1615 }
1616 
simple_memcmp(const DBT * keya,const DBT * keyb)1617 static int simple_memcmp(const DBT *keya, const DBT *keyb) {
1618     int cmp;
1619     int num_bytes_cmp = keya->size < keyb->size ?
1620         keya->size : keyb->size;
1621     cmp = memcmp(keya->data,keyb->data,num_bytes_cmp);
1622     if (cmp == 0 && (keya->size != keyb->size)) {
1623         cmp = keya->size < keyb->size ? -1 : 1;
1624     }
1625     return cmp;
1626 }
1627 
1628 // comparison function to be used by the fractal trees.
tokudb_cmp_dbt_key(DB * file,const DBT * keya,const DBT * keyb)1629 static int tokudb_cmp_dbt_key(DB* file, const DBT *keya, const DBT *keyb) {
1630     int cmp;
1631     if (file->cmp_descriptor->dbt.size == 0) {
1632         cmp = simple_memcmp(keya, keyb);
1633     }
1634     else {
1635         bool read_string = false;
1636         cmp = tokudb_compare_two_keys(
1637             keya->data,
1638             keya->size,
1639             keyb->data,
1640             keyb->size,
1641             (uchar *)file->cmp_descriptor->dbt.data + 4,
1642             (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
1643             false,
1644             &read_string
1645             );
1646         // comparison above may be case-insensitive, but fractal tree
1647         // needs to distinguish between different data, so we do this
1648         // additional check here
1649         if (read_string && (cmp == 0)) {
1650             cmp = simple_memcmp(keya, keyb);
1651         }
1652     }
1653     return cmp;
1654 }
1655 
1656 //TODO: QQQ Only do one direction for prefix.
tokudb_prefix_cmp_dbt_key(DB * file,const DBT * keya,const DBT * keyb)1657 static int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb) {
1658     // calls to this function are done by the handlerton, and are
1659     // comparing just the keys as MySQL would compare them.
1660     bool read_string = false;
1661     int cmp = tokudb_compare_two_keys(
1662         keya->data,
1663         keya->size,
1664         keyb->data,
1665         keyb->size,
1666         (uchar *)file->cmp_descriptor->dbt.data + 4,
1667         *(uint32_t *)file->cmp_descriptor->dbt.data - 4,
1668         true,
1669         &read_string
1670         );
1671     return cmp;
1672 }
1673 
tokudb_compare_two_key_parts(const void * new_key_data,const uint32_t new_key_size,const void * saved_key_data,const uint32_t saved_key_size,const void * row_desc,const uint32_t row_desc_size,uint max_parts)1674 static int tokudb_compare_two_key_parts(
1675     const void* new_key_data,
1676     const uint32_t new_key_size,
1677     const void*  saved_key_data,
1678     const uint32_t saved_key_size,
1679     const void*  row_desc,
1680     const uint32_t row_desc_size,
1681     uint max_parts
1682     )
1683 {
1684     int ret_val = 0;
1685 
1686     uchar* row_desc_ptr = (uchar *)row_desc;
1687     uchar *new_key_ptr = (uchar *)new_key_data;
1688     uchar *saved_key_ptr = (uchar *)saved_key_data;
1689 
1690     //
1691     // if the keys have an infinity byte, set it
1692     //
1693     if (row_desc_ptr[0]) {
1694         // new_key_inf_val = (int8_t)new_key_ptr[0];
1695         // saved_key_inf_val = (int8_t)saved_key_ptr[0];
1696         new_key_ptr++;
1697         saved_key_ptr++;
1698     }
1699     row_desc_ptr++;
1700 
1701     for (uint i = 0; i < max_parts; i++) {
1702         if (!((uint32_t)(new_key_ptr - (uchar *)new_key_data) < new_key_size &&
1703                (uint32_t)(saved_key_ptr - (uchar *)saved_key_data) < saved_key_size &&
1704                (uint32_t)(row_desc_ptr - (uchar *)row_desc) < row_desc_size))
1705             break;
1706         uint32_t new_key_field_length;
1707         uint32_t saved_key_field_length;
1708         uint32_t row_desc_field_length;
1709         //
1710         // if there is a null byte at this point in the key
1711         //
1712         if (row_desc_ptr[0]) {
1713             //
1714             // compare null bytes. If different, return
1715             //
1716             if (new_key_ptr[0] != saved_key_ptr[0]) {
1717                 ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
1718                 goto exit;
1719             }
1720             saved_key_ptr++;
1721             //
1722             // in case we just read the fact that new_key_ptr and saved_key_ptr
1723             // have NULL as their next field
1724             //
1725             if (!*new_key_ptr++) {
1726                 //
1727                 // skip row_desc_ptr[0] read in if clause
1728                 //
1729                 row_desc_ptr++;
1730                 //
1731                 // skip data that describes rest of field
1732                 //
1733                 row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
1734                 continue;
1735             }
1736         }
1737         row_desc_ptr++;
1738         bool read_string = false;
1739         ret_val = compare_toku_field(
1740             new_key_ptr,
1741             saved_key_ptr,
1742             row_desc_ptr,
1743             &new_key_field_length,
1744             &saved_key_field_length,
1745             &row_desc_field_length,
1746             &read_string
1747             );
1748         new_key_ptr += new_key_field_length;
1749         saved_key_ptr += saved_key_field_length;
1750         row_desc_ptr += row_desc_field_length;
1751         if (ret_val) {
1752             goto exit;
1753         }
1754 
1755         assert_always((uint32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size);
1756         assert_always((uint32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size);
1757         assert_always((uint32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size);
1758     }
1759 
1760     ret_val = 0;
1761 exit:
1762     return ret_val;
1763 }
1764 
tokudb_cmp_dbt_key_parts(DB * file,const DBT * keya,const DBT * keyb,uint max_parts)1765 static int tokudb_cmp_dbt_key_parts(DB *file, const DBT *keya, const DBT *keyb, uint max_parts) {
1766     assert_always(file->cmp_descriptor->dbt.size);
1767     return tokudb_compare_two_key_parts(
1768             keya->data,
1769             keya->size,
1770             keyb->data,
1771             keyb->size,
1772             (uchar *)file->cmp_descriptor->dbt.data + 4,
1773             (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
1774             max_parts);
1775 }
1776 
create_toku_main_key_pack_descriptor(uchar * buf)1777 static uint32_t create_toku_main_key_pack_descriptor (
1778     uchar* buf
1779     )
1780 {
1781     //
1782     // The first four bytes always contain the offset of where the first key
1783     // ends.
1784     //
1785     uchar* pos = buf + 4;
1786     uint32_t offset = 0;
1787     //
1788     // one byte states if this is the main dictionary
1789     //
1790     pos[0] = 1;
1791     pos++;
1792     goto exit;
1793 
1794 
1795 exit:
1796     offset = pos - buf;
1797     buf[0] = (uchar)(offset & 255);
1798     buf[1] = (uchar)((offset >> 8) & 255);
1799     buf[2] = (uchar)((offset >> 16) & 255);
1800     buf[3] = (uchar)((offset >> 24) & 255);
1801 
1802     return pos - buf;
1803 }
1804 
//
// tag bytes written by pack_desc_char_info: COL_HAS_CHARSET is followed
// by four bytes of charset number, COL_HAS_NO_CHARSET by nothing
//
#define COL_HAS_NO_CHARSET 0x44
#define COL_HAS_CHARSET 0x55

//
// tag bytes written by pack_desc_pk_offset_info: the four bytes after
// COL_FIX_PK_OFFSET hold a byte offset accumulated over the preceding
// primary key parts, the four bytes after COL_VAR_PK_OFFSET hold the
// index of the key part within the primary key
//
#define COL_FIX_PK_OFFSET 0x66
#define COL_VAR_PK_OFFSET 0x77

//
// tag bytes written by create_toku_clustering_val_pack_descriptor: each
// one opens a range of fixed or variable fields and is followed by a
// four byte start value and a four byte end value
//
#define CK_FIX_RANGE 0x88
#define CK_VAR_RANGE 0x99

//
// copies four bytes of kc_info->cp_info[pk_index][field_index].col_pack_val
// to pos and advances pos past them. Expects kc_info, pk_index,
// field_index and pos to be in scope where it is expanded. The expansion
// already ends in a semicolon and is two statements, so it is not safe as
// the body of an unbraced if/else.
//
#define COPY_OFFSET_TO_BUF  memcpy ( \
    pos, \
    &kc_info->cp_info[pk_index][field_index].col_pack_val, \
    sizeof(uint32_t) \
    ); \
    pos += sizeof(uint32_t);
1820 
1821 
pack_desc_pk_info(uchar * buf,KEY_AND_COL_INFO * kc_info,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1822 static uint32_t pack_desc_pk_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1823     uchar* pos = buf;
1824     uint16 field_index = key_part->field->field_index;
1825     Field* field = table_share->field[field_index];
1826     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1827     uint32_t key_part_length = key_part->length;
1828     uint32_t field_length;
1829     uchar len_bytes = 0;
1830 
1831     switch(toku_type) {
1832     case (toku_type_int):
1833     case (toku_type_double):
1834     case (toku_type_float):
1835         pos[0] = COL_FIX_FIELD;
1836         pos++;
1837         assert_always(kc_info->field_lengths[field_index] < 256);
1838         pos[0] = kc_info->field_lengths[field_index];
1839         pos++;
1840         break;
1841     case (toku_type_fixbinary):
1842         pos[0] = COL_FIX_FIELD;
1843         pos++;
1844         field_length = field->pack_length();
1845         set_if_smaller(key_part_length, field_length);
1846         assert_always(key_part_length < 256);
1847         pos[0] = (uchar)key_part_length;
1848         pos++;
1849         break;
1850     case (toku_type_fixstring):
1851     case (toku_type_varbinary):
1852     case (toku_type_varstring):
1853     case (toku_type_blob):
1854         pos[0] = COL_VAR_FIELD;
1855         pos++;
1856         len_bytes = (key_part_length > 255) ? 2 : 1;
1857         pos[0] = len_bytes;
1858         pos++;
1859         break;
1860     default:
1861         assert_unreachable();
1862     }
1863 
1864     return pos - buf;
1865 }
1866 
pack_desc_pk_offset_info(uchar * buf,KEY_PART_INFO * key_part,KEY * prim_key,uchar * pk_info)1867 static uint32_t pack_desc_pk_offset_info(uchar* buf,
1868                                          KEY_PART_INFO* key_part,
1869                                          KEY* prim_key,
1870                                          uchar* pk_info) {
1871     uchar* pos = buf;
1872     uint16 field_index = key_part->field->field_index;
1873     bool found_col_in_pk = false;
1874     uint32_t index_in_pk;
1875 
1876     bool is_constant_offset = true;
1877     uint32_t offset = 0;
1878     for (uint i = 0; i < prim_key->user_defined_key_parts; i++) {
1879         KEY_PART_INFO curr = prim_key->key_part[i];
1880         uint16 curr_field_index = curr.field->field_index;
1881 
1882         if (pk_info[2*i] == COL_VAR_FIELD) {
1883             is_constant_offset = false;
1884         }
1885 
1886         if (curr_field_index == field_index) {
1887             found_col_in_pk = true;
1888             index_in_pk = i;
1889             break;
1890         }
1891         offset += pk_info[2*i + 1];
1892     }
1893     assert_always(found_col_in_pk);
1894     if (is_constant_offset) {
1895         pos[0] = COL_FIX_PK_OFFSET;
1896         pos++;
1897 
1898         memcpy (pos, &offset, sizeof(offset));
1899         pos += sizeof(offset);
1900     }
1901     else {
1902         pos[0] = COL_VAR_PK_OFFSET;
1903         pos++;
1904 
1905         memcpy(pos, &index_in_pk, sizeof(index_in_pk));
1906         pos += sizeof(index_in_pk);
1907     }
1908     return pos - buf;
1909 }
1910 
pack_desc_offset_info(uchar * buf,KEY_AND_COL_INFO * kc_info,uint pk_index,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1911 static uint32_t pack_desc_offset_info(uchar* buf, KEY_AND_COL_INFO* kc_info, uint pk_index, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1912     uchar* pos = buf;
1913     uint16 field_index = key_part->field->field_index;
1914     Field* field = table_share->field[field_index];
1915     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1916     bool found_index = false;
1917 
1918     switch(toku_type) {
1919     case (toku_type_int):
1920     case (toku_type_double):
1921     case (toku_type_float):
1922     case (toku_type_fixbinary):
1923     case (toku_type_fixstring):
1924         pos[0] = COL_FIX_FIELD;
1925         pos++;
1926 
1927         // copy the offset
1928         COPY_OFFSET_TO_BUF;
1929         break;
1930     case (toku_type_varbinary):
1931     case (toku_type_varstring):
1932         pos[0] = COL_VAR_FIELD;
1933         pos++;
1934 
1935         // copy the offset
1936         COPY_OFFSET_TO_BUF;
1937         break;
1938     case (toku_type_blob):
1939         pos[0] = COL_BLOB_FIELD;
1940         pos++;
1941         for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
1942             uint32_t blob_index = kc_info->blob_fields[i];
1943             if (blob_index == field_index) {
1944                 uint32_t val = i;
1945                 memcpy(pos, &val, sizeof(uint32_t));
1946                 pos += sizeof(uint32_t);
1947                 found_index = true;
1948                 break;
1949             }
1950         }
1951         assert_always(found_index);
1952         break;
1953     default:
1954         assert_unreachable();
1955     }
1956 
1957     return pos - buf;
1958 }
1959 
pack_desc_key_length_info(uchar * buf,KEY_AND_COL_INFO * kc_info,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1960 static uint32_t pack_desc_key_length_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1961     uchar* pos = buf;
1962     uint16 field_index = key_part->field->field_index;
1963     Field* field = table_share->field[field_index];
1964     TOKU_TYPE toku_type = mysql_to_toku_type(field);
1965     uint32_t key_part_length = key_part->length;
1966     uint32_t field_length;
1967 
1968     switch(toku_type) {
1969     case (toku_type_int):
1970     case (toku_type_double):
1971     case (toku_type_float):
1972         // copy the key_part length
1973         field_length = kc_info->field_lengths[field_index];
1974         memcpy(pos, &field_length, sizeof(field_length));
1975         pos += sizeof(key_part_length);
1976         break;
1977     case (toku_type_fixbinary):
1978     case (toku_type_fixstring):
1979         field_length = field->pack_length();
1980         set_if_smaller(key_part_length, field_length);
1981         // fallthrough
1982     case (toku_type_varbinary):
1983     case (toku_type_varstring):
1984     case (toku_type_blob):
1985         // copy the key_part length
1986         memcpy(pos, &key_part_length, sizeof(key_part_length));
1987         pos += sizeof(key_part_length);
1988         break;
1989     default:
1990         assert_unreachable();
1991     }
1992 
1993     return pos - buf;
1994 }
1995 
pack_desc_char_info(uchar * buf,TABLE_SHARE * table_share,KEY_PART_INFO * key_part)1996 static uint32_t pack_desc_char_info(uchar* buf,
1997                                     TABLE_SHARE* table_share,
1998                                     KEY_PART_INFO* key_part) {
1999     uchar* pos = buf;
2000     uint16 field_index = key_part->field->field_index;
2001     Field* field = table_share->field[field_index];
2002     TOKU_TYPE toku_type = mysql_to_toku_type(field);
2003     uint32_t charset_num = 0;
2004 
2005     switch(toku_type) {
2006     case (toku_type_int):
2007     case (toku_type_double):
2008     case (toku_type_float):
2009     case (toku_type_fixbinary):
2010     case (toku_type_varbinary):
2011         pos[0] = COL_HAS_NO_CHARSET;
2012         pos++;
2013         break;
2014     case (toku_type_fixstring):
2015     case (toku_type_varstring):
2016     case (toku_type_blob):
2017         pos[0] = COL_HAS_CHARSET;
2018         pos++;
2019 
2020         // copy the charset
2021         charset_num = field->charset()->number;
2022         pos[0] = (uchar)(charset_num & 255);
2023         pos[1] = (uchar)((charset_num >> 8) & 255);
2024         pos[2] = (uchar)((charset_num >> 16) & 255);
2025         pos[3] = (uchar)((charset_num >> 24) & 255);
2026         pos += 4;
2027         break;
2028     default:
2029         assert_unreachable();
2030     }
2031 
2032     return pos - buf;
2033 }
2034 
pack_some_row_info(uchar * buf,uint pk_index,TABLE_SHARE * table_share,KEY_AND_COL_INFO * kc_info)2035 static uint32_t pack_some_row_info (
2036     uchar* buf,
2037     uint pk_index,
2038     TABLE_SHARE* table_share,
2039     KEY_AND_COL_INFO* kc_info
2040     )
2041 {
2042     uchar* pos = buf;
2043     uint32_t num_null_bytes = 0;
2044     //
2045     // four bytes stating number of null bytes
2046     //
2047     num_null_bytes = table_share->null_bytes;
2048     memcpy(pos, &num_null_bytes, sizeof(num_null_bytes));
2049     pos += sizeof(num_null_bytes);
2050     //
2051     // eight bytes stating mcp_info
2052     //
2053     memcpy(pos, &kc_info->mcp_info[pk_index], sizeof(MULTI_COL_PACK_INFO));
2054     pos += sizeof(MULTI_COL_PACK_INFO);
2055     //
2056     // one byte for the number of offset bytes
2057     //
2058     pos[0] = (uchar)kc_info->num_offset_bytes;
2059     pos++;
2060 
2061     return pos - buf;
2062 }
2063 
get_max_clustering_val_pack_desc_size(TABLE_SHARE * table_share)2064 static uint32_t get_max_clustering_val_pack_desc_size(
2065     TABLE_SHARE* table_share
2066     )
2067 {
2068     uint32_t ret_val = 0;
2069     //
2070     // the fixed stuff:
2071     //  first the things in pack_some_row_info
2072     //  second another mcp_info
2073     //  third a byte that states if blobs exist
2074     ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
2075     ret_val += sizeof(MULTI_COL_PACK_INFO);
2076     ret_val++;
2077     //
2078     // now the variable stuff
2079     //  an upper bound is, for each field, byte stating if it is fixed or var, followed
2080     // by 8 bytes for endpoints
2081     //
2082     ret_val += (table_share->fields)*(1 + 2*sizeof(uint32_t));
2083     //
2084     // four bytes storing the length of this portion
2085     //
2086     ret_val += 4;
2087 
2088     return ret_val;
2089 }
2090 
create_toku_clustering_val_pack_descriptor(uchar * buf,uint pk_index,TABLE_SHARE * table_share,KEY_AND_COL_INFO * kc_info,uint32_t keynr,bool is_clustering)2091 static uint32_t create_toku_clustering_val_pack_descriptor (
2092     uchar* buf,
2093     uint pk_index,
2094     TABLE_SHARE* table_share,
2095     KEY_AND_COL_INFO* kc_info,
2096     uint32_t keynr,
2097     bool is_clustering
2098     )
2099 {
2100     uchar* pos = buf + 4;
2101     uint32_t offset = 0;
2102     bool start_range_set = false;
2103     uint32_t last_col = 0;
2104     //
2105     // do not need to write anything if the key is not clustering
2106     //
2107     if (!is_clustering) {
2108         goto exit;
2109     }
2110 
2111     pos += pack_some_row_info(
2112         pos,
2113         pk_index,
2114         table_share,
2115         kc_info
2116         );
2117 
2118     //
2119     // eight bytes stating mcp_info of clustering key
2120     //
2121     memcpy(pos, &kc_info->mcp_info[keynr], sizeof(MULTI_COL_PACK_INFO));
2122     pos += sizeof(MULTI_COL_PACK_INFO);
2123 
2124     //
2125     // store bit that states if blobs exist
2126     //
2127     pos[0] = (kc_info->num_blobs) ? 1 : 0;
2128     pos++;
2129 
2130     //
2131     // descriptor assumes that all fields filtered from pk are
2132     // also filtered from clustering key val. Doing check here to
2133     // make sure something unexpected does not happen
2134     //
2135     for (uint i = 0; i < table_share->fields; i++) {
2136         bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2137         bool col_filtered_in_pk = bitmap_is_set(&kc_info->key_filters[pk_index],i);
2138         if (col_filtered_in_pk) {
2139             assert_always(col_filtered);
2140         }
2141     }
2142 
2143     //
2144     // first handle the fixed fields
2145     //
2146     start_range_set = false;
2147     last_col = 0;
2148     for (uint i = 0; i < table_share->fields; i++) {
2149         bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2150         if (!is_fixed_field(kc_info, i)) {
2151             //
2152             // not a fixed field, continue
2153             //
2154             continue;
2155         }
2156         if (col_filtered && start_range_set) {
2157             //
2158             // need to set the end range
2159             //
2160             start_range_set = false;
2161             uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val + kc_info->field_lengths[last_col];
2162             memcpy(pos, &end_offset, sizeof(end_offset));
2163             pos += sizeof(end_offset);
2164         }
2165         else if (!col_filtered) {
2166             if (!start_range_set) {
2167                 pos[0] = CK_FIX_RANGE;
2168                 pos++;
2169                 start_range_set = true;
2170                 uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
2171                 memcpy(pos, &start_offset , sizeof(start_offset));
2172                 pos += sizeof(start_offset);
2173             }
2174             last_col = i;
2175         }
2176         else {
2177             continue;
2178         }
2179     }
2180     if (start_range_set) {
2181         //
2182         // need to set the end range
2183         //
2184         start_range_set = false;
2185         uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val+ kc_info->field_lengths[last_col];
2186         memcpy(pos, &end_offset, sizeof(end_offset));
2187         pos += sizeof(end_offset);
2188     }
2189 
2190     //
2191     // now handle the var fields
2192     //
2193     start_range_set = false;
2194     last_col = 0;
2195     for (uint i = 0; i < table_share->fields; i++) {
2196         bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2197         if (!is_variable_field(kc_info, i)) {
2198             //
2199             // not a var field, continue
2200             //
2201             continue;
2202         }
2203         if (col_filtered && start_range_set) {
2204             //
2205             // need to set the end range
2206             //
2207             start_range_set = false;
2208             uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
2209             memcpy(pos, &end_offset, sizeof(end_offset));
2210             pos += sizeof(end_offset);
2211         }
2212         else if (!col_filtered) {
2213             if (!start_range_set) {
2214                 pos[0] = CK_VAR_RANGE;
2215                 pos++;
2216 
2217                 start_range_set = true;
2218                 uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
2219                 memcpy(pos, &start_offset , sizeof(start_offset));
2220                 pos += sizeof(start_offset);
2221             }
2222             last_col = i;
2223         }
2224         else {
2225             continue;
2226         }
2227     }
2228     if (start_range_set) {
2229         start_range_set = false;
2230         uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
2231         memcpy(pos, &end_offset, sizeof(end_offset));
2232         pos += sizeof(end_offset);
2233     }
2234 
2235 exit:
2236     offset = pos - buf;
2237     buf[0] = (uchar)(offset & 255);
2238     buf[1] = (uchar)((offset >> 8) & 255);
2239     buf[2] = (uchar)((offset >> 16) & 255);
2240     buf[3] = (uchar)((offset >> 24) & 255);
2241 
2242     return pos - buf;
2243 }
2244 
//
// Builds the value of a clustering key in buf from the value of the
// primary dictionary row, as directed by a row descriptor.
//
// Parameters:
//  buf - destination of the packed clustering value
//  row_desc - descriptor; read here as: four bytes of null byte count,
//      a MULTI_COL_PACK_INFO for the source row, one byte of offset
//      width, a MULTI_COL_PACK_INFO for the destination, one byte
//      stating if blobs exist, then a sequence of range entries
//  row_desc_size - number of bytes in row_desc
//  pk_val - the source row value
// Returns:
//  the number of bytes written to buf
//
static uint32_t pack_clustering_val_from_desc(
    uchar* buf,
    void* row_desc,
    uint32_t row_desc_size,
    const DBT* pk_val
    )
{
    uchar* null_bytes_src_ptr = NULL;
    uchar* fixed_src_ptr = NULL;
    uchar* var_src_offset_ptr = NULL;
    uchar* var_src_data_ptr = NULL;
    uchar* fixed_dest_ptr = NULL;
    uchar* var_dest_offset_ptr = NULL;
    uchar* var_dest_data_ptr = NULL;
    uchar* orig_var_dest_data_ptr = NULL;
    uchar* desc_pos = (uchar *)row_desc;
    uint32_t num_null_bytes = 0;
    uint32_t num_offset_bytes;
    MULTI_COL_PACK_INFO src_mcp_info, dest_mcp_info;
    uchar has_blobs;

    //
    // read the fixed header of the descriptor
    //
    memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
    desc_pos += sizeof(num_null_bytes);

    memcpy(&src_mcp_info, desc_pos, sizeof(src_mcp_info));
    desc_pos += sizeof(src_mcp_info);

    num_offset_bytes = desc_pos[0];
    desc_pos++;

    memcpy(&dest_mcp_info, desc_pos, sizeof(dest_mcp_info));
    desc_pos += sizeof(dest_mcp_info);

    has_blobs = desc_pos[0];
    desc_pos++;

    //
    //set the variables
    // both source and destination are laid out as: null bytes, fixed
    // fields, offsets of the var fields, data of the var fields
    //
    null_bytes_src_ptr = (uchar *)pk_val->data;
    fixed_src_ptr = null_bytes_src_ptr + num_null_bytes;
    var_src_offset_ptr = fixed_src_ptr + src_mcp_info.fixed_field_size;
    var_src_data_ptr = var_src_offset_ptr + src_mcp_info.len_of_offsets;

    fixed_dest_ptr = buf + num_null_bytes;
    var_dest_offset_ptr = fixed_dest_ptr + dest_mcp_info.fixed_field_size;
    var_dest_data_ptr = var_dest_offset_ptr + dest_mcp_info.len_of_offsets;
    orig_var_dest_data_ptr = var_dest_data_ptr;

    //
    // copy the null bytes
    //
    memcpy(buf, null_bytes_src_ptr, num_null_bytes);
    //
    // each remaining descriptor entry is a tag byte followed by a four
    // byte start and a four byte end
    //
    while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
        uint32_t start, end, length;
        uchar curr = desc_pos[0];
        desc_pos++;

        memcpy(&start, desc_pos, sizeof(start));
        desc_pos += sizeof(start);

        memcpy(&end, desc_pos, sizeof(end));
        desc_pos += sizeof(end);

        assert_always (start <= end);

        if (curr == CK_FIX_RANGE) {
            //
            // start and end are byte offsets into the fixed fields
            //
            length = end - start;

            memcpy(fixed_dest_ptr, fixed_src_ptr + start, length);
            fixed_dest_ptr += length;
        }
        else if (curr == CK_VAR_RANGE) {
            //
            // start and end are used as indexes of var fields, both
            // inclusive (see the loop over i below)
            //
            uint32_t start_data_size;
            uint32_t start_data_offset;
            uint32_t end_data_size;
            uint32_t end_data_offset;
            uint32_t offset_diffs;

            // NOTE(review): presumably yields the size and data offset of
            // the given var field; confirm against get_var_field_info
            get_var_field_info(
                &start_data_size,
                &start_data_offset,
                start,
                var_src_offset_ptr,
                num_offset_bytes
                );
            get_var_field_info(
                &end_data_size,
                &end_data_offset,
                end,
                var_src_offset_ptr,
                num_offset_bytes
                );
            length = end_data_offset + end_data_size - start_data_offset;
            //
            // copy the data
            //
            memcpy(
                var_dest_data_ptr,
                var_src_data_ptr + start_data_offset,
                length
                );
            var_dest_data_ptr += length;

            //
            // put in offset info
            // offset_diffs is how far the end of this range sits in the
            // source beyond where it now ends in the destination; every
            // source offset of the range is reduced by that amount
            //
            offset_diffs = (end_data_offset + end_data_size) - (uint32_t)(var_dest_data_ptr - orig_var_dest_data_ptr);
            for (uint32_t i = start; i <= end; i++) {
                if ( num_offset_bytes == 1 ) {
                    assert_always(offset_diffs < 256);
                    var_dest_offset_ptr[0] = var_src_offset_ptr[i] - (uchar)offset_diffs;
                    var_dest_offset_ptr++;
                } else if ( num_offset_bytes == 2 ) {
                    uint32_t tmp = uint2korr(var_src_offset_ptr + 2*i);
                    uint32_t new_offset = tmp - offset_diffs;
                    assert_always(new_offset < 1<<16);
                    int2store(var_dest_offset_ptr,new_offset);
                    var_dest_offset_ptr += 2;
                } else {
                    assert_unreachable();
                }
            }
        } else {
            assert_unreachable();
        }
    }
    //
    // copy blobs
    // at this point, var_dest_data_ptr is pointing to the end, where blobs should be located
    // so, we put the blobs at var_dest_data_ptr
    //
    if (has_blobs) {
        uint32_t num_blob_bytes;
        uint32_t start_offset;
        uchar* src_blob_ptr = NULL;
        // NOTE(review): presumably yields where the blob data starts
        // relative to var_src_data_ptr; confirm against get_blob_field_info
        get_blob_field_info(
            &start_offset,
            src_mcp_info.len_of_offsets,
            var_src_data_ptr,
            num_offset_bytes
            );
        src_blob_ptr = var_src_data_ptr + start_offset;
        // everything from the blob start to the end of the source value
        num_blob_bytes = pk_val->size - (start_offset + (var_src_data_ptr - null_bytes_src_ptr));
        memcpy(var_dest_data_ptr, src_blob_ptr, num_blob_bytes);
        var_dest_data_ptr += num_blob_bytes;
    }
    return var_dest_data_ptr - buf;
}
2394 
2395 
get_max_secondary_key_pack_desc_size(KEY_AND_COL_INFO * kc_info)2396 static uint32_t get_max_secondary_key_pack_desc_size(
2397     KEY_AND_COL_INFO* kc_info
2398     )
2399 {
2400     uint32_t ret_val = 0;
2401     //
2402     // the fixed stuff:
2403     //  byte that states if main dictionary
2404     //  byte that states if hpk
2405     //  the things in pack_some_row_info
2406     ret_val++;
2407     ret_val++;
2408     ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
2409     //
2410     // now variable sized stuff
2411     //
2412 
2413     //  first the blobs
2414     ret_val += sizeof(kc_info->num_blobs);
2415     ret_val+= kc_info->num_blobs;
2416 
2417     // then the pk
2418     // one byte for num key parts
2419     // two bytes for each key part
2420     ret_val++;
2421     ret_val += MAX_REF_PARTS*2;
2422 
2423     // then the key
2424     // null bit, then null byte,
2425     // then 1 byte stating what it is, then 4 for offset, 4 for key length,
2426     //      1 for if charset exists, and 4 for charset
2427     ret_val += MAX_REF_PARTS*(1 + sizeof(uint32_t) + 1 + 3*sizeof(uint32_t) + 1);
2428     //
2429     // four bytes storing the length of this portion
2430     //
2431     ret_val += 4;
2432     return ret_val;
2433 }
2434 
create_toku_secondary_key_pack_descriptor(uchar * buf,bool has_hpk,uint pk_index,TABLE_SHARE * table_share,TABLE * table,KEY_AND_COL_INFO * kc_info,KEY * key_info,KEY * prim_key)2435 static uint32_t create_toku_secondary_key_pack_descriptor (
2436     uchar* buf,
2437     bool has_hpk,
2438     uint pk_index,
2439     TABLE_SHARE* table_share,
2440     TABLE* table,
2441     KEY_AND_COL_INFO* kc_info,
2442     KEY* key_info,
2443     KEY* prim_key
2444     )
2445 {
2446     //
2447     // The first four bytes always contain the offset of where the first key
2448     // ends.
2449     //
2450     uchar* pk_info = NULL;
2451     uchar* pos = buf + 4;
2452     uint32_t offset = 0;
2453 
2454     //
2455     // first byte states that it is NOT main dictionary
2456     //
2457     pos[0] = 0;
2458     pos++;
2459 
2460     //
2461     // one byte states if main dictionary has an hpk or not
2462     //
2463     if (has_hpk) {
2464         pos[0] = 1;
2465     }
2466     else {
2467         pos[0] = 0;
2468     }
2469     pos++;
2470 
2471     pos += pack_some_row_info(
2472         pos,
2473         pk_index,
2474         table_share,
2475         kc_info
2476         );
2477 
2478     //
2479     // store blob information
2480     //
2481     memcpy(pos, &kc_info->num_blobs, sizeof(kc_info->num_blobs));
2482     pos += sizeof(uint32_t);
2483     for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
2484         //
2485         // store length bytes for each blob
2486         //
2487         Field* field = table_share->field[kc_info->blob_fields[i]];
2488         pos[0] = (uchar)field->row_pack_length();
2489         pos++;
2490     }
2491 
2492     //
2493     // store the pk information
2494     //
2495     if (has_hpk) {
2496         pos[0] = 0;
2497         pos++;
2498     }
2499     else {
2500         //
2501         // store number of parts
2502         //
2503         assert_always(prim_key->user_defined_key_parts < 128);
2504         pos[0] = 2 * prim_key->user_defined_key_parts;
2505         pos++;
2506         //
2507         // for each part, store if it is a fixed field or var field
2508         // if fixed, store number of bytes, if var, store
2509         // number of length bytes
2510         // total should be two bytes per key part stored
2511         //
2512         pk_info = pos;
2513         uchar* tmp = pos;
2514         for (uint i = 0; i < prim_key->user_defined_key_parts; i++) {
2515             tmp += pack_desc_pk_info(
2516                 tmp,
2517                 kc_info,
2518                 table_share,
2519                 &prim_key->key_part[i]
2520                 );
2521         }
2522         //
2523         // asserting that we moved forward as much as we think we have
2524         //
2525         assert_always(tmp - pos == (2 * prim_key->user_defined_key_parts));
2526         pos = tmp;
2527     }
2528 
2529     for (uint i = 0; i < key_info->user_defined_key_parts; i++) {
2530         KEY_PART_INFO curr_kpi = key_info->key_part[i];
2531         uint16 field_index = curr_kpi.field->field_index;
2532         Field* field = table_share->field[field_index];
2533         bool is_col_in_pk = false;
2534 
2535         if (bitmap_is_set(&kc_info->key_filters[pk_index],field_index)) {
2536             assert_always(!has_hpk);
2537             assert_always(prim_key != nullptr);
2538             is_col_in_pk = true;
2539         }
2540         else {
2541             is_col_in_pk = false;
2542         }
2543 
2544         pos[0] = field->null_bit;
2545         pos++;
2546 
2547         if (is_col_in_pk) {
2548             //
2549             // assert that columns in pk do not have a null bit
2550             // because in MySQL, pk columns cannot be null
2551             //
2552             assert_always(!field->null_bit);
2553         }
2554 
2555         if (field->null_bit) {
2556             uint32_t null_offset = get_null_offset(table,table->field[field_index]);
2557             memcpy(pos, &null_offset, sizeof(uint32_t));
2558             pos += sizeof(uint32_t);
2559         }
2560         if (is_col_in_pk) {
2561             pos += pack_desc_pk_offset_info(pos, &curr_kpi, prim_key, pk_info);
2562         }
2563         else {
2564             pos += pack_desc_offset_info(
2565                 pos,
2566                 kc_info,
2567                 pk_index,
2568                 table_share,
2569                 &curr_kpi
2570                 );
2571         }
2572         pos += pack_desc_key_length_info(
2573             pos,
2574             kc_info,
2575             table_share,
2576             &curr_kpi
2577             );
2578         pos += pack_desc_char_info(pos, table_share, &curr_kpi);
2579     }
2580 
2581     offset = pos - buf;
2582     buf[0] = (uchar)(offset & 255);
2583     buf[1] = (uchar)((offset >> 8) & 255);
2584     buf[2] = (uchar)((offset >> 16) & 255);
2585     buf[3] = (uchar)((offset >> 24) & 255);
2586 
2587     return pos - buf;
2588 }
2589 
skip_key_in_desc(uchar * row_desc)2590 static uint32_t skip_key_in_desc(
2591     uchar* row_desc
2592     )
2593 {
2594     uchar* pos = row_desc;
2595     uchar col_bin_or_char;
2596     //
2597     // skip the byte that states if it is a fix field or var field, we do not care
2598     //
2599     pos++;
2600 
2601     //
2602     // skip the offset information
2603     //
2604     pos += sizeof(uint32_t);
2605 
2606     //
2607     // skip the key_part_length info
2608     //
2609     pos += sizeof(uint32_t);
2610     col_bin_or_char = pos[0];
2611     pos++;
2612     if (col_bin_or_char == COL_HAS_NO_CHARSET) {
2613         goto exit;
2614     }
2615     //
2616     // skip the charset info
2617     //
2618     pos += 4;
2619 
2620 
2621 exit:
2622     return (uint32_t)(pos-row_desc);
2623 }
2624 
2625 
max_key_size_from_desc(void * row_desc,uint32_t row_desc_size)2626 static uint32_t max_key_size_from_desc(
2627     void* row_desc,
2628     uint32_t row_desc_size
2629     )
2630 {
2631     uchar* desc_pos = (uchar *)row_desc;
2632     uint32_t num_blobs;
2633     uint32_t num_pk_columns;
2634     //
2635     // start at 1 for the infinity byte
2636     //
2637     uint32_t max_size = 1;
2638 
2639     // skip byte that states if main dictionary
2640     bool is_main_dictionary = desc_pos[0];
2641     desc_pos++;
2642     assert_always(!is_main_dictionary);
2643 
2644     // skip hpk byte
2645     desc_pos++;
2646 
2647     // skip num_null_bytes
2648     desc_pos += sizeof(uint32_t);
2649 
2650     // skip mcp_info
2651     desc_pos += sizeof(MULTI_COL_PACK_INFO);
2652 
2653     // skip offset_bytes
2654     desc_pos++;
2655 
2656     // skip over blobs
2657     memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
2658     desc_pos += sizeof(num_blobs);
2659     desc_pos += num_blobs;
2660 
2661     // skip over pk info
2662     num_pk_columns = desc_pos[0]/2;
2663     desc_pos++;
2664     desc_pos += 2*num_pk_columns;
2665 
2666     while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
2667         uchar has_charset;
2668         uint32_t key_length = 0;
2669 
2670         uchar null_bit = desc_pos[0];
2671         desc_pos++;
2672 
2673         if (null_bit) {
2674             //
2675             // column is NULLable, skip null_offset, and add a null byte
2676             //
2677             max_size++;
2678             desc_pos += sizeof(uint32_t);
2679         }
2680         //
2681         // skip over byte that states if fix or var
2682         //
2683         desc_pos++;
2684 
2685         // skip over offset
2686         desc_pos += sizeof(uint32_t);
2687 
2688         //
2689         // get the key length and add it to return value
2690         //
2691         memcpy(&key_length, desc_pos, sizeof(key_length));
2692         desc_pos += sizeof(key_length);
2693         max_size += key_length;
2694         max_size += 2; // 2 bytes for a potential length bytes, we are upperbounding, does not need to be super tight
2695 
2696         has_charset = desc_pos[0];
2697         desc_pos++;
2698 
2699         uint32_t charset_num;
2700         if (has_charset == COL_HAS_CHARSET) {
2701             // skip over charsent num
2702             desc_pos += sizeof(charset_num);
2703         }
2704         else {
2705             assert_always(has_charset == COL_HAS_NO_CHARSET);
2706         }
2707     }
2708     return max_size;
2709 }
2710 
pack_key_from_desc(uchar * buf,void * row_desc,uint32_t row_desc_size,const DBT * pk_key,const DBT * pk_val)2711 static uint32_t pack_key_from_desc(
2712     uchar* buf,
2713     void* row_desc,
2714     uint32_t row_desc_size,
2715     const DBT* pk_key,
2716     const DBT* pk_val) {
2717 
2718     MULTI_COL_PACK_INFO mcp_info;
2719     uint32_t num_null_bytes;
2720     uint32_t num_blobs;
2721     uint32_t num_pk_columns;
2722     uchar* blob_lengths = NULL;
2723     uchar* pk_info = NULL;
2724     uchar* pk_data_ptr = NULL;
2725     uchar* null_bytes_ptr = NULL;
2726     uchar* fixed_field_ptr = NULL;
2727     uchar* var_field_offset_ptr = NULL;
2728     const uchar* var_field_data_ptr = NULL;
2729     uint32_t num_offset_bytes;
2730     uchar* packed_key_pos = buf;
2731     uchar* desc_pos = (uchar *)row_desc;
2732 
2733     bool is_main_dictionary = desc_pos[0];
2734     desc_pos++;
2735     assert_always(!is_main_dictionary);
2736 
2737     //
2738     // get the constant info out of descriptor
2739     //
2740     bool hpk = desc_pos[0];
2741     desc_pos++;
2742 
2743     memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
2744     desc_pos += sizeof(num_null_bytes);
2745 
2746     memcpy(&mcp_info, desc_pos, sizeof(mcp_info));
2747     desc_pos += sizeof(mcp_info);
2748 
2749     num_offset_bytes = desc_pos[0];
2750     desc_pos++;
2751 
2752     memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
2753     desc_pos += sizeof(num_blobs);
2754 
2755     blob_lengths = desc_pos;
2756     desc_pos += num_blobs;
2757 
2758     num_pk_columns = desc_pos[0]/2;
2759     desc_pos++;
2760     pk_info = desc_pos;
2761     desc_pos += 2*num_pk_columns;
2762 
2763     //
2764     // now start packing the key
2765     //
2766 
2767     //
2768     // pack the infinity byte
2769     //
2770     packed_key_pos[0] = COL_ZERO;
2771     packed_key_pos++;
2772     //
2773     // now start packing each column of the key, as described in descriptor
2774     //
2775     if (!hpk) {
2776         // +1 for the infinity byte
2777         pk_data_ptr = (uchar *)pk_key->data + 1;
2778     }
2779     null_bytes_ptr = (uchar *)pk_val->data;
2780     fixed_field_ptr = null_bytes_ptr + num_null_bytes;
2781     var_field_offset_ptr = fixed_field_ptr + mcp_info.fixed_field_size;
2782     var_field_data_ptr = var_field_offset_ptr + mcp_info.len_of_offsets;
2783     while ((uint32_t)(desc_pos - (uchar*)row_desc) < row_desc_size) {
2784         uchar col_fix_val;
2785         uchar has_charset;
2786         uint32_t col_pack_val = 0;
2787         uint32_t key_length = 0;
2788 
2789         uchar null_bit = desc_pos[0];
2790         desc_pos++;
2791 
2792         if (null_bit) {
2793             //
2794             // column is NULLable, need to check the null bytes to see if it is NULL
2795             //
2796             uint32_t null_offset = 0;
2797             bool is_field_null;
2798             memcpy(&null_offset, desc_pos, sizeof(null_offset));
2799             desc_pos += sizeof(null_offset);
2800 
2801             is_field_null = (null_bytes_ptr[null_offset] & null_bit) ? true: false;
2802             if (is_field_null) {
2803                 packed_key_pos[0] = NULL_COL_VAL;
2804                 packed_key_pos++;
2805                 desc_pos += skip_key_in_desc(desc_pos);
2806                 continue;
2807             } else {
2808                 packed_key_pos[0] = NONNULL_COL_VAL;
2809                 packed_key_pos++;
2810             }
2811         }
2812         //
2813         // now pack the column (unless it was NULL, and we continued)
2814         //
2815         col_fix_val = desc_pos[0];
2816         desc_pos++;
2817 
2818         memcpy(&col_pack_val, desc_pos, sizeof(col_pack_val));
2819         desc_pos += sizeof(col_pack_val);
2820 
2821         memcpy(&key_length, desc_pos, sizeof(key_length));
2822         desc_pos += sizeof(key_length);
2823 
2824         has_charset = desc_pos[0];
2825         desc_pos++;
2826 
2827         uint32_t charset_num = 0;
2828         if (has_charset == COL_HAS_CHARSET) {
2829             memcpy(&charset_num, desc_pos, sizeof(charset_num));
2830             desc_pos += sizeof(charset_num);
2831         } else {
2832             assert_always(has_charset == COL_HAS_NO_CHARSET);
2833         }
2834         //
2835         // case where column is in pk val
2836         //
2837         if (col_fix_val == COL_FIX_FIELD ||
2838             col_fix_val == COL_VAR_FIELD ||
2839             col_fix_val == COL_BLOB_FIELD) {
2840             if (col_fix_val == COL_FIX_FIELD &&
2841                 has_charset == COL_HAS_NO_CHARSET) {
2842                 memcpy(
2843                     packed_key_pos,
2844                     &fixed_field_ptr[col_pack_val],
2845                     key_length);
2846                 packed_key_pos += key_length;
2847             } else if (col_fix_val == COL_VAR_FIELD &&
2848                        has_charset == COL_HAS_NO_CHARSET) {
2849                 uint32_t data_start_offset = 0;
2850 
2851                 uint32_t data_size = 0;
2852                 get_var_field_info(
2853                     &data_size,
2854                     &data_start_offset,
2855                     col_pack_val,
2856                     var_field_offset_ptr,
2857                     num_offset_bytes);
2858 
2859                 //
2860                 // length of this field in this row is data_size
2861                 // data is located beginning at var_field_data_ptr + data_start_offset
2862                 //
2863                 packed_key_pos = pack_toku_varbinary_from_desc(
2864                     packed_key_pos,
2865                     var_field_data_ptr + data_start_offset,
2866                     //number of bytes to use to encode the length in to_tokudb
2867                     key_length,
2868                     //length of field
2869                     data_size);
2870             } else {
2871                 const uchar* data_start = NULL;
2872                 uint32_t data_start_offset = 0;
2873                 uint32_t data_size = 0;
2874 
2875                 if (col_fix_val == COL_FIX_FIELD) {
2876                     data_start_offset = col_pack_val;
2877                     data_size = key_length;
2878                     data_start = fixed_field_ptr + data_start_offset;
2879                 } else if (col_fix_val == COL_VAR_FIELD){
2880                     get_var_field_info(
2881                         &data_size,
2882                         &data_start_offset,
2883                         col_pack_val,
2884                         var_field_offset_ptr,
2885                         num_offset_bytes);
2886                     data_start = var_field_data_ptr + data_start_offset;
2887                 } else if (col_fix_val == COL_BLOB_FIELD) {
2888                     uint32_t blob_index = col_pack_val;
2889                     uint32_t blob_offset;
2890                     const uchar* blob_ptr = NULL;
2891                     uint32_t field_len;
2892                     uint32_t field_len_bytes = blob_lengths[blob_index];
2893                     get_blob_field_info(
2894                         &blob_offset,
2895                         mcp_info.len_of_offsets,
2896                         var_field_data_ptr,
2897                         num_offset_bytes);
2898                     blob_ptr = var_field_data_ptr + blob_offset;
2899                     assert_always(num_blobs > 0);
2900 
2901                     // skip over other blobs to get to the one we want to
2902                     // make a key out of
2903                     for (uint32_t i = 0; i < blob_index; i++) {
2904                         blob_ptr = unpack_toku_field_blob(
2905                             NULL,
2906                             blob_ptr,
2907                             blob_lengths[i],
2908                             true);
2909                     }
2910                     // at this point, blob_ptr is pointing to the blob we
2911                     // want to make a key from
2912                     field_len = get_blob_field_len(blob_ptr, field_len_bytes);
2913                     // now we set the variables to make the key
2914                     data_start = blob_ptr + field_len_bytes;
2915                     data_size = field_len;
2916                 } else {
2917                     assert_unreachable();
2918                 }
2919 
2920                 packed_key_pos = pack_toku_varstring_from_desc(packed_key_pos,
2921                     data_start,
2922                     key_length,
2923                     data_size,
2924                     charset_num);
2925             }
2926         } else {
2927             // case where column is in pk key
2928             if (col_fix_val == COL_FIX_PK_OFFSET) {
2929                 memcpy(packed_key_pos, &pk_data_ptr[col_pack_val], key_length);
2930                 packed_key_pos += key_length;
2931             } else if (col_fix_val == COL_VAR_PK_OFFSET) {
2932                 uchar* tmp_pk_data_ptr = pk_data_ptr;
2933                 uint32_t index_in_pk = col_pack_val;
2934                 //
2935                 // skip along in pk to the right column
2936                 //
2937                 for (uint32_t i = 0; i < index_in_pk; i++) {
2938                     if (pk_info[2*i] == COL_FIX_FIELD) {
2939                         tmp_pk_data_ptr += pk_info[2*i + 1];
2940                     } else if (pk_info[2*i] == COL_VAR_FIELD) {
2941                         uint32_t len_bytes = pk_info[2*i + 1];
2942                         uint32_t len;
2943                         if (len_bytes == 1) {
2944                             len = tmp_pk_data_ptr[0];
2945                             tmp_pk_data_ptr++;
2946                         } else if (len_bytes == 2) {
2947                             len = uint2korr(tmp_pk_data_ptr);
2948                             tmp_pk_data_ptr += 2;
2949                         } else {
2950                             assert_unreachable();
2951                         }
2952                         tmp_pk_data_ptr += len;
2953                     } else {
2954                         assert_unreachable();
2955                     }
2956                 }
2957                 //
2958                 // at this point, tmp_pk_data_ptr is pointing at the column
2959                 //
2960                 uint32_t is_fix_field = pk_info[2*index_in_pk];
2961                 if (is_fix_field == COL_FIX_FIELD) {
2962                     memcpy(packed_key_pos, tmp_pk_data_ptr, key_length);
2963                     packed_key_pos += key_length;
2964                 } else if (is_fix_field == COL_VAR_FIELD) {
2965                     const uchar* data_start = NULL;
2966                     uint32_t data_size = 0;
2967                     uint32_t len_bytes = pk_info[2*index_in_pk + 1];
2968                     if (len_bytes == 1) {
2969                         data_size = tmp_pk_data_ptr[0];
2970                         tmp_pk_data_ptr++;
2971                     } else if (len_bytes == 2) {
2972                         data_size = uint2korr(tmp_pk_data_ptr);
2973                         tmp_pk_data_ptr += 2;
2974                     } else {
2975                         assert_unreachable();
2976                     }
2977                     data_start = tmp_pk_data_ptr;
2978 
2979                     if (has_charset == COL_HAS_CHARSET) {
2980                         packed_key_pos = pack_toku_varstring_from_desc(
2981                             packed_key_pos,
2982                             data_start,
2983                             key_length,
2984                             data_size,
2985                             charset_num);
2986                     } else if (has_charset == COL_HAS_NO_CHARSET) {
2987                         packed_key_pos = pack_toku_varbinary_from_desc(
2988                             packed_key_pos,
2989                             data_start,
2990                             key_length,
2991                             data_size);
2992                     } else {
2993                         assert_unreachable();
2994                     }
2995                 } else {
2996                     assert_unreachable();
2997                 }
2998             } else {
2999                 assert_unreachable();
3000             }
3001         }
3002 
3003     }
3004     assert_always( (uint32_t)(desc_pos - (uchar *)row_desc) == row_desc_size);
3005 
3006     //
3007     // now append the primary key to the end of the key
3008     //
3009     if (hpk) {
3010         memcpy(packed_key_pos, pk_key->data, pk_key->size);
3011         packed_key_pos += pk_key->size;
3012     } else {
3013         memcpy(packed_key_pos, (uchar *)pk_key->data + 1, pk_key->size - 1);
3014         packed_key_pos += (pk_key->size - 1);
3015     }
3016 
3017     return (uint32_t)(packed_key_pos - buf);
3018 }
3019 
fields_have_same_name(Field * a,Field * b)3020 static bool fields_have_same_name(Field* a, Field* b) {
3021     return strcmp(a->field_name.str, b->field_name.str) == 0;
3022 }
3023 
fields_are_same_type(Field * a,Field * b)3024 static bool fields_are_same_type(Field* a, Field* b) {
3025     bool retval = true;
3026     enum_field_types a_mysql_type = a->real_type();
3027     enum_field_types b_mysql_type = b->real_type();
3028     TOKU_TYPE a_toku_type = mysql_to_toku_type(a);
3029     TOKU_TYPE b_toku_type = mysql_to_toku_type(b);
3030     // make sure have same names
3031     // make sure have same types
3032     if (a_mysql_type != b_mysql_type) {
3033         retval = false;
3034         goto cleanup;
3035     }
3036     // Thanks to MariaDB 5.5, we can have two fields
3037     // be the same MySQL type but not the same toku type,
3038     // This is an issue introduced with MariaDB's fractional time
3039     // implementation
3040     if (a_toku_type != b_toku_type) {
3041         retval = false;
3042         goto cleanup;
3043     }
3044     // make sure that either both are nullable, or both not nullable
3045     if ((a->null_bit && !b->null_bit) || (!a->null_bit && b->null_bit)) {
3046         retval = false;
3047         goto cleanup;
3048     }
3049     switch (a_mysql_type) {
3050     case MYSQL_TYPE_TINY:
3051     case MYSQL_TYPE_SHORT:
3052     case MYSQL_TYPE_INT24:
3053     case MYSQL_TYPE_LONG:
3054     case MYSQL_TYPE_LONGLONG:
3055         // length, unsigned, auto increment
3056         if (a->pack_length() != b->pack_length() ||
3057             (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
3058             (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
3059             retval = false;
3060             goto cleanup;
3061         }
3062         break;
3063     case MYSQL_TYPE_DOUBLE:
3064     case MYSQL_TYPE_FLOAT:
3065         // length, unsigned, auto increment
3066         if (a->pack_length() != b->pack_length() ||
3067             (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
3068             (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
3069             retval = false;
3070             goto cleanup;
3071         }
3072         break;
3073     case MYSQL_TYPE_NEWDECIMAL:
3074         // length, unsigned
3075         if (a->pack_length() != b->pack_length() ||
3076             (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG)) {
3077             retval = false;
3078             goto cleanup;
3079         }
3080         break;
3081     case MYSQL_TYPE_ENUM: {
3082         Field_enum *a_enum = static_cast<Field_enum *>(a);
3083         if (!a_enum->eq_def(b)) {
3084             retval = false;
3085             goto cleanup;
3086         }
3087         break;
3088     }
3089     case MYSQL_TYPE_SET: {
3090         Field_set *a_set = static_cast<Field_set *>(a);
3091         if (!a_set->eq_def(b)) {
3092             retval = false;
3093             goto cleanup;
3094         }
3095         break;
3096     }
3097     case MYSQL_TYPE_BIT:
3098         // length
3099         if (a->pack_length() != b->pack_length()) {
3100             retval = false;
3101             goto cleanup;
3102         }
3103         break;
3104     case MYSQL_TYPE_DATE:
3105     case MYSQL_TYPE_DATETIME:
3106     case MYSQL_TYPE_YEAR:
3107     case MYSQL_TYPE_NEWDATE:
3108     case MYSQL_TYPE_TIME:
3109     case MYSQL_TYPE_TIMESTAMP:
3110 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
3111     (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
3112     (100000 <= MYSQL_VERSION_ID)
3113     case MYSQL_TYPE_DATETIME2:
3114     case MYSQL_TYPE_TIMESTAMP2:
3115     case MYSQL_TYPE_TIME2:
3116 #endif
3117         // length
3118         if (a->pack_length() != b->pack_length()) {
3119             retval = false;
3120             goto cleanup;
3121         }
3122         break;
3123     case MYSQL_TYPE_TINY_BLOB:
3124     case MYSQL_TYPE_MEDIUM_BLOB:
3125     case MYSQL_TYPE_BLOB:
3126     case MYSQL_TYPE_LONG_BLOB:
3127         // test the charset
3128         if (a->charset()->number != b->charset()->number) {
3129             retval = false;
3130             goto cleanup;
3131         }
3132         if (a->row_pack_length() != b->row_pack_length()) {
3133             retval = false;
3134             goto cleanup;
3135         }
3136         break;
3137     case MYSQL_TYPE_STRING:
3138         if (a->pack_length() != b->pack_length()) {
3139             retval = false;
3140             goto cleanup;
3141         }
3142         // if both are binary, we know have same pack lengths,
3143         // so we can goto end
3144         if (a->binary() && b->binary()) {
3145             // nothing to do, we are good
3146         }
3147         else if (!a->binary() && !b->binary()) {
3148             // test the charset
3149             if (a->charset()->number != b->charset()->number) {
3150                 retval = false;
3151                 goto cleanup;
3152             }
3153         }
3154         else {
3155             // one is binary and the other is not, so not the same
3156             retval = false;
3157             goto cleanup;
3158         }
3159         break;
3160     case MYSQL_TYPE_VARCHAR:
3161         if (a->field_length != b->field_length) {
3162             retval = false;
3163             goto cleanup;
3164         }
3165         // if both are binary, we know have same pack lengths,
3166         // so we can goto end
3167         if (a->binary() && b->binary()) {
3168             // nothing to do, we are good
3169         }
3170         else if (!a->binary() && !b->binary()) {
3171             // test the charset
3172             if (a->charset()->number != b->charset()->number) {
3173                 retval = false;
3174                 goto cleanup;
3175             }
3176         }
3177         else {
3178             // one is binary and the other is not, so not the same
3179             retval = false;
3180             goto cleanup;
3181         }
3182         break;
3183     //
3184     // I believe these are old types that are no longer
3185     // in any 5.1 tables, so tokudb does not need
3186     // to worry about them
3187     // Putting in this assert in case I am wrong.
3188     // Do not support geometry yet.
3189     //
3190     case MYSQL_TYPE_GEOMETRY:
3191     case MYSQL_TYPE_DECIMAL:
3192     case MYSQL_TYPE_VAR_STRING:
3193     case MYSQL_TYPE_NULL:
3194     case MYSQL_TYPE_VARCHAR_COMPRESSED:
3195     case MYSQL_TYPE_BLOB_COMPRESSED:
3196         assert_unreachable();
3197     }
3198 
3199 cleanup:
3200     return retval;
3201 }
3202 
are_two_fields_same(Field * a,Field * b)3203 static bool are_two_fields_same(Field* a, Field* b) {
3204     return fields_have_same_name(a, b) && fields_are_same_type(a, b);
3205 }
3206 
3207 
3208