1 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2 Copyright (c) 2020, MariaDB Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
16
17 /* Functions to compressed records */
18
19 #include "fulltext.h"
20
21 #define IS_CHAR ((uint) 32768) /* Bit if char (not offset) in tree */
22
23 /* Some definitions to keep in sync with myisampack.c */
24 #define HEAD_LENGTH 32 /* Length of fixed header */
25
26 #if INT_MAX > 32767
27 #define BITS_SAVED 32
28 #define MAX_QUICK_TABLE_BITS 9 /* Because we may shift in 24 bits */
29 #else
30 #define BITS_SAVED 16
31 #define MAX_QUICK_TABLE_BITS 6
32 #endif
33
34 #define get_bit(BU) ((BU)->bits ? \
35 (BU)->current_byte & ((mi_bit_type) 1 << --(BU)->bits) :\
36 (fill_buffer(BU), (BU)->bits= BITS_SAVED-1,\
37 (BU)->current_byte & ((mi_bit_type) 1 << (BITS_SAVED-1))))
38 #define skip_to_next_byte(BU) ((BU)->bits&=~7)
39 #define get_bits(BU,count) (((BU)->bits >= count) ? (((BU)->current_byte >> ((BU)->bits-=count)) & mask[count]) : fill_and_get_bits(BU,count))
40
41 #define decode_bytes_test_bit(bit) \
42 if (low_byte & (1 << (7-bit))) \
43 pos++; \
44 if (*pos & IS_CHAR) \
45 { bits-=(bit+1); break; } \
46 pos+= *pos
47
48 /* Size in uint16 of a Huffman tree for byte compression of 256 byte values. */
49 #define OFFSET_TABLE_SIZE 512
50
51 static uint read_huff_table(MI_BIT_BUFF *bit_buff,MI_DECODE_TREE *decode_tree,
52 uint16 **decode_table,uchar **intervall_buff,
53 uint16 *tmp_buff);
54 static void make_quick_table(uint16 *to_table,uint16 *decode_table,
55 uint *next_free,uint value,uint bits,
56 uint max_bits);
57 static void fill_quick_table(uint16 *table,uint bits, uint max_bits,
58 uint value);
59 static uint copy_decode_table(uint16 *to_pos,uint offset,
60 uint16 *decode_table);
61 static uint find_longest_bitstream(uint16 *table, uint16 *end);
62 static void (*get_unpack_function(MI_COLUMNDEF *rec))(MI_COLUMNDEF *field,
63 MI_BIT_BUFF *buff,
64 uchar *to,
65 uchar *end);
66 static void uf_zerofill_skip_zero(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
67 uchar *to,uchar *end);
68 static void uf_skip_zero(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
69 uchar *to,uchar *end);
70 static void uf_space_normal(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
71 uchar *to,uchar *end);
72 static void uf_space_endspace_selected(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
73 uchar *to, uchar *end);
74 static void uf_endspace_selected(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
75 uchar *to,uchar *end);
76 static void uf_space_endspace(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
77 uchar *to,uchar *end);
78 static void uf_endspace(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
79 uchar *to,uchar *end);
80 static void uf_space_prespace_selected(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
81 uchar *to, uchar *end);
82 static void uf_prespace_selected(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
83 uchar *to,uchar *end);
84 static void uf_space_prespace(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
85 uchar *to,uchar *end);
86 static void uf_prespace(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
87 uchar *to,uchar *end);
88 static void uf_zerofill_normal(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
89 uchar *to,uchar *end);
90 static void uf_constant(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
91 uchar *to,uchar *end);
92 static void uf_intervall(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
93 uchar *to,uchar *end);
94 static void uf_zero(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
95 uchar *to,uchar *end);
96 static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
97 uchar *to, uchar *end);
98 static void uf_varchar1(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
99 uchar *to, uchar *end);
100 static void uf_varchar2(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
101 uchar *to, uchar *end);
102 static void decode_bytes(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
103 uchar *to,uchar *end);
104 static uint decode_pos(MI_BIT_BUFF *bit_buff,MI_DECODE_TREE *decode_tree);
105 static void init_bit_buffer(MI_BIT_BUFF *bit_buff,uchar *buffer,uint length);
106 static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff,uint count);
107 static void fill_buffer(MI_BIT_BUFF *bit_buff);
108 static uint max_bit(uint value);
109 static uint read_pack_length(uint version, const uchar *buf, ulong *length);
110 #ifdef HAVE_MMAP
111 static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
112 MI_BLOCK_INFO *info, uchar **rec_buff_p,
113 uchar *header);
114 #endif
115
116 static mi_bit_type mask[]=
117 {
118 0x00000000,
119 0x00000001, 0x00000003, 0x00000007, 0x0000000f,
120 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
121 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
122 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
123 #if BITS_SAVED > 16
124 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff,
125 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
126 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff,
127 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff,
128 #endif
129 };
130
131
132 /* Read all packed info, allocate memory and fix field structs */
133
_mi_read_pack_info(MI_INFO * info,pbool fix_keys)134 my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys)
135 {
136 File file;
137 int diff_length;
138 uint i,trees,huff_tree_bits,rec_reflength,length;
139 uint16 *decode_table,*tmp_buff;
140 ulong elements,intervall_length;
141 uchar *disk_cache;
142 uchar *intervall_buff;
143 uchar header[HEAD_LENGTH];
144 MYISAM_SHARE *share=info->s;
145 MI_BIT_BUFF bit_buff;
146 DBUG_ENTER("_mi_read_pack_info");
147
148 if (myisam_quick_table_bits < 4)
149 myisam_quick_table_bits=4;
150 else if (myisam_quick_table_bits > MAX_QUICK_TABLE_BITS)
151 myisam_quick_table_bits=MAX_QUICK_TABLE_BITS;
152
153 file=info->dfile;
154 my_errno=0;
155 if (mysql_file_read(file, (uchar*) header, sizeof(header), MYF(MY_NABP)))
156 {
157 if (!my_errno)
158 my_errno=HA_ERR_END_OF_FILE;
159 goto err0;
160 }
161 /* Only the first three bytes of magic number are independent of version. */
162 if (memcmp((uchar*) header, (uchar*) myisam_pack_file_magic, 3))
163 {
164 my_errno=HA_ERR_WRONG_IN_RECORD;
165 goto err0;
166 }
167 share->pack.version= header[3]; /* fourth byte of magic number */
168 share->pack.header_length= uint4korr(header+4);
169 share->min_pack_length=(uint) uint4korr(header+8);
170 share->max_pack_length=(uint) uint4korr(header+12);
171 elements=uint4korr(header+16);
172 intervall_length=uint4korr(header+20);
173 trees=uint2korr(header+24);
174 share->pack.ref_length=header[26];
175 rec_reflength=header[27];
176 diff_length=(int) rec_reflength - (int) share->base.rec_reflength;
177 if (fix_keys)
178 share->rec_reflength=rec_reflength;
179 share->base.min_block_length=share->min_pack_length+1;
180 if (share->min_pack_length > 254)
181 share->base.min_block_length+=2;
182 DBUG_PRINT("info", ("fixed header length: %u", HEAD_LENGTH));
183 DBUG_PRINT("info", ("total header length: %lu", share->pack.header_length));
184 DBUG_PRINT("info", ("pack file version: %u", share->pack.version));
185 DBUG_PRINT("info", ("min pack length: %lu", share->min_pack_length));
186 DBUG_PRINT("info", ("max pack length: %lu", share->max_pack_length));
187 DBUG_PRINT("info", ("elements of all trees: %lu", elements));
188 DBUG_PRINT("info", ("distinct values bytes: %lu", intervall_length));
189 DBUG_PRINT("info", ("number of code trees: %u", trees));
190 DBUG_PRINT("info", ("bytes for record lgt: %u", share->pack.ref_length));
191 DBUG_PRINT("info", ("record pointer length: %u", rec_reflength));
192
193 /*
194 Memory segment #1:
195 - Decode tree heads
196 - Distinct column values
197 */
198 if (!(share->decode_trees=(MI_DECODE_TREE*)
199 my_malloc((uint) (trees*sizeof(MI_DECODE_TREE)+
200 intervall_length*sizeof(uchar)),
201 MYF(MY_WME))))
202 goto err0;
203 intervall_buff=(uchar*) (share->decode_trees+trees);
204
205 /*
206 Memory segment #2:
207 - Decode tables
208 - Quick decode tables
209 - Temporary decode table
210 - Compressed data file header cache
211 This segment will be reallocated after construction of the tables.
212 */
213 length=(uint) (elements*2+trees*(1 << myisam_quick_table_bits));
214 /*
215 To keep some algorithms simpler, we accept that they access
216 bytes beyond the end of the input data. This can affect up to
217 one byte less than the "word size" size used in this file,
218 which is BITS_SAVED / 8. To avoid accessing non-allocated
219 data, we add (BITS_SAVED / 8) - 1 bytes to the buffer size.
220 */
221 if (!(share->decode_tables=(uint16*)
222 my_malloc((length + OFFSET_TABLE_SIZE) * sizeof(uint16) +
223 (uint) (share->pack.header_length - sizeof(header) +
224 (BITS_SAVED / 8) - 1), MYF(MY_WME | MY_ZEROFILL))))
225 goto err1;
226 tmp_buff=share->decode_tables+length;
227 disk_cache= (uchar*) (tmp_buff+OFFSET_TABLE_SIZE);
228
229 if (mysql_file_read(file, disk_cache,
230 (uint) (share->pack.header_length-sizeof(header)),
231 MYF(MY_NABP)))
232 goto err2;
233
234 huff_tree_bits=max_bit(trees ? trees-1 : 0);
235 init_bit_buffer(&bit_buff, disk_cache,
236 (uint) (share->pack.header_length-sizeof(header)));
237 /* Read new info for each field */
238 for (i=0 ; i < share->base.fields ; i++)
239 {
240 share->rec[i].base_type=(enum en_fieldtype) get_bits(&bit_buff,5);
241 share->rec[i].pack_type=(uint) get_bits(&bit_buff,6);
242 share->rec[i].space_length_bits=get_bits(&bit_buff,5);
243 share->rec[i].huff_tree=share->decode_trees+(uint) get_bits(&bit_buff,
244 huff_tree_bits);
245 share->rec[i].unpack=get_unpack_function(share->rec+i);
246 DBUG_PRINT("info", ("col: %2u type: %2u pack: %u slbits: %2u",
247 i, share->rec[i].base_type, share->rec[i].pack_type,
248 share->rec[i].space_length_bits));
249 }
250 skip_to_next_byte(&bit_buff);
251 /*
252 Construct the decoding tables from the file header. Keep track of
253 the used memory.
254 */
255 decode_table=share->decode_tables;
256 for (i=0 ; i < trees ; i++)
257 if (read_huff_table(&bit_buff,share->decode_trees+i,&decode_table,
258 &intervall_buff,tmp_buff))
259 goto err3;
260 /* Reallocate the decoding tables to the used size. */
261 decode_table=(uint16*)
262 my_realloc((uchar*) share->decode_tables,
263 (uint) ((uchar*) decode_table - (uchar*) share->decode_tables),
264 MYF(MY_HOLD_ON_ERROR));
265 /* Fix the table addresses in the tree heads. */
266 {
267 my_ptrdiff_t diff=PTR_BYTE_DIFF(decode_table,share->decode_tables);
268 share->decode_tables=decode_table;
269 for (i=0 ; i < trees ; i++)
270 share->decode_trees[i].table=ADD_TO_PTR(share->decode_trees[i].table,
271 diff, uint16*);
272 }
273
274 /* Fix record-ref-length for keys */
275 if (fix_keys)
276 {
277 for (i=0 ; i < share->base.keys ; i++)
278 {
279 MI_KEYDEF *keyinfo= &share->keyinfo[i];
280 keyinfo->keylength+= (uint16) diff_length;
281 keyinfo->minlength+= (uint16) diff_length;
282 keyinfo->maxlength+= (uint16) diff_length;
283 keyinfo->seg[keyinfo->flag & HA_FULLTEXT ?
284 FT_SEGS : keyinfo->keysegs].length= (uint16) rec_reflength;
285 }
286 if (share->ft2_keyinfo.seg)
287 {
288 MI_KEYDEF *ft2_keyinfo= &share->ft2_keyinfo;
289 ft2_keyinfo->keylength+= (uint16) diff_length;
290 ft2_keyinfo->minlength+= (uint16) diff_length;
291 ft2_keyinfo->maxlength+= (uint16) diff_length;
292 }
293 }
294
295 if (bit_buff.error || bit_buff.pos < bit_buff.end)
296 goto err3;
297
298 DBUG_RETURN(0);
299
300 err3:
301 my_errno=HA_ERR_WRONG_IN_RECORD;
302 err2:
303 my_free(share->decode_tables);
304 err1:
305 my_free(share->decode_trees);
306 err0:
307 DBUG_RETURN(1);
308 }
309
310
311 /*
312 Read a huff-code-table from datafile.
313
314 SYNOPSIS
315 read_huff_table()
316 bit_buff Bit buffer pointing at start of the
317 decoding table in the file header cache.
318 decode_tree Pointer to the decode tree head.
319 decode_table IN/OUT Address of a pointer to the next free space.
320 intervall_buff IN/OUT Address of a pointer to the next unused values.
321 tmp_buff Buffer for temporary extraction of a full
322 decoding table as read from bit_buff.
323
324 RETURN
325 0 OK.
326 1 Error.
327 */
328
read_huff_table(MI_BIT_BUFF * bit_buff,MI_DECODE_TREE * decode_tree,uint16 ** decode_table,uchar ** intervall_buff,uint16 * tmp_buff)329 static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree,
330 uint16 **decode_table, uchar **intervall_buff,
331 uint16 *tmp_buff)
332 {
333 uint min_chr,elements,char_bits,offset_bits,size,intervall_length,table_bits,
334 next_free_offset;
335 uint16 *ptr,*end;
336 DBUG_ENTER("read_huff_table");
337
338 if (!get_bits(bit_buff,1))
339 {
340 /* Byte value compression. */
341 min_chr=get_bits(bit_buff,8);
342 elements=get_bits(bit_buff,9);
343 char_bits=get_bits(bit_buff,5);
344 offset_bits=get_bits(bit_buff,5);
345 intervall_length=0;
346 ptr=tmp_buff;
347 DBUG_PRINT("info", ("byte value compression"));
348 DBUG_PRINT("info", ("minimum byte value: %u", min_chr));
349 DBUG_PRINT("info", ("number of tree nodes: %u", elements));
350 DBUG_PRINT("info", ("bits for values: %u", char_bits));
351 DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits));
352 if (elements > 256)
353 {
354 DBUG_PRINT("error", ("ERROR: illegal number of tree elements: %u",
355 elements));
356 DBUG_RETURN(1);
357 }
358 }
359 else
360 {
361 /* Distinct column value compression. */
362 min_chr=0;
363 elements=get_bits(bit_buff,15);
364 intervall_length=get_bits(bit_buff,16);
365 char_bits=get_bits(bit_buff,5);
366 offset_bits=get_bits(bit_buff,5);
367 decode_tree->quick_table_bits=0;
368 ptr= *decode_table;
369 DBUG_PRINT("info", ("distinct column value compression"));
370 DBUG_PRINT("info", ("number of tree nodes: %u", elements));
371 DBUG_PRINT("info", ("value buffer length: %u", intervall_length));
372 DBUG_PRINT("info", ("bits for value index: %u", char_bits));
373 DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits));
374 }
375 size=elements*2-2;
376 DBUG_PRINT("info", ("tree size in uint16: %u", size));
377 DBUG_PRINT("info", ("tree size in bytes: %u",
378 size * (uint) sizeof(uint16)));
379
380 for (end=ptr+size ; ptr < end ; ptr++)
381 {
382 if (get_bit(bit_buff))
383 {
384 *ptr= (uint16) get_bits(bit_buff,offset_bits);
385 if ((ptr + *ptr >= end) || !*ptr)
386 {
387 DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree"));
388 DBUG_RETURN(1);
389 }
390 }
391 else
392 *ptr= (uint16) (IS_CHAR + (get_bits(bit_buff,char_bits) + min_chr));
393 }
394 skip_to_next_byte(bit_buff);
395
396 decode_tree->table= *decode_table;
397 decode_tree->intervalls= *intervall_buff;
398 if (! intervall_length)
399 {
400 /* Byte value compression. ptr started from tmp_buff. */
401 /* Find longest Huffman code from begin to end of tree in bits. */
402 table_bits= find_longest_bitstream(tmp_buff, ptr);
403 if (table_bits >= OFFSET_TABLE_SIZE)
404 DBUG_RETURN(1);
405 if (table_bits > myisam_quick_table_bits)
406 table_bits=myisam_quick_table_bits;
407 DBUG_PRINT("info", ("table bits: %u", table_bits));
408
409 next_free_offset= (1 << table_bits);
410 make_quick_table(*decode_table,tmp_buff,&next_free_offset,0,table_bits,
411 table_bits);
412 (*decode_table)+= next_free_offset;
413 decode_tree->quick_table_bits=table_bits;
414 }
415 else
416 {
417 /* Distinct column value compression. ptr started from *decode_table */
418 (*decode_table)=end;
419 /*
420 get_bits() moves some bytes to a cache buffer in advance. May need
421 to step back.
422 */
423 bit_buff->pos-= bit_buff->bits/8;
424 /* Copy the distinct column values from the buffer. */
425 memcpy(*intervall_buff,bit_buff->pos,(size_t) intervall_length);
426 (*intervall_buff)+=intervall_length;
427 bit_buff->pos+=intervall_length;
428 bit_buff->bits=0;
429 }
430 DBUG_RETURN(0);
431 }
432
433
434 /*
435 Make a quick_table for faster decoding.
436
437 SYNOPSIS
438 make_quick_table()
439 to_table Target quick_table and remaining decode table.
440 decode_table Source Huffman (sub-)tree within tmp_buff.
441 next_free_offset IN/OUT Next free offset from to_table.
442 Starts behind quick_table on the top-level.
443 value Huffman bits found so far.
444 bits Remaining bits to be collected.
445 max_bits Total number of bits to collect (table_bits).
446
447 DESCRIPTION
448
449 The quick table is an array of 16-bit values. There exists one value
450 for each possible code representable by max_bits (table_bits) bits.
451 In most cases table_bits is 9. So there are 512 16-bit values.
452
453 If the high-order bit (16) is set (IS_CHAR) then the array slot for
454 this value is a valid Huffman code for a resulting byte value.
455
456 The low-order 8 bits (1..8) are the resulting byte value.
457
458 Bits 9..14 are the length of the Huffman code for this byte value.
459 This means so many bits from the input stream were needed to
460 represent this byte value. The remaining bits belong to later
461 Huffman codes. This also means that for every Huffman code shorter
462 than table_bits there are multiple entires in the array, which
463 differ just in the unused bits.
464
465 If the high-order bit (16) is clear (0) then the remaining bits are
466 the position of the remaining Huffman decode tree segment behind the
467 quick table.
468
469 RETURN
470 void
471 */
472
make_quick_table(uint16 * to_table,uint16 * decode_table,uint * next_free_offset,uint value,uint bits,uint max_bits)473 static void make_quick_table(uint16 *to_table, uint16 *decode_table,
474 uint *next_free_offset, uint value, uint bits,
475 uint max_bits)
476 {
477 DBUG_ENTER("make_quick_table");
478
479 /*
480 When down the table to the requested maximum, copy the rest of the
481 Huffman table.
482 */
483 if (!bits--)
484 {
485 /*
486 Remaining left Huffman tree segment starts behind quick table.
487 Remaining right Huffman tree segment starts behind left segment.
488 */
489 to_table[value]= (uint16) *next_free_offset;
490 /*
491 Re-construct the remaining Huffman tree segment at
492 next_free_offset in to_table.
493 */
494 *next_free_offset= copy_decode_table(to_table, *next_free_offset,
495 decode_table);
496 DBUG_VOID_RETURN;
497 }
498
499 /* Descent on the left side. Left side bits are clear (0). */
500 if (!(*decode_table & IS_CHAR))
501 {
502 /* Not a leaf. Follow the pointer. */
503 make_quick_table(to_table, decode_table + *decode_table,
504 next_free_offset, value, bits, max_bits);
505 }
506 else
507 {
508 /*
509 A leaf. A Huffman code is complete. Fill the quick_table
510 array for all possible bit strings starting with this Huffman
511 code.
512 */
513 fill_quick_table(to_table + value, bits, max_bits, (uint) *decode_table);
514 }
515
516 /* Descent on the right side. Right side bits are set (1). */
517 decode_table++;
518 value|= (1 << bits);
519 if (!(*decode_table & IS_CHAR))
520 {
521 /* Not a leaf. Follow the pointer. */
522 make_quick_table(to_table, decode_table + *decode_table,
523 next_free_offset, value, bits, max_bits);
524 }
525 else
526 {
527 /*
528 A leaf. A Huffman code is complete. Fill the quick_table
529 array for all possible bit strings starting with this Huffman
530 code.
531 */
532 fill_quick_table(to_table + value, bits, max_bits, (uint) *decode_table);
533 }
534
535 DBUG_VOID_RETURN;
536 }
537
538
539 /*
540 Fill quick_table for all possible values starting with this Huffman code.
541
542 SYNOPSIS
543 fill_quick_table()
544 table Target quick_table position.
545 bits Unused bits from max_bits.
546 max_bits Total number of bits to collect (table_bits).
547 value The byte encoded by the found Huffman code.
548
549 DESCRIPTION
550
551 Fill the segment (all slots) of the quick_table array with the
552 resulting value for the found Huffman code. There are as many slots
553 as there are combinations representable by the unused bits.
554
555 In most cases we use 9 table bits. Assume a 3-bit Huffman code. Then
556 there are 6 unused bits. Hence we fill 2**6 = 64 slots with the
557 value.
558
559 RETURN
560 void
561 */
562
fill_quick_table(uint16 * table,uint bits,uint max_bits,uint value)563 static void fill_quick_table(uint16 *table, uint bits, uint max_bits,
564 uint value)
565 {
566 uint16 *end;
567 DBUG_ENTER("fill_quick_table");
568
569 /*
570 Bits 1..8 of value represent the decoded byte value.
571 Bits 9..14 become the length of the Huffman code for this byte value.
572 Bit 16 flags a valid code (IS_CHAR).
573 */
574 value|= (max_bits - bits) << 8 | IS_CHAR;
575
576 for (end= table + ((my_ptrdiff_t) 1 << bits); table < end; table++)
577 {
578 *table= (uint16) value;
579 }
580 DBUG_VOID_RETURN;
581 }
582
583
584 /*
585 Reconstruct a decode subtree at the target position.
586
587 SYNOPSIS
588 copy_decode_table()
589 to_pos Target quick_table and remaining decode table.
590 offset Next free offset from to_pos.
591 decode_table Source Huffman subtree within tmp_buff.
592
593 NOTE
594 Pointers in the decode tree are relative to the pointers position.
595
596 RETURN
597 next free offset from to_pos.
598 */
599
copy_decode_table(uint16 * to_pos,uint offset,uint16 * decode_table)600 static uint copy_decode_table(uint16 *to_pos, uint offset,
601 uint16 *decode_table)
602 {
603 uint prev_offset= offset;
604 DBUG_ENTER("copy_decode_table");
605
606 /* Descent on the left side. */
607 if (!(*decode_table & IS_CHAR))
608 {
609 /* Set a pointer to the next target node. */
610 to_pos[offset]=2;
611 /* Copy the left hand subtree there. */
612 offset=copy_decode_table(to_pos,offset+2,decode_table+ *decode_table);
613 }
614 else
615 {
616 /* Copy the byte value. */
617 to_pos[offset]= *decode_table;
618 /* Step behind this node. */
619 offset+=2;
620 }
621
622 /* Descent on the right side. */
623 decode_table++;
624 if (!(*decode_table & IS_CHAR))
625 {
626 /* Set a pointer to the next free target node. */
627 to_pos[prev_offset+1]=(uint16) (offset-prev_offset-1);
628 /* Copy the right hand subtree to the entry of that node. */
629 offset=copy_decode_table(to_pos,offset,decode_table+ *decode_table);
630 }
631 else
632 {
633 /* Copy the byte value. */
634 to_pos[prev_offset+1]= *decode_table;
635 }
636 DBUG_RETURN(offset);
637 }
638
639
640 /*
641 Find the length of the longest Huffman code in this table in bits.
642
643 SYNOPSIS
644 find_longest_bitstream()
645 table Code (sub-)table start.
646 end End of code table.
647
648 IMPLEMENTATION
649
650 Recursively follow the branch(es) of the code pair on every level of
651 the tree until two byte values (and no branch) are found. Add one to
652 each level when returning back from each recursion stage.
653
654 'end' is used for error checking only. A clean tree terminates
655 before reaching 'end'. Hence the exact value of 'end' is not too
656 important. However having it higher than necessary could lead to
657 misbehaviour should 'next' jump into the dirty area.
658
659 RETURN
660 length Length of longest Huffman code in bits.
661 >= OFFSET_TABLE_SIZE Error, broken tree. It does not end before 'end'.
662 */
663
find_longest_bitstream(uint16 * table,uint16 * end)664 static uint find_longest_bitstream(uint16 *table, uint16 *end)
665 {
666 uint length= 1;
667 uint length2;
668
669 if (!(*table & IS_CHAR))
670 {
671 uint16 *next= table + *table;
672 if (next > end || next == table)
673 {
674 DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree"));
675 return OFFSET_TABLE_SIZE;
676 }
677 length= find_longest_bitstream(next, end) + 1;
678 }
679 table++;
680 if (!(*table & IS_CHAR))
681 {
682 uint16 *next= table + *table;
683 if (next > end || next == table)
684 {
685 DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree"));
686 return OFFSET_TABLE_SIZE;
687 }
688 length2= find_longest_bitstream(next, end) + 1;
689 length=MY_MAX(length,length2);
690 }
691 return length;
692 }
693
694
695 /*
696 Read record from datafile.
697
698 SYNOPSIS
699 _mi_read_pack_record()
700 info A pointer to MI_INFO.
701 filepos File offset of the record.
702 buf RETURN The buffer to receive the record.
703
704 RETURN
705 0 on success
706 HA_ERR_WRONG_IN_RECORD or -1 on error
707 */
708
_mi_read_pack_record(MI_INFO * info,my_off_t filepos,uchar * buf)709 int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, uchar *buf)
710 {
711 MI_BLOCK_INFO block_info;
712 File file;
713 DBUG_ENTER("mi_read_pack_record");
714
715 if (filepos == HA_OFFSET_ERROR)
716 DBUG_RETURN(-1); /* _search() didn't find record */
717
718 file=info->dfile;
719 if (_mi_pack_get_block_info(info, &info->bit_buff, &block_info,
720 &info->rec_buff, file, filepos))
721 goto err;
722 if (mysql_file_read(file, (uchar*) info->rec_buff + block_info.offset,
723 block_info.rec_len - block_info.offset, MYF(MY_NABP)))
724 goto panic;
725 info->update|= HA_STATE_AKTIV;
726 DBUG_RETURN(_mi_pack_rec_unpack(info, &info->bit_buff, buf,
727 info->rec_buff, block_info.rec_len));
728 panic:
729 my_errno=HA_ERR_WRONG_IN_RECORD;
730 err:
731 DBUG_RETURN(-1);
732 }
733
734
735
_mi_pack_rec_unpack(register MI_INFO * info,MI_BIT_BUFF * bit_buff,register uchar * to,uchar * from,ulong reclength)736 int _mi_pack_rec_unpack(register MI_INFO *info, MI_BIT_BUFF *bit_buff,
737 register uchar *to, uchar *from, ulong reclength)
738 {
739 uchar *end_field;
740 reg3 MI_COLUMNDEF *end;
741 MI_COLUMNDEF *current_field;
742 MYISAM_SHARE *share=info->s;
743 DBUG_ENTER("_mi_pack_rec_unpack");
744
745 init_bit_buffer(bit_buff, (uchar*) from, reclength);
746
747 for (current_field=share->rec, end=current_field+share->base.fields ;
748 current_field < end ;
749 current_field++,to=end_field)
750 {
751 end_field=to+current_field->length;
752 (*current_field->unpack)(current_field, bit_buff, (uchar*) to,
753 (uchar*) end_field);
754 }
755 if (!bit_buff->error &&
756 bit_buff->pos - bit_buff->bits / 8 == bit_buff->end)
757 DBUG_RETURN(0);
758 info->update&= ~HA_STATE_AKTIV;
759 DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD);
760 } /* _mi_pack_rec_unpack */
761
762
763 /* Return function to unpack field */
764
get_unpack_function(MI_COLUMNDEF * rec)765 static void (*get_unpack_function(MI_COLUMNDEF *rec))
766 (MI_COLUMNDEF *, MI_BIT_BUFF *, uchar *, uchar *)
767 {
768 switch (rec->base_type) {
769 case FIELD_SKIP_ZERO:
770 if (rec->pack_type & PACK_TYPE_ZERO_FILL)
771 return &uf_zerofill_skip_zero;
772 return &uf_skip_zero;
773 case FIELD_NORMAL:
774 if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
775 return &uf_space_normal;
776 if (rec->pack_type & PACK_TYPE_ZERO_FILL)
777 return &uf_zerofill_normal;
778 return &decode_bytes;
779 case FIELD_SKIP_ENDSPACE:
780 if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
781 {
782 if (rec->pack_type & PACK_TYPE_SELECTED)
783 return &uf_space_endspace_selected;
784 return &uf_space_endspace;
785 }
786 if (rec->pack_type & PACK_TYPE_SELECTED)
787 return &uf_endspace_selected;
788 return &uf_endspace;
789 case FIELD_SKIP_PRESPACE:
790 if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
791 {
792 if (rec->pack_type & PACK_TYPE_SELECTED)
793 return &uf_space_prespace_selected;
794 return &uf_space_prespace;
795 }
796 if (rec->pack_type & PACK_TYPE_SELECTED)
797 return &uf_prespace_selected;
798 return &uf_prespace;
799 case FIELD_CONSTANT:
800 return &uf_constant;
801 case FIELD_INTERVALL:
802 return &uf_intervall;
803 case FIELD_ZERO:
804 case FIELD_CHECK:
805 return &uf_zero;
806 case FIELD_BLOB:
807 return &uf_blob;
808 case FIELD_VARCHAR:
809 if (rec->length <= 256) /* 255 + 1 byte length */
810 return &uf_varchar1;
811 return &uf_varchar2;
812 case FIELD_LAST:
813 default:
814 return 0; /* This should never happen */
815 }
816 }
817
818 /* The different functions to unpack a field */
819
uf_zerofill_skip_zero(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)820 static void uf_zerofill_skip_zero(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
821 uchar *to, uchar *end)
822 {
823 if (get_bit(bit_buff))
824 bzero((char*) to,(uint) (end-to));
825 else
826 {
827 end-=rec->space_length_bits;
828 decode_bytes(rec,bit_buff,to,end);
829 bzero((char*) end,rec->space_length_bits);
830 }
831 }
832
uf_skip_zero(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)833 static void uf_skip_zero(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
834 uchar *end)
835 {
836 if (get_bit(bit_buff))
837 bzero((char*) to,(uint) (end-to));
838 else
839 decode_bytes(rec,bit_buff,to,end);
840 }
841
uf_space_normal(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)842 static void uf_space_normal(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
843 uchar *end)
844 {
845 if (get_bit(bit_buff))
846 bfill((uchar*) to,(end-to),' ');
847 else
848 decode_bytes(rec,bit_buff,to,end);
849 }
850
uf_space_endspace_selected(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)851 static void uf_space_endspace_selected(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
852 uchar *to, uchar *end)
853 {
854 uint spaces;
855 if (get_bit(bit_buff))
856 bfill((uchar*) to,(end-to),' ');
857 else
858 {
859 if (get_bit(bit_buff))
860 {
861 if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
862 {
863 bit_buff->error=1;
864 return;
865 }
866 if (to+spaces != end)
867 decode_bytes(rec,bit_buff,to,end-spaces);
868 bfill((uchar*) end-spaces,spaces,' ');
869 }
870 else
871 decode_bytes(rec,bit_buff,to,end);
872 }
873 }
874
uf_endspace_selected(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)875 static void uf_endspace_selected(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
876 uchar *to, uchar *end)
877 {
878 uint spaces;
879 if (get_bit(bit_buff))
880 {
881 if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
882 {
883 bit_buff->error=1;
884 return;
885 }
886 if (to+spaces != end)
887 decode_bytes(rec,bit_buff,to,end-spaces);
888 bfill((uchar*) end-spaces,spaces,' ');
889 }
890 else
891 decode_bytes(rec,bit_buff,to,end);
892 }
893
uf_space_endspace(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)894 static void uf_space_endspace(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
895 uchar *end)
896 {
897 uint spaces;
898 if (get_bit(bit_buff))
899 bfill((uchar*) to,(end-to),' ');
900 else
901 {
902 if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
903 {
904 bit_buff->error=1;
905 return;
906 }
907 if (to+spaces != end)
908 decode_bytes(rec,bit_buff,to,end-spaces);
909 bfill((uchar*) end-spaces,spaces,' ');
910 }
911 }
912
uf_endspace(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)913 static void uf_endspace(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
914 uchar *end)
915 {
916 uint spaces;
917 if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
918 {
919 bit_buff->error=1;
920 return;
921 }
922 if (to+spaces != end)
923 decode_bytes(rec,bit_buff,to,end-spaces);
924 bfill((uchar*) end-spaces,spaces,' ');
925 }
926
uf_space_prespace_selected(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)927 static void uf_space_prespace_selected(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
928 uchar *to, uchar *end)
929 {
930 uint spaces;
931 if (get_bit(bit_buff))
932 bfill((uchar*) to,(end-to),' ');
933 else
934 {
935 if (get_bit(bit_buff))
936 {
937 if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
938 {
939 bit_buff->error=1;
940 return;
941 }
942 bfill((uchar*) to,spaces,' ');
943 if (to+spaces != end)
944 decode_bytes(rec,bit_buff,to+spaces,end);
945 }
946 else
947 decode_bytes(rec,bit_buff,to,end);
948 }
949 }
950
951
uf_prespace_selected(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)952 static void uf_prespace_selected(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
953 uchar *to, uchar *end)
954 {
955 uint spaces;
956 if (get_bit(bit_buff))
957 {
958 if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
959 {
960 bit_buff->error=1;
961 return;
962 }
963 bfill((uchar*) to,spaces,' ');
964 if (to+spaces != end)
965 decode_bytes(rec,bit_buff,to+spaces,end);
966 }
967 else
968 decode_bytes(rec,bit_buff,to,end);
969 }
970
971
uf_space_prespace(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)972 static void uf_space_prespace(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
973 uchar *end)
974 {
975 uint spaces;
976 if (get_bit(bit_buff))
977 bfill((uchar*) to,(end-to),' ');
978 else
979 {
980 if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
981 {
982 bit_buff->error=1;
983 return;
984 }
985 bfill((uchar*) to,spaces,' ');
986 if (to+spaces != end)
987 decode_bytes(rec,bit_buff,to+spaces,end);
988 }
989 }
990
uf_prespace(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)991 static void uf_prespace(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
992 uchar *end)
993 {
994 uint spaces;
995 if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
996 {
997 bit_buff->error=1;
998 return;
999 }
1000 bfill((uchar*) to,spaces,' ');
1001 if (to+spaces != end)
1002 decode_bytes(rec,bit_buff,to+spaces,end);
1003 }
1004
uf_zerofill_normal(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1005 static void uf_zerofill_normal(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
1006 uchar *end)
1007 {
1008 end-=rec->space_length_bits;
1009 decode_bytes(rec,bit_buff,(uchar*) to,end);
1010 bzero((char*) end,rec->space_length_bits);
1011 }
1012
uf_constant(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1013 static void uf_constant(MI_COLUMNDEF *rec,
1014 MI_BIT_BUFF *bit_buff __attribute__((unused)),
1015 uchar *to,
1016 uchar *end)
1017 {
1018 memcpy(to,rec->huff_tree->intervalls,(size_t) (end-to));
1019 }
1020
uf_intervall(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1021 static void uf_intervall(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
1022 uchar *end)
1023 {
1024 reg1 uint field_length=(uint) (end-to);
1025 memcpy(to,rec->huff_tree->intervalls+field_length*decode_pos(bit_buff,
1026 rec->huff_tree),
1027 (size_t) field_length);
1028 }
1029
1030
1031 /*ARGSUSED*/
uf_zero(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1032 static void uf_zero(MI_COLUMNDEF *rec __attribute__((unused)),
1033 MI_BIT_BUFF *bit_buff __attribute__((unused)),
1034 uchar *to, uchar *end)
1035 {
1036 bzero((char*) to,(uint) (end-to));
1037 }
1038
uf_blob(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1039 static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
1040 uchar *to, uchar *end)
1041 {
1042 if (get_bit(bit_buff))
1043 bzero((uchar*) to,(end-to));
1044 else
1045 {
1046 ulong length=get_bits(bit_buff,rec->space_length_bits);
1047 uint pack_length=(uint) (end-to)-portable_sizeof_char_ptr;
1048 if (bit_buff->blob_pos+length > bit_buff->blob_end)
1049 {
1050 bit_buff->error=1;
1051 bzero((uchar*) to,(end-to));
1052 return;
1053 }
1054 decode_bytes(rec,bit_buff,bit_buff->blob_pos,bit_buff->blob_pos+length);
1055 _mi_store_blob_length((uchar*) to,pack_length,length);
1056 memcpy(to+pack_length, &bit_buff->blob_pos, sizeof(char*));
1057 bit_buff->blob_pos+=length;
1058 }
1059 }
1060
1061
uf_varchar1(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1062 static void uf_varchar1(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
1063 uchar *to, uchar *end __attribute__((unused)))
1064 {
1065 if (get_bit(bit_buff))
1066 to[0]= 0; /* Zero lengths */
1067 else
1068 {
1069 ulong length=get_bits(bit_buff,rec->space_length_bits);
1070 *to= (uchar) length;
1071 decode_bytes(rec,bit_buff,to+1,to+1+length);
1072 }
1073 }
1074
1075
uf_varchar2(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1076 static void uf_varchar2(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
1077 uchar *to, uchar *end __attribute__((unused)))
1078 {
1079 if (get_bit(bit_buff))
1080 to[0]=to[1]=0; /* Zero lengths */
1081 else
1082 {
1083 ulong length=get_bits(bit_buff,rec->space_length_bits);
1084 int2store(to,length);
1085 decode_bytes(rec,bit_buff,to+2,to+2+length);
1086 }
1087 }
1088
1089 /* Functions to decode of buffer of bits */
1090
1091 #if BITS_SAVED == 64
1092
decode_bytes(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1093 static void decode_bytes(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,uchar *to,
1094 uchar *end)
1095 {
1096 reg1 uint bits,low_byte;
1097 reg3 uint16 *pos;
1098 reg4 uint table_bits,table_and;
1099 MI_DECODE_TREE *decode_tree;
1100
1101 decode_tree=rec->decode_tree;
1102 bits=bit_buff->bits; /* Save in reg for quicker access */
1103 table_bits=decode_tree->quick_table_bits;
1104 table_and= (1 << table_bits)-1;
1105
1106 do
1107 {
1108 if (bits <= 32)
1109 {
1110 if (bit_buff->pos > bit_buff->end+4)
1111 {
1112 bit_buff->error=1;
1113 return; /* Can't be right */
1114 }
1115 bit_buff->current_byte= (bit_buff->current_byte << 32) |
1116 ((((uint) bit_buff->pos[3])) |
1117 (((uint) bit_buff->pos[2]) << 8) |
1118 (((uint) bit_buff->pos[1]) << 16) |
1119 (((uint) bit_buff->pos[0]) << 24));
1120 bit_buff->pos+=4;
1121 bits+=32;
1122 }
1123 /*
1124 First use info in quick_table.
1125
1126 The quick table is an array of 16-bit values. There exists one
1127 value for each possible code representable by table_bits bits.
1128 In most cases table_bits is 9. So there are 512 16-bit values.
1129
1130 If the high-order bit (16) is set (IS_CHAR) then the array slot
1131 for this value is a valid Huffman code for a resulting byte value.
1132
1133 The low-order 8 bits (1..8) are the resulting byte value.
1134
1135 Bits 9..14 are the length of the Huffman code for this byte value.
1136 This means so many bits from the input stream were needed to
1137 represent this byte value. The remaining bits belong to later
1138 Huffman codes. This also means that for every Huffman code shorter
1139 than table_bits there are multiple entires in the array, which
1140 differ just in the unused bits.
1141
1142 If the high-order bit (16) is clear (0) then the remaining bits are
1143 the position of the remaining Huffman decode tree segment behind the
1144 quick table.
1145 */
1146 low_byte=(uint) (bit_buff->current_byte >> (bits - table_bits)) & table_and;
1147 low_byte=decode_tree->table[low_byte];
1148 if (low_byte & IS_CHAR)
1149 {
1150 /*
1151 All Huffman codes of less or equal table_bits length are in the
1152 quick table. This is one of them.
1153 */
1154 *to++ = (low_byte & 255); /* Found char in quick table */
1155 bits-= ((low_byte >> 8) & 31); /* Remove bits used */
1156 }
1157 else
1158 { /* Map through rest of decode-table */
1159 /* This means that the Huffman code must be longer than table_bits. */
1160 pos=decode_tree->table+low_byte;
1161 bits-=table_bits;
1162 /* NOTE: decode_bytes_test_bit() is a macro which contains a break !!! */
1163 for (;;)
1164 {
1165 low_byte=(uint) (bit_buff->current_byte >> (bits-8));
1166 decode_bytes_test_bit(0);
1167 decode_bytes_test_bit(1);
1168 decode_bytes_test_bit(2);
1169 decode_bytes_test_bit(3);
1170 decode_bytes_test_bit(4);
1171 decode_bytes_test_bit(5);
1172 decode_bytes_test_bit(6);
1173 decode_bytes_test_bit(7);
1174 bits-=8;
1175 }
1176 *to++ = *pos;
1177 }
1178 } while (to != end);
1179
1180 bit_buff->bits=bits;
1181 return;
1182 }
1183
1184 #else
1185
decode_bytes(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1186 static void decode_bytes(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
1187 uchar *end)
1188 {
1189 reg1 uint bits,low_byte;
1190 reg3 uint16 *pos;
1191 reg4 uint table_bits,table_and;
1192 MI_DECODE_TREE *decode_tree;
1193
1194 decode_tree=rec->huff_tree;
1195 bits=bit_buff->bits; /* Save in reg for quicker access */
1196 table_bits=decode_tree->quick_table_bits;
1197 table_and= (1 << table_bits)-1;
1198
1199 do
1200 {
1201 if (bits < table_bits)
1202 {
1203 if (bit_buff->pos > bit_buff->end+1)
1204 {
1205 bit_buff->error=1;
1206 return; /* Can't be right */
1207 }
1208 #if BITS_SAVED == 32
1209 bit_buff->current_byte= (bit_buff->current_byte << 24) |
1210 (((uint) ((uchar) bit_buff->pos[2]))) |
1211 (((uint) ((uchar) bit_buff->pos[1])) << 8) |
1212 (((uint) ((uchar) bit_buff->pos[0])) << 16);
1213 bit_buff->pos+=3;
1214 bits+=24;
1215 #else
1216 if (bits) /* We must have at leasts 9 bits */
1217 {
1218 bit_buff->current_byte= (bit_buff->current_byte << 8) |
1219 (uint) ((uchar) bit_buff->pos[0]);
1220 bit_buff->pos++;
1221 bits+=8;
1222 }
1223 else
1224 {
1225 bit_buff->current_byte= ((uint) ((uchar) bit_buff->pos[0]) << 8) |
1226 ((uint) ((uchar) bit_buff->pos[1]));
1227 bit_buff->pos+=2;
1228 bits+=16;
1229 }
1230 #endif
1231 }
1232 /* First use info in quick_table */
1233 low_byte=(bit_buff->current_byte >> (bits - table_bits)) & table_and;
1234 low_byte=decode_tree->table[low_byte];
1235 if (low_byte & IS_CHAR)
1236 {
1237 *to++ = (low_byte & 255); /* Found char in quick table */
1238 bits-= ((low_byte >> 8) & 31); /* Remove bits used */
1239 }
1240 else
1241 { /* Map through rest of decode-table */
1242 pos=decode_tree->table+low_byte;
1243 bits-=table_bits;
1244 for (;;)
1245 {
1246 if (bits < 8)
1247 { /* We don't need to check end */
1248 #if BITS_SAVED == 32
1249 bit_buff->current_byte= (bit_buff->current_byte << 24) |
1250 (((uint) ((uchar) bit_buff->pos[2]))) |
1251 (((uint) ((uchar) bit_buff->pos[1])) << 8) |
1252 (((uint) ((uchar) bit_buff->pos[0])) << 16);
1253 bit_buff->pos+=3;
1254 bits+=24;
1255 #else
1256 bit_buff->current_byte= (bit_buff->current_byte << 8) |
1257 (uint) ((uchar) bit_buff->pos[0]);
1258 bit_buff->pos+=1;
1259 bits+=8;
1260 #endif
1261 }
1262 low_byte=(uint) (bit_buff->current_byte >> (bits-8));
1263 decode_bytes_test_bit(0);
1264 decode_bytes_test_bit(1);
1265 decode_bytes_test_bit(2);
1266 decode_bytes_test_bit(3);
1267 decode_bytes_test_bit(4);
1268 decode_bytes_test_bit(5);
1269 decode_bytes_test_bit(6);
1270 decode_bytes_test_bit(7);
1271 bits-=8;
1272 }
1273 *to++ = (uchar) *pos;
1274 }
1275 } while (to != end);
1276
1277 bit_buff->bits=bits;
1278 return;
1279 }
1280 #endif /* BIT_SAVED == 64 */
1281
1282
decode_pos(MI_BIT_BUFF * bit_buff,MI_DECODE_TREE * decode_tree)1283 static uint decode_pos(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree)
1284 {
1285 uint16 *pos=decode_tree->table;
1286 for (;;)
1287 {
1288 if (get_bit(bit_buff))
1289 pos++;
1290 if (*pos & IS_CHAR)
1291 return (uint) (*pos & ~IS_CHAR);
1292 pos+= *pos;
1293 }
1294 }
1295
1296
_mi_read_rnd_pack_record(MI_INFO * info,uchar * buf,register my_off_t filepos,my_bool skip_deleted_blocks)1297 int _mi_read_rnd_pack_record(MI_INFO *info, uchar *buf,
1298 register my_off_t filepos,
1299 my_bool skip_deleted_blocks)
1300 {
1301 uint b_type;
1302 MI_BLOCK_INFO block_info;
1303 MYISAM_SHARE *share=info->s;
1304 DBUG_ENTER("_mi_read_rnd_pack_record");
1305
1306 if (filepos >= info->state->data_file_length)
1307 {
1308 my_errno= HA_ERR_END_OF_FILE;
1309 goto err;
1310 }
1311
1312 if (info->opt_flag & READ_CACHE_USED)
1313 {
1314 if (_mi_read_cache(&info->rec_cache, (uchar*) block_info.header,
1315 filepos, share->pack.ref_length,
1316 skip_deleted_blocks ? READING_NEXT : 0))
1317 goto err;
1318 b_type=_mi_pack_get_block_info(info, &info->bit_buff, &block_info,
1319 &info->rec_buff, -1, filepos);
1320 }
1321 else
1322 b_type=_mi_pack_get_block_info(info, &info->bit_buff, &block_info,
1323 &info->rec_buff, info->dfile, filepos);
1324 if (b_type)
1325 goto err; /* Error code is already set */
1326 #ifndef DBUG_OFF
1327 if (block_info.rec_len > share->max_pack_length)
1328 {
1329 my_errno=HA_ERR_WRONG_IN_RECORD;
1330 goto err;
1331 }
1332 #endif
1333
1334 if (info->opt_flag & READ_CACHE_USED)
1335 {
1336 if (_mi_read_cache(&info->rec_cache, (uchar*) info->rec_buff,
1337 block_info.filepos, block_info.rec_len,
1338 skip_deleted_blocks ? READING_NEXT : 0))
1339 goto err;
1340 }
1341 else
1342 {
1343 if (mysql_file_read(info->dfile,
1344 (uchar*) info->rec_buff + block_info.offset,
1345 block_info.rec_len-block_info.offset, MYF(MY_NABP)))
1346 goto err;
1347 }
1348 info->packed_length=block_info.rec_len;
1349 info->lastpos=filepos;
1350 info->nextpos=block_info.filepos+block_info.rec_len;
1351 info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
1352
1353 DBUG_RETURN (_mi_pack_rec_unpack(info, &info->bit_buff, buf,
1354 info->rec_buff, block_info.rec_len));
1355 err:
1356 DBUG_RETURN(my_errno);
1357 }
1358
1359
1360 /* Read and process header from a huff-record-file */
1361
_mi_pack_get_block_info(MI_INFO * myisam,MI_BIT_BUFF * bit_buff,MI_BLOCK_INFO * info,uchar ** rec_buff_p,File file,my_off_t filepos)1362 uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
1363 MI_BLOCK_INFO *info, uchar **rec_buff_p,
1364 File file, my_off_t filepos)
1365 {
1366 uchar *header=info->header;
1367 uint head_length, UNINIT_VAR(ref_length);
1368
1369 if (file >= 0)
1370 {
1371 ref_length=myisam->s->pack.ref_length;
1372 /*
1373 We can't use mysql_file_pread() here because mi_read_rnd_pack_record assumes
1374 position is ok
1375 */
1376 mysql_file_seek(file, filepos, MY_SEEK_SET, MYF(0));
1377 if (mysql_file_read(file, header, ref_length, MYF(MY_NABP)))
1378 return BLOCK_FATAL_ERROR;
1379 DBUG_DUMP("header",(uchar*) header,ref_length);
1380 }
1381 head_length= read_pack_length((uint) myisam->s->pack.version, header,
1382 &info->rec_len);
1383 if (myisam->s->base.blobs)
1384 {
1385 head_length+= read_pack_length((uint) myisam->s->pack.version,
1386 header + head_length, &info->blob_len);
1387 /*
1388 Ensure that the record buffer is big enough for the compressed
1389 record plus all expanded blobs. [We do not have an extra buffer
1390 for the resulting blobs. Sigh.]
1391 */
1392 if (!(mi_alloc_rec_buff(myisam,info->rec_len + info->blob_len,
1393 rec_buff_p)))
1394 return BLOCK_FATAL_ERROR; /* not enough memory */
1395 bit_buff->blob_pos= (uchar*) *rec_buff_p + info->rec_len;
1396 bit_buff->blob_end= bit_buff->blob_pos + info->blob_len;
1397 myisam->blob_length=info->blob_len;
1398 }
1399 info->filepos=filepos+head_length;
1400 if (file > 0)
1401 {
1402 info->offset=MY_MIN(info->rec_len, ref_length - head_length);
1403 memcpy(*rec_buff_p, header + head_length, info->offset);
1404 }
1405 return 0;
1406 }
1407
1408
1409 /* rutines for bit buffer */
1410 /* Note buffer must be 6 byte bigger than longest row */
1411
init_bit_buffer(MI_BIT_BUFF * bit_buff,uchar * buffer,uint length)1412 static void init_bit_buffer(MI_BIT_BUFF *bit_buff, uchar *buffer, uint length)
1413 {
1414 bit_buff->pos=buffer;
1415 bit_buff->end=buffer+length;
1416 bit_buff->bits=bit_buff->error=0;
1417 bit_buff->current_byte=0; /* Avoid purify errors */
1418 }
1419
fill_and_get_bits(MI_BIT_BUFF * bit_buff,uint count)1420 static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff, uint count)
1421 {
1422 uint tmp;
1423 count-=bit_buff->bits;
1424 tmp=(bit_buff->current_byte & mask[bit_buff->bits]) << count;
1425 fill_buffer(bit_buff);
1426 bit_buff->bits=BITS_SAVED - count;
1427 return tmp+(bit_buff->current_byte >> (BITS_SAVED - count));
1428 }
1429
1430 /* Fill in empty bit_buff->current_byte from buffer */
1431 /* Sets bit_buff->error if buffer is exhausted */
1432
fill_buffer(MI_BIT_BUFF * bit_buff)1433 static void fill_buffer(MI_BIT_BUFF *bit_buff)
1434 {
1435 if (bit_buff->pos >= bit_buff->end)
1436 {
1437 bit_buff->error= 1;
1438 bit_buff->current_byte=0;
1439 return;
1440 }
1441
1442 #if BITS_SAVED == 64
1443 bit_buff->current_byte= ((((uint) ((uchar) bit_buff->pos[7]))) |
1444 (((uint) ((uchar) bit_buff->pos[6])) << 8) |
1445 (((uint) ((uchar) bit_buff->pos[5])) << 16) |
1446 (((uint) ((uchar) bit_buff->pos[4])) << 24) |
1447 ((ulonglong)
1448 ((((uint) ((uchar) bit_buff->pos[3]))) |
1449 (((uint) ((uchar) bit_buff->pos[2])) << 8) |
1450 (((uint) ((uchar) bit_buff->pos[1])) << 16) |
1451 (((uint) ((uchar) bit_buff->pos[0])) << 24)) << 32));
1452 bit_buff->pos+=8;
1453 #else
1454 #if BITS_SAVED == 32
1455 bit_buff->current_byte= (((uint) ((uchar) bit_buff->pos[3])) |
1456 (((uint) ((uchar) bit_buff->pos[2])) << 8) |
1457 (((uint) ((uchar) bit_buff->pos[1])) << 16) |
1458 (((uint) ((uchar) bit_buff->pos[0])) << 24));
1459 bit_buff->pos+=4;
1460 #else
1461 bit_buff->current_byte= (uint) (((uint) ((uchar) bit_buff->pos[1])) |
1462 (((uint) ((uchar) bit_buff->pos[0])) << 8));
1463 bit_buff->pos+=2;
1464 #endif
1465 #endif
1466 }
1467
1468 /* Get number of bits neaded to represent value */
1469
max_bit(register uint value)1470 static uint max_bit(register uint value)
1471 {
1472 reg2 uint power=1;
1473
1474 while ((value>>=1))
1475 power++;
1476 return (power);
1477 }
1478
1479
1480 /*****************************************************************************
1481 Some redefined functions to handle files when we are using memmap
1482 *****************************************************************************/
1483
1484 #ifdef HAVE_MMAP
1485
1486 static int _mi_read_mempack_record(MI_INFO *info,my_off_t filepos,uchar *buf);
1487 static int _mi_read_rnd_mempack_record(MI_INFO*, uchar *,my_off_t, my_bool);
1488
_mi_memmap_file(MI_INFO * info)1489 my_bool _mi_memmap_file(MI_INFO *info)
1490 {
1491 MYISAM_SHARE *share=info->s;
1492 my_bool eom;
1493
1494 DBUG_ENTER("mi_memmap_file");
1495
1496 if (!info->s->file_map)
1497 {
1498 my_off_t data_file_length= share->state.state.data_file_length;
1499
1500 if (myisam_mmap_size != SIZE_T_MAX)
1501 {
1502 mysql_mutex_lock(&THR_LOCK_myisam_mmap);
1503 eom= data_file_length > myisam_mmap_size - myisam_mmap_used - MEMMAP_EXTRA_MARGIN;
1504 if (!eom)
1505 myisam_mmap_used+= data_file_length + MEMMAP_EXTRA_MARGIN;
1506 mysql_mutex_unlock(&THR_LOCK_myisam_mmap);
1507 }
1508 else
1509 eom= data_file_length > myisam_mmap_size - MEMMAP_EXTRA_MARGIN;
1510
1511 if (eom)
1512 {
1513 DBUG_PRINT("warning", ("File is too large for mmap"));
1514 DBUG_RETURN(0);
1515 }
1516 if (mysql_file_seek(info->dfile, 0L, MY_SEEK_END, MYF(0)) <
1517 share->state.state.data_file_length+MEMMAP_EXTRA_MARGIN)
1518 {
1519 DBUG_PRINT("warning",("File isn't extended for memmap"));
1520 if (myisam_mmap_size != SIZE_T_MAX)
1521 {
1522 mysql_mutex_lock(&THR_LOCK_myisam_mmap);
1523 myisam_mmap_used-= data_file_length + MEMMAP_EXTRA_MARGIN;
1524 mysql_mutex_unlock(&THR_LOCK_myisam_mmap);
1525 }
1526 DBUG_RETURN(0);
1527 }
1528 if (mi_dynmap_file(info,
1529 share->state.state.data_file_length +
1530 MEMMAP_EXTRA_MARGIN))
1531 {
1532 if (myisam_mmap_size != SIZE_T_MAX)
1533 {
1534 mysql_mutex_lock(&THR_LOCK_myisam_mmap);
1535 myisam_mmap_used-= data_file_length + MEMMAP_EXTRA_MARGIN;
1536 mysql_mutex_unlock(&THR_LOCK_myisam_mmap);
1537 }
1538 DBUG_RETURN(0);
1539 }
1540 }
1541 info->opt_flag|= MEMMAP_USED;
1542 info->read_record= share->read_record= _mi_read_mempack_record;
1543 share->read_rnd= _mi_read_rnd_mempack_record;
1544 DBUG_RETURN(1);
1545 }
1546
1547
_mi_unmap_file(MI_INFO * info)1548 void _mi_unmap_file(MI_INFO *info)
1549 {
1550 DBUG_ASSERT(info->s->options & HA_OPTION_COMPRESS_RECORD);
1551
1552 (void) my_munmap((char*) info->s->file_map, info->s->mmaped_length);
1553
1554 if (myisam_mmap_size != SIZE_T_MAX)
1555 {
1556 mysql_mutex_lock(&THR_LOCK_myisam_mmap);
1557 myisam_mmap_used-= info->s->mmaped_length;
1558 mysql_mutex_unlock(&THR_LOCK_myisam_mmap);
1559 }
1560 }
1561
1562
_mi_mempack_get_block_info(MI_INFO * myisam,MI_BIT_BUFF * bit_buff,MI_BLOCK_INFO * info,uchar ** rec_buff_p,uchar * header)1563 static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
1564 MI_BLOCK_INFO *info, uchar **rec_buff_p,
1565 uchar *header)
1566 {
1567 header+= read_pack_length((uint) myisam->s->pack.version, header,
1568 &info->rec_len);
1569 if (myisam->s->base.blobs)
1570 {
1571 header+= read_pack_length((uint) myisam->s->pack.version, header,
1572 &info->blob_len);
1573 /* mi_alloc_rec_buff sets my_errno on error */
1574 if (!(mi_alloc_rec_buff(myisam, info->blob_len,
1575 rec_buff_p)))
1576 return 0; /* not enough memory */
1577 bit_buff->blob_pos= (uchar*) *rec_buff_p;
1578 bit_buff->blob_end= (uchar*) *rec_buff_p + info->blob_len;
1579 }
1580 return header;
1581 }
1582
1583
_mi_read_mempack_record(MI_INFO * info,my_off_t filepos,uchar * buf)1584 static int _mi_read_mempack_record(MI_INFO *info, my_off_t filepos, uchar *buf)
1585 {
1586 MI_BLOCK_INFO block_info;
1587 MYISAM_SHARE *share=info->s;
1588 uchar *pos;
1589 DBUG_ENTER("mi_read_mempack_record");
1590
1591 if (filepos == HA_OFFSET_ERROR)
1592 DBUG_RETURN(-1); /* _search() didn't find record */
1593
1594 if (!(pos= (uchar*) _mi_mempack_get_block_info(info, &info->bit_buff,
1595 &block_info, &info->rec_buff,
1596 (uchar*) share->file_map+
1597 filepos)))
1598 DBUG_RETURN(-1);
1599 DBUG_RETURN(_mi_pack_rec_unpack(info, &info->bit_buff, buf,
1600 pos, block_info.rec_len));
1601 }
1602
1603
1604 /*ARGSUSED*/
_mi_read_rnd_mempack_record(MI_INFO * info,uchar * buf,register my_off_t filepos,my_bool skip_deleted_blocks)1605 static int _mi_read_rnd_mempack_record(MI_INFO *info, uchar *buf,
1606 register my_off_t filepos,
1607 my_bool skip_deleted_blocks
1608 __attribute__((unused)))
1609 {
1610 MI_BLOCK_INFO block_info;
1611 MYISAM_SHARE *share=info->s;
1612 uchar *pos,*start;
1613 DBUG_ENTER("_mi_read_rnd_mempack_record");
1614
1615 if (filepos >= share->state.state.data_file_length)
1616 {
1617 my_errno=HA_ERR_END_OF_FILE;
1618 goto err;
1619 }
1620 if (!(pos= (uchar*) _mi_mempack_get_block_info(info, &info->bit_buff,
1621 &block_info, &info->rec_buff,
1622 (uchar*)
1623 (start=share->file_map+
1624 filepos))))
1625 goto err;
1626 #ifndef DBUG_OFF
1627 if (block_info.rec_len > info->s->max_pack_length)
1628 {
1629 my_errno=HA_ERR_WRONG_IN_RECORD;
1630 goto err;
1631 }
1632 #endif
1633 info->packed_length=block_info.rec_len;
1634 info->lastpos=filepos;
1635 info->nextpos=filepos+(uint) (pos-start)+block_info.rec_len;
1636 info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
1637
1638 DBUG_RETURN (_mi_pack_rec_unpack(info, &info->bit_buff, buf,
1639 pos, block_info.rec_len));
1640 err:
1641 DBUG_RETURN(my_errno);
1642 }
1643
1644 #endif /* HAVE_MMAP */
1645
1646 /* Save length of row */
1647
save_pack_length(uint version,uchar * block_buff,ulong length)1648 uint save_pack_length(uint version, uchar *block_buff, ulong length)
1649 {
1650 if (length < 254)
1651 {
1652 *(uchar*) block_buff= (uchar) length;
1653 return 1;
1654 }
1655 if (length <= 65535)
1656 {
1657 *(uchar*) block_buff=254;
1658 int2store(block_buff+1,(uint) length);
1659 return 3;
1660 }
1661 *(uchar*) block_buff=255;
1662 if (version == 1) /* old format */
1663 {
1664 DBUG_ASSERT(length <= 0xFFFFFF);
1665 int3store(block_buff + 1, (ulong) length);
1666 return 4;
1667 }
1668 else
1669 {
1670 int4store(block_buff + 1, (ulong) length);
1671 return 5;
1672 }
1673 }
1674
1675
read_pack_length(uint version,const uchar * buf,ulong * length)1676 static uint read_pack_length(uint version, const uchar *buf, ulong *length)
1677 {
1678 if (buf[0] < 254)
1679 {
1680 *length= buf[0];
1681 return 1;
1682 }
1683 else if (buf[0] == 254)
1684 {
1685 *length= uint2korr(buf + 1);
1686 return 3;
1687 }
1688 if (version == 1) /* old format */
1689 {
1690 *length= uint3korr(buf + 1);
1691 return 4;
1692 }
1693 else
1694 {
1695 *length= uint4korr(buf + 1);
1696 return 5;
1697 }
1698 }
1699
1700
calc_pack_length(uint version,ulong length)1701 uint calc_pack_length(uint version, ulong length)
1702 {
1703 return (length < 254) ? 1 : (length < 65536) ? 3 : (version == 1) ? 4 : 5;
1704 }
1705