1 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2    Copyright (c) 2020, MariaDB Corporation.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
16 
17 	/* Functions to compressed records */
18 
19 #include "fulltext.h"
20 
21 #define IS_CHAR ((uint) 32768)		/* Bit if char (not offset) in tree */
22 
23 /* Some definitions to keep in sync with myisampack.c */
24 #define HEAD_LENGTH	32              /* Length of fixed header */
25 
26 #if INT_MAX > 32767
27 #define BITS_SAVED 32
28 #define MAX_QUICK_TABLE_BITS 9		/* Because we may shift in 24 bits */
29 #else
30 #define BITS_SAVED 16
31 #define MAX_QUICK_TABLE_BITS 6
32 #endif
33 
34 #define get_bit(BU) ((BU)->bits ? \
35 		     (BU)->current_byte & ((mi_bit_type) 1 << --(BU)->bits) :\
36 		     (fill_buffer(BU), (BU)->bits= BITS_SAVED-1,\
37 		      (BU)->current_byte & ((mi_bit_type) 1 << (BITS_SAVED-1))))
38 #define skip_to_next_byte(BU) ((BU)->bits&=~7)
39 #define get_bits(BU,count) (((BU)->bits >= count) ? (((BU)->current_byte >> ((BU)->bits-=count)) & mask[count]) : fill_and_get_bits(BU,count))
40 
41 #define decode_bytes_test_bit(bit) \
42   if (low_byte & (1 << (7-bit))) \
43     pos++; \
44   if (*pos & IS_CHAR) \
45   { bits-=(bit+1); break; } \
46   pos+= *pos
47 
48 /* Size in uint16 of a Huffman tree for byte compression of 256 byte values. */
49 #define OFFSET_TABLE_SIZE 512
50 
51 static uint read_huff_table(MI_BIT_BUFF *bit_buff,MI_DECODE_TREE *decode_tree,
52 			    uint16 **decode_table,uchar **intervall_buff,
53 			    uint16 *tmp_buff);
54 static void make_quick_table(uint16 *to_table,uint16 *decode_table,
55 			     uint *next_free,uint value,uint bits,
56 			     uint max_bits);
57 static void fill_quick_table(uint16 *table,uint bits, uint max_bits,
58 			     uint value);
59 static uint copy_decode_table(uint16 *to_pos,uint offset,
60 			      uint16 *decode_table);
61 static uint find_longest_bitstream(uint16 *table, uint16 *end);
62 static void (*get_unpack_function(MI_COLUMNDEF *rec))(MI_COLUMNDEF *field,
63 						    MI_BIT_BUFF *buff,
64 						    uchar *to,
65 						    uchar *end);
66 static void uf_zerofill_skip_zero(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
67 				   uchar *to,uchar *end);
68 static void uf_skip_zero(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
69 			  uchar *to,uchar *end);
70 static void uf_space_normal(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
71 			    uchar *to,uchar *end);
72 static void uf_space_endspace_selected(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
73 				       uchar *to, uchar *end);
74 static void uf_endspace_selected(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
75 				 uchar *to,uchar *end);
76 static void uf_space_endspace(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
77 			      uchar *to,uchar *end);
78 static void uf_endspace(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
79 			uchar *to,uchar *end);
80 static void uf_space_prespace_selected(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
81 				       uchar *to, uchar *end);
82 static void uf_prespace_selected(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
83 				 uchar *to,uchar *end);
84 static void uf_space_prespace(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
85 			      uchar *to,uchar *end);
86 static void uf_prespace(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
87 			uchar *to,uchar *end);
88 static void uf_zerofill_normal(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
89 			       uchar *to,uchar *end);
90 static void uf_constant(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
91 			uchar *to,uchar *end);
92 static void uf_intervall(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
93 			 uchar *to,uchar *end);
94 static void uf_zero(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
95 		    uchar *to,uchar *end);
96 static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
97 		    uchar *to, uchar *end);
98 static void uf_varchar1(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
99                         uchar *to, uchar *end);
100 static void uf_varchar2(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
101                         uchar *to, uchar *end);
102 static void decode_bytes(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,
103 			 uchar *to,uchar *end);
104 static uint decode_pos(MI_BIT_BUFF *bit_buff,MI_DECODE_TREE *decode_tree);
105 static void init_bit_buffer(MI_BIT_BUFF *bit_buff,uchar *buffer,uint length);
106 static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff,uint count);
107 static void fill_buffer(MI_BIT_BUFF *bit_buff);
108 static uint max_bit(uint value);
109 static uint read_pack_length(uint version, const uchar *buf, ulong *length);
110 #ifdef HAVE_MMAP
111 static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
112                                          MI_BLOCK_INFO *info, uchar **rec_buff_p,
113 					 uchar *header);
114 #endif
115 
116 static mi_bit_type mask[]=
117 {
118    0x00000000,
119    0x00000001, 0x00000003, 0x00000007, 0x0000000f,
120    0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
121    0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
122    0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
123 #if BITS_SAVED > 16
124    0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff,
125    0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
126    0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff,
127    0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff,
128 #endif
129  };
130 
131 
132 	/* Read all packed info, allocate memory and fix field structs */
133 
_mi_read_pack_info(MI_INFO * info,pbool fix_keys)134 my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys)
135 {
136   File file;
137   int diff_length;
138   uint i,trees,huff_tree_bits,rec_reflength,length;
139   uint16 *decode_table,*tmp_buff;
140   ulong elements,intervall_length;
141   uchar *disk_cache;
142   uchar *intervall_buff;
143   uchar header[HEAD_LENGTH];
144   MYISAM_SHARE *share=info->s;
145   MI_BIT_BUFF bit_buff;
146   DBUG_ENTER("_mi_read_pack_info");
147 
148   if (myisam_quick_table_bits < 4)
149     myisam_quick_table_bits=4;
150   else if (myisam_quick_table_bits > MAX_QUICK_TABLE_BITS)
151     myisam_quick_table_bits=MAX_QUICK_TABLE_BITS;
152 
153   file=info->dfile;
154   my_errno=0;
155   if (mysql_file_read(file, (uchar*) header, sizeof(header), MYF(MY_NABP)))
156   {
157     if (!my_errno)
158       my_errno=HA_ERR_END_OF_FILE;
159     goto err0;
160   }
161   /* Only the first three bytes of magic number are independent of version. */
162   if (memcmp((uchar*) header, (uchar*) myisam_pack_file_magic, 3))
163   {
164     my_errno=HA_ERR_WRONG_IN_RECORD;
165     goto err0;
166   }
167   share->pack.version= header[3]; /* fourth byte of magic number */
168   share->pack.header_length=	uint4korr(header+4);
169   share->min_pack_length=(uint) uint4korr(header+8);
170   share->max_pack_length=(uint) uint4korr(header+12);
171   elements=uint4korr(header+16);
172   intervall_length=uint4korr(header+20);
173   trees=uint2korr(header+24);
174   share->pack.ref_length=header[26];
175   rec_reflength=header[27];
176   diff_length=(int) rec_reflength - (int) share->base.rec_reflength;
177   if (fix_keys)
178     share->rec_reflength=rec_reflength;
179   share->base.min_block_length=share->min_pack_length+1;
180   if (share->min_pack_length > 254)
181     share->base.min_block_length+=2;
182   DBUG_PRINT("info", ("fixed header length:   %u", HEAD_LENGTH));
183   DBUG_PRINT("info", ("total header length:   %lu", share->pack.header_length));
184   DBUG_PRINT("info", ("pack file version:     %u", share->pack.version));
185   DBUG_PRINT("info", ("min pack length:       %lu", share->min_pack_length));
186   DBUG_PRINT("info", ("max pack length:       %lu", share->max_pack_length));
187   DBUG_PRINT("info", ("elements of all trees: %lu", elements));
188   DBUG_PRINT("info", ("distinct values bytes: %lu", intervall_length));
189   DBUG_PRINT("info", ("number of code trees:  %u", trees));
190   DBUG_PRINT("info", ("bytes for record lgt:  %u", share->pack.ref_length));
191   DBUG_PRINT("info", ("record pointer length: %u", rec_reflength));
192 
193   /*
194     Memory segment #1:
195     - Decode tree heads
196     - Distinct column values
197   */
198   if (!(share->decode_trees=(MI_DECODE_TREE*)
199 	my_malloc((uint) (trees*sizeof(MI_DECODE_TREE)+
200 			  intervall_length*sizeof(uchar)),
201 		  MYF(MY_WME))))
202     goto err0;
203   intervall_buff=(uchar*) (share->decode_trees+trees);
204 
205   /*
206     Memory segment #2:
207     - Decode tables
208     - Quick decode tables
209     - Temporary decode table
210     - Compressed data file header cache
211     This segment will be reallocated after construction of the tables.
212   */
213   length=(uint) (elements*2+trees*(1 << myisam_quick_table_bits));
214   /*
215     To keep some algorithms simpler, we accept that they access
216     bytes beyond the end of the input data. This can affect up to
217     one byte less than the "word size" size used in this file,
218     which is BITS_SAVED / 8. To avoid accessing non-allocated
219     data, we add (BITS_SAVED / 8) - 1 bytes to the buffer size.
220   */
221   if (!(share->decode_tables=(uint16*)
222         my_malloc((length + OFFSET_TABLE_SIZE) * sizeof(uint16) +
223                   (uint) (share->pack.header_length - sizeof(header) +
224                   (BITS_SAVED / 8) - 1), MYF(MY_WME | MY_ZEROFILL))))
225     goto err1;
226   tmp_buff=share->decode_tables+length;
227   disk_cache= (uchar*) (tmp_buff+OFFSET_TABLE_SIZE);
228 
229   if (mysql_file_read(file, disk_cache,
230                       (uint) (share->pack.header_length-sizeof(header)),
231                       MYF(MY_NABP)))
232     goto err2;
233 
234   huff_tree_bits=max_bit(trees ? trees-1 : 0);
235   init_bit_buffer(&bit_buff, disk_cache,
236 		  (uint) (share->pack.header_length-sizeof(header)));
237   /* Read new info for each field */
238   for (i=0 ; i < share->base.fields ; i++)
239   {
240     share->rec[i].base_type=(enum en_fieldtype) get_bits(&bit_buff,5);
241     share->rec[i].pack_type=(uint) get_bits(&bit_buff,6);
242     share->rec[i].space_length_bits=get_bits(&bit_buff,5);
243     share->rec[i].huff_tree=share->decode_trees+(uint) get_bits(&bit_buff,
244 								huff_tree_bits);
245     share->rec[i].unpack=get_unpack_function(share->rec+i);
246     DBUG_PRINT("info", ("col: %2u  type: %2u  pack: %u  slbits: %2u",
247                         i, share->rec[i].base_type, share->rec[i].pack_type,
248                         share->rec[i].space_length_bits));
249   }
250   skip_to_next_byte(&bit_buff);
251   /*
252     Construct the decoding tables from the file header. Keep track of
253     the used memory.
254   */
255   decode_table=share->decode_tables;
256   for (i=0 ; i < trees ; i++)
257     if (read_huff_table(&bit_buff,share->decode_trees+i,&decode_table,
258                         &intervall_buff,tmp_buff))
259       goto err3;
260   /* Reallocate the decoding tables to the used size. */
261   decode_table=(uint16*)
262     my_realloc((uchar*) share->decode_tables,
263 	       (uint) ((uchar*) decode_table - (uchar*) share->decode_tables),
264 	       MYF(MY_HOLD_ON_ERROR));
265   /* Fix the table addresses in the tree heads. */
266   {
267     my_ptrdiff_t diff=PTR_BYTE_DIFF(decode_table,share->decode_tables);
268     share->decode_tables=decode_table;
269     for (i=0 ; i < trees ; i++)
270       share->decode_trees[i].table=ADD_TO_PTR(share->decode_trees[i].table,
271                                               diff, uint16*);
272   }
273 
274   /* Fix record-ref-length for keys */
275   if (fix_keys)
276   {
277     for (i=0 ; i < share->base.keys ; i++)
278     {
279       MI_KEYDEF *keyinfo= &share->keyinfo[i];
280       keyinfo->keylength+= (uint16) diff_length;
281       keyinfo->minlength+= (uint16) diff_length;
282       keyinfo->maxlength+= (uint16) diff_length;
283       keyinfo->seg[keyinfo->flag & HA_FULLTEXT ?
284                    FT_SEGS : keyinfo->keysegs].length= (uint16) rec_reflength;
285     }
286     if (share->ft2_keyinfo.seg)
287     {
288       MI_KEYDEF *ft2_keyinfo= &share->ft2_keyinfo;
289       ft2_keyinfo->keylength+= (uint16) diff_length;
290       ft2_keyinfo->minlength+= (uint16) diff_length;
291       ft2_keyinfo->maxlength+= (uint16) diff_length;
292     }
293   }
294 
295   if (bit_buff.error || bit_buff.pos < bit_buff.end)
296     goto err3;
297 
298   DBUG_RETURN(0);
299 
300 err3:
301   my_errno=HA_ERR_WRONG_IN_RECORD;
302 err2:
303   my_free(share->decode_tables);
304 err1:
305   my_free(share->decode_trees);
306 err0:
307   DBUG_RETURN(1);
308 }
309 
310 
311 /*
312   Read a huff-code-table from datafile.
313 
314   SYNOPSIS
315     read_huff_table()
316       bit_buff                  Bit buffer pointing at start of the
317                                 decoding table in the file header cache.
318       decode_tree               Pointer to the decode tree head.
319       decode_table      IN/OUT  Address of a pointer to the next free space.
320       intervall_buff    IN/OUT  Address of a pointer to the next unused values.
321       tmp_buff                  Buffer for temporary extraction of a full
322                                 decoding table as read from bit_buff.
323 
324   RETURN
325     0           OK.
326     1           Error.
327 */
328 
read_huff_table(MI_BIT_BUFF * bit_buff,MI_DECODE_TREE * decode_tree,uint16 ** decode_table,uchar ** intervall_buff,uint16 * tmp_buff)329 static uint read_huff_table(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree,
330 			    uint16 **decode_table, uchar **intervall_buff,
331 			    uint16 *tmp_buff)
332 {
333   uint min_chr,elements,char_bits,offset_bits,size,intervall_length,table_bits,
334   next_free_offset;
335   uint16 *ptr,*end;
336   DBUG_ENTER("read_huff_table");
337 
338   if (!get_bits(bit_buff,1))
339   {
340     /* Byte value compression. */
341     min_chr=get_bits(bit_buff,8);
342     elements=get_bits(bit_buff,9);
343     char_bits=get_bits(bit_buff,5);
344     offset_bits=get_bits(bit_buff,5);
345     intervall_length=0;
346     ptr=tmp_buff;
347     DBUG_PRINT("info", ("byte value compression"));
348     DBUG_PRINT("info", ("minimum byte value:    %u", min_chr));
349     DBUG_PRINT("info", ("number of tree nodes:  %u", elements));
350     DBUG_PRINT("info", ("bits for values:       %u", char_bits));
351     DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits));
352     if (elements > 256)
353     {
354       DBUG_PRINT("error", ("ERROR: illegal number of tree elements: %u",
355                            elements));
356       DBUG_RETURN(1);
357     }
358   }
359   else
360   {
361     /* Distinct column value compression. */
362     min_chr=0;
363     elements=get_bits(bit_buff,15);
364     intervall_length=get_bits(bit_buff,16);
365     char_bits=get_bits(bit_buff,5);
366     offset_bits=get_bits(bit_buff,5);
367     decode_tree->quick_table_bits=0;
368     ptr= *decode_table;
369     DBUG_PRINT("info", ("distinct column value compression"));
370     DBUG_PRINT("info", ("number of tree nodes:  %u", elements));
371     DBUG_PRINT("info", ("value buffer length:   %u", intervall_length));
372     DBUG_PRINT("info", ("bits for value index:  %u", char_bits));
373     DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits));
374   }
375   size=elements*2-2;
376   DBUG_PRINT("info", ("tree size in uint16:   %u", size));
377   DBUG_PRINT("info", ("tree size in bytes:    %u",
378                       size * (uint) sizeof(uint16)));
379 
380   for (end=ptr+size ; ptr < end ; ptr++)
381   {
382     if (get_bit(bit_buff))
383     {
384       *ptr= (uint16) get_bits(bit_buff,offset_bits);
385       if ((ptr + *ptr >= end) || !*ptr)
386       {
387         DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree"));
388         DBUG_RETURN(1);
389       }
390     }
391     else
392       *ptr= (uint16) (IS_CHAR + (get_bits(bit_buff,char_bits) + min_chr));
393   }
394   skip_to_next_byte(bit_buff);
395 
396   decode_tree->table= *decode_table;
397   decode_tree->intervalls= *intervall_buff;
398   if (! intervall_length)
399   {
400     /* Byte value compression. ptr started from tmp_buff. */
401     /* Find longest Huffman code from begin to end of tree in bits. */
402     table_bits= find_longest_bitstream(tmp_buff, ptr);
403     if (table_bits >= OFFSET_TABLE_SIZE)
404       DBUG_RETURN(1);
405     if (table_bits > myisam_quick_table_bits)
406       table_bits=myisam_quick_table_bits;
407     DBUG_PRINT("info", ("table bits:            %u", table_bits));
408 
409     next_free_offset= (1 << table_bits);
410     make_quick_table(*decode_table,tmp_buff,&next_free_offset,0,table_bits,
411 		     table_bits);
412     (*decode_table)+= next_free_offset;
413     decode_tree->quick_table_bits=table_bits;
414   }
415   else
416   {
417     /* Distinct column value compression. ptr started from *decode_table */
418     (*decode_table)=end;
419     /*
420       get_bits() moves some bytes to a cache buffer in advance. May need
421       to step back.
422     */
423     bit_buff->pos-= bit_buff->bits/8;
424     /* Copy the distinct column values from the buffer. */
425     memcpy(*intervall_buff,bit_buff->pos,(size_t) intervall_length);
426     (*intervall_buff)+=intervall_length;
427     bit_buff->pos+=intervall_length;
428     bit_buff->bits=0;
429   }
430   DBUG_RETURN(0);
431 }
432 
433 
434 /*
435   Make a quick_table for faster decoding.
436 
437   SYNOPSIS
438     make_quick_table()
439       to_table                  Target quick_table and remaining decode table.
440       decode_table              Source Huffman (sub-)tree within tmp_buff.
441       next_free_offset   IN/OUT Next free offset from to_table.
442                                 Starts behind quick_table on the top-level.
443       value                     Huffman bits found so far.
444       bits                      Remaining bits to be collected.
445       max_bits                  Total number of bits to collect (table_bits).
446 
447   DESCRIPTION
448 
449     The quick table is an array of 16-bit values. There exists one value
450     for each possible code representable by max_bits (table_bits) bits.
451     In most cases table_bits is 9. So there are 512 16-bit values.
452 
453     If the high-order bit (16) is set (IS_CHAR) then the array slot for
454     this value is a valid Huffman code for a resulting byte value.
455 
456     The low-order 8 bits (1..8) are the resulting byte value.
457 
458     Bits 9..14 are the length of the Huffman code for this byte value.
459     This means so many bits from the input stream were needed to
460     represent this byte value. The remaining bits belong to later
461     Huffman codes. This also means that for every Huffman code shorter
462     than table_bits there are multiple entires in the array, which
463     differ just in the unused bits.
464 
465     If the high-order bit (16) is clear (0) then the remaining bits are
466     the position of the remaining Huffman decode tree segment behind the
467     quick table.
468 
469   RETURN
470     void
471 */
472 
make_quick_table(uint16 * to_table,uint16 * decode_table,uint * next_free_offset,uint value,uint bits,uint max_bits)473 static void make_quick_table(uint16 *to_table, uint16 *decode_table,
474 			     uint *next_free_offset, uint value, uint bits,
475 			     uint max_bits)
476 {
477   DBUG_ENTER("make_quick_table");
478 
479   /*
480     When down the table to the requested maximum, copy the rest of the
481     Huffman table.
482   */
483   if (!bits--)
484   {
485     /*
486       Remaining left  Huffman tree segment starts behind quick table.
487       Remaining right Huffman tree segment starts behind left segment.
488     */
489     to_table[value]= (uint16) *next_free_offset;
490     /*
491       Re-construct the remaining Huffman tree segment at
492       next_free_offset in to_table.
493     */
494     *next_free_offset= copy_decode_table(to_table, *next_free_offset,
495                                          decode_table);
496     DBUG_VOID_RETURN;
497   }
498 
499   /* Descent on the left side. Left side bits are clear (0). */
500   if (!(*decode_table & IS_CHAR))
501   {
502     /* Not a leaf. Follow the pointer. */
503     make_quick_table(to_table, decode_table + *decode_table,
504                      next_free_offset, value, bits, max_bits);
505   }
506   else
507   {
508     /*
509       A leaf. A Huffman code is complete. Fill the quick_table
510       array for all possible bit strings starting with this Huffman
511       code.
512     */
513     fill_quick_table(to_table + value, bits, max_bits, (uint) *decode_table);
514   }
515 
516   /* Descent on the right side. Right side bits are set (1). */
517   decode_table++;
518   value|= (1 << bits);
519   if (!(*decode_table & IS_CHAR))
520   {
521     /* Not a leaf. Follow the pointer. */
522     make_quick_table(to_table, decode_table + *decode_table,
523                      next_free_offset, value, bits, max_bits);
524   }
525   else
526   {
527     /*
528       A leaf. A Huffman code is complete. Fill the quick_table
529       array for all possible bit strings starting with this Huffman
530       code.
531     */
532     fill_quick_table(to_table + value, bits, max_bits, (uint) *decode_table);
533   }
534 
535   DBUG_VOID_RETURN;
536 }
537 
538 
539 /*
540   Fill quick_table for all possible values starting with this Huffman code.
541 
542   SYNOPSIS
543     fill_quick_table()
544       table                     Target quick_table position.
545       bits                      Unused bits from max_bits.
546       max_bits                  Total number of bits to collect (table_bits).
547       value                     The byte encoded by the found Huffman code.
548 
549   DESCRIPTION
550 
551     Fill the segment (all slots) of the quick_table array with the
552     resulting value for the found Huffman code. There are as many slots
553     as there are combinations representable by the unused bits.
554 
555     In most cases we use 9 table bits. Assume a 3-bit Huffman code. Then
556     there are 6 unused bits. Hence we fill 2**6 = 64 slots with the
557     value.
558 
559   RETURN
560     void
561 */
562 
fill_quick_table(uint16 * table,uint bits,uint max_bits,uint value)563 static void fill_quick_table(uint16 *table, uint bits, uint max_bits,
564 			     uint value)
565 {
566   uint16 *end;
567   DBUG_ENTER("fill_quick_table");
568 
569   /*
570     Bits 1..8 of value represent the decoded byte value.
571     Bits 9..14 become the length of the Huffman code for this byte value.
572     Bit 16 flags a valid code (IS_CHAR).
573   */
574   value|= (max_bits - bits) << 8 | IS_CHAR;
575 
576   for (end= table + ((my_ptrdiff_t) 1 << bits); table < end; table++)
577   {
578     *table= (uint16) value;
579   }
580   DBUG_VOID_RETURN;
581 }
582 
583 
584 /*
585   Reconstruct a decode subtree at the target position.
586 
587   SYNOPSIS
588     copy_decode_table()
589       to_pos                    Target quick_table and remaining decode table.
590       offset                    Next free offset from to_pos.
591       decode_table              Source Huffman subtree within tmp_buff.
592 
593   NOTE
594     Pointers in the decode tree are relative to the pointers position.
595 
596   RETURN
597     next free offset from to_pos.
598 */
599 
copy_decode_table(uint16 * to_pos,uint offset,uint16 * decode_table)600 static uint copy_decode_table(uint16 *to_pos, uint offset,
601 			      uint16 *decode_table)
602 {
603   uint prev_offset= offset;
604   DBUG_ENTER("copy_decode_table");
605 
606   /* Descent on the left side. */
607   if (!(*decode_table & IS_CHAR))
608   {
609     /* Set a pointer to the next target node. */
610     to_pos[offset]=2;
611     /* Copy the left hand subtree there. */
612     offset=copy_decode_table(to_pos,offset+2,decode_table+ *decode_table);
613   }
614   else
615   {
616     /* Copy the byte value. */
617     to_pos[offset]= *decode_table;
618     /* Step behind this node. */
619     offset+=2;
620   }
621 
622   /* Descent on the right side. */
623   decode_table++;
624   if (!(*decode_table & IS_CHAR))
625   {
626     /* Set a pointer to the next free target node. */
627     to_pos[prev_offset+1]=(uint16) (offset-prev_offset-1);
628     /* Copy the right hand subtree to the entry of that node. */
629     offset=copy_decode_table(to_pos,offset,decode_table+ *decode_table);
630   }
631   else
632   {
633     /* Copy the byte value. */
634     to_pos[prev_offset+1]= *decode_table;
635   }
636   DBUG_RETURN(offset);
637 }
638 
639 
640 /*
641   Find the length of the longest Huffman code in this table in bits.
642 
643   SYNOPSIS
644     find_longest_bitstream()
645       table                     Code (sub-)table start.
646       end                       End of code table.
647 
648   IMPLEMENTATION
649 
650     Recursively follow the branch(es) of the code pair on every level of
651     the tree until two byte values (and no branch) are found. Add one to
652     each level when returning back from each recursion stage.
653 
654     'end' is used for error checking only. A clean tree terminates
655     before reaching 'end'. Hence the exact value of 'end' is not too
656     important. However having it higher than necessary could lead to
657     misbehaviour should 'next' jump into the dirty area.
658 
659   RETURN
660     length                  Length of longest Huffman code in bits.
661     >= OFFSET_TABLE_SIZE    Error, broken tree. It does not end before 'end'.
662 */
663 
find_longest_bitstream(uint16 * table,uint16 * end)664 static uint find_longest_bitstream(uint16 *table, uint16 *end)
665 {
666   uint length= 1;
667   uint length2;
668 
669   if (!(*table & IS_CHAR))
670   {
671     uint16 *next= table + *table;
672     if (next > end || next == table)
673     {
674       DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree"));
675       return OFFSET_TABLE_SIZE;
676     }
677     length= find_longest_bitstream(next, end) + 1;
678   }
679   table++;
680   if (!(*table & IS_CHAR))
681   {
682     uint16 *next= table + *table;
683     if (next > end || next == table)
684     {
685       DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree"));
686       return OFFSET_TABLE_SIZE;
687     }
688     length2= find_longest_bitstream(next, end) + 1;
689     length=MY_MAX(length,length2);
690   }
691   return length;
692 }
693 
694 
695 /*
696   Read record from datafile.
697 
698   SYNOPSIS
699     _mi_read_pack_record()
700     info                        A pointer to MI_INFO.
701     filepos                     File offset of the record.
702     buf                 RETURN  The buffer to receive the record.
703 
704   RETURN
705     0                                   on success
706     HA_ERR_WRONG_IN_RECORD or -1        on error
707 */
708 
_mi_read_pack_record(MI_INFO * info,my_off_t filepos,uchar * buf)709 int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, uchar *buf)
710 {
711   MI_BLOCK_INFO block_info;
712   File file;
713   DBUG_ENTER("mi_read_pack_record");
714 
715   if (filepos == HA_OFFSET_ERROR)
716     DBUG_RETURN(-1);			/* _search() didn't find record */
717 
718   file=info->dfile;
719   if (_mi_pack_get_block_info(info, &info->bit_buff, &block_info,
720                               &info->rec_buff, file, filepos))
721     goto err;
722   if (mysql_file_read(file, (uchar*) info->rec_buff + block_info.offset,
723                       block_info.rec_len - block_info.offset, MYF(MY_NABP)))
724     goto panic;
725   info->update|= HA_STATE_AKTIV;
726   DBUG_RETURN(_mi_pack_rec_unpack(info, &info->bit_buff, buf,
727                                   info->rec_buff, block_info.rec_len));
728 panic:
729   my_errno=HA_ERR_WRONG_IN_RECORD;
730 err:
731   DBUG_RETURN(-1);
732 }
733 
734 
735 
_mi_pack_rec_unpack(register MI_INFO * info,MI_BIT_BUFF * bit_buff,register uchar * to,uchar * from,ulong reclength)736 int _mi_pack_rec_unpack(register MI_INFO *info, MI_BIT_BUFF *bit_buff,
737                         register uchar *to, uchar *from, ulong reclength)
738 {
739   uchar *end_field;
740   reg3 MI_COLUMNDEF *end;
741   MI_COLUMNDEF *current_field;
742   MYISAM_SHARE *share=info->s;
743   DBUG_ENTER("_mi_pack_rec_unpack");
744 
745   init_bit_buffer(bit_buff, (uchar*) from, reclength);
746 
747   for (current_field=share->rec, end=current_field+share->base.fields ;
748        current_field < end ;
749        current_field++,to=end_field)
750   {
751     end_field=to+current_field->length;
752     (*current_field->unpack)(current_field, bit_buff, (uchar*) to,
753 			     (uchar*) end_field);
754   }
755   if (!bit_buff->error &&
756       bit_buff->pos - bit_buff->bits / 8 == bit_buff->end)
757     DBUG_RETURN(0);
758   info->update&= ~HA_STATE_AKTIV;
759   DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD);
760 } /* _mi_pack_rec_unpack */
761 
762 
763 	/* Return function to unpack field */
764 
get_unpack_function(MI_COLUMNDEF * rec)765 static void (*get_unpack_function(MI_COLUMNDEF *rec))
766 (MI_COLUMNDEF *, MI_BIT_BUFF *, uchar *, uchar *)
767 {
768   switch (rec->base_type) {
769   case FIELD_SKIP_ZERO:
770     if (rec->pack_type & PACK_TYPE_ZERO_FILL)
771       return &uf_zerofill_skip_zero;
772     return &uf_skip_zero;
773   case FIELD_NORMAL:
774     if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
775       return &uf_space_normal;
776     if (rec->pack_type & PACK_TYPE_ZERO_FILL)
777       return &uf_zerofill_normal;
778     return &decode_bytes;
779   case FIELD_SKIP_ENDSPACE:
780     if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
781     {
782       if (rec->pack_type & PACK_TYPE_SELECTED)
783 	return &uf_space_endspace_selected;
784       return &uf_space_endspace;
785     }
786     if (rec->pack_type & PACK_TYPE_SELECTED)
787       return &uf_endspace_selected;
788     return &uf_endspace;
789   case FIELD_SKIP_PRESPACE:
790     if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
791     {
792       if (rec->pack_type & PACK_TYPE_SELECTED)
793 	return &uf_space_prespace_selected;
794       return &uf_space_prespace;
795     }
796     if (rec->pack_type & PACK_TYPE_SELECTED)
797       return &uf_prespace_selected;
798     return &uf_prespace;
799   case FIELD_CONSTANT:
800     return &uf_constant;
801   case FIELD_INTERVALL:
802     return &uf_intervall;
803   case FIELD_ZERO:
804   case FIELD_CHECK:
805     return &uf_zero;
806   case FIELD_BLOB:
807     return &uf_blob;
808   case FIELD_VARCHAR:
809     if (rec->length <= 256)                      /* 255 + 1 byte length */
810       return &uf_varchar1;
811     return &uf_varchar2;
812   case FIELD_LAST:
813   default:
814     return 0;			/* This should never happen */
815   }
816 }
817 
818 	/* The different functions to unpack a field */
819 
uf_zerofill_skip_zero(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)820 static void uf_zerofill_skip_zero(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
821 				   uchar *to, uchar *end)
822 {
823   if (get_bit(bit_buff))
824     bzero((char*) to,(uint) (end-to));
825   else
826   {
827     end-=rec->space_length_bits;
828     decode_bytes(rec,bit_buff,to,end);
829     bzero((char*) end,rec->space_length_bits);
830   }
831 }
832 
uf_skip_zero(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)833 static void uf_skip_zero(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
834 			  uchar *end)
835 {
836   if (get_bit(bit_buff))
837     bzero((char*) to,(uint) (end-to));
838   else
839     decode_bytes(rec,bit_buff,to,end);
840 }
841 
uf_space_normal(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)842 static void uf_space_normal(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
843 			    uchar *end)
844 {
845   if (get_bit(bit_buff))
846     bfill((uchar*) to,(end-to),' ');
847   else
848     decode_bytes(rec,bit_buff,to,end);
849 }
850 
uf_space_endspace_selected(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)851 static void uf_space_endspace_selected(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
852 				       uchar *to, uchar *end)
853 {
854   uint spaces;
855   if (get_bit(bit_buff))
856     bfill((uchar*) to,(end-to),' ');
857   else
858   {
859     if (get_bit(bit_buff))
860     {
861       if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
862       {
863 	bit_buff->error=1;
864 	return;
865       }
866       if (to+spaces != end)
867 	decode_bytes(rec,bit_buff,to,end-spaces);
868       bfill((uchar*) end-spaces,spaces,' ');
869     }
870     else
871       decode_bytes(rec,bit_buff,to,end);
872   }
873 }
874 
uf_endspace_selected(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)875 static void uf_endspace_selected(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
876 				 uchar *to, uchar *end)
877 {
878   uint spaces;
879   if (get_bit(bit_buff))
880   {
881     if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
882     {
883       bit_buff->error=1;
884       return;
885     }
886     if (to+spaces != end)
887       decode_bytes(rec,bit_buff,to,end-spaces);
888     bfill((uchar*) end-spaces,spaces,' ');
889   }
890   else
891     decode_bytes(rec,bit_buff,to,end);
892 }
893 
uf_space_endspace(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)894 static void uf_space_endspace(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
895 			      uchar *end)
896 {
897   uint spaces;
898   if (get_bit(bit_buff))
899     bfill((uchar*) to,(end-to),' ');
900   else
901   {
902     if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
903     {
904       bit_buff->error=1;
905       return;
906     }
907     if (to+spaces != end)
908       decode_bytes(rec,bit_buff,to,end-spaces);
909     bfill((uchar*) end-spaces,spaces,' ');
910   }
911 }
912 
uf_endspace(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)913 static void uf_endspace(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
914 			uchar *end)
915 {
916   uint spaces;
917   if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
918   {
919     bit_buff->error=1;
920     return;
921   }
922   if (to+spaces != end)
923     decode_bytes(rec,bit_buff,to,end-spaces);
924   bfill((uchar*) end-spaces,spaces,' ');
925 }
926 
uf_space_prespace_selected(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)927 static void uf_space_prespace_selected(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
928 				       uchar *to, uchar *end)
929 {
930   uint spaces;
931   if (get_bit(bit_buff))
932     bfill((uchar*) to,(end-to),' ');
933   else
934   {
935     if (get_bit(bit_buff))
936     {
937       if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
938       {
939 	bit_buff->error=1;
940 	return;
941       }
942       bfill((uchar*) to,spaces,' ');
943       if (to+spaces != end)
944 	decode_bytes(rec,bit_buff,to+spaces,end);
945     }
946     else
947       decode_bytes(rec,bit_buff,to,end);
948   }
949 }
950 
951 
uf_prespace_selected(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)952 static void uf_prespace_selected(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
953 				 uchar *to, uchar *end)
954 {
955   uint spaces;
956   if (get_bit(bit_buff))
957   {
958     if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
959     {
960       bit_buff->error=1;
961       return;
962     }
963     bfill((uchar*) to,spaces,' ');
964     if (to+spaces != end)
965       decode_bytes(rec,bit_buff,to+spaces,end);
966   }
967   else
968     decode_bytes(rec,bit_buff,to,end);
969 }
970 
971 
uf_space_prespace(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)972 static void uf_space_prespace(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
973 			      uchar *end)
974 {
975   uint spaces;
976   if (get_bit(bit_buff))
977     bfill((uchar*) to,(end-to),' ');
978   else
979   {
980     if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
981     {
982       bit_buff->error=1;
983       return;
984     }
985     bfill((uchar*) to,spaces,' ');
986     if (to+spaces != end)
987       decode_bytes(rec,bit_buff,to+spaces,end);
988   }
989 }
990 
uf_prespace(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)991 static void uf_prespace(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
992 			uchar *end)
993 {
994   uint spaces;
995   if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
996   {
997     bit_buff->error=1;
998     return;
999   }
1000   bfill((uchar*) to,spaces,' ');
1001   if (to+spaces != end)
1002     decode_bytes(rec,bit_buff,to+spaces,end);
1003 }
1004 
uf_zerofill_normal(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1005 static void uf_zerofill_normal(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
1006 			       uchar *end)
1007 {
1008   end-=rec->space_length_bits;
1009   decode_bytes(rec,bit_buff,(uchar*) to,end);
1010   bzero((char*) end,rec->space_length_bits);
1011 }
1012 
uf_constant(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1013 static void uf_constant(MI_COLUMNDEF *rec,
1014 			MI_BIT_BUFF *bit_buff __attribute__((unused)),
1015 			uchar *to,
1016 			uchar *end)
1017 {
1018   memcpy(to,rec->huff_tree->intervalls,(size_t) (end-to));
1019 }
1020 
uf_intervall(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1021 static void uf_intervall(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
1022 			 uchar *end)
1023 {
1024   reg1 uint field_length=(uint) (end-to);
1025   memcpy(to,rec->huff_tree->intervalls+field_length*decode_pos(bit_buff,
1026 							       rec->huff_tree),
1027 	 (size_t) field_length);
1028 }
1029 
1030 
1031 /*ARGSUSED*/
uf_zero(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1032 static void uf_zero(MI_COLUMNDEF *rec __attribute__((unused)),
1033 		    MI_BIT_BUFF *bit_buff __attribute__((unused)),
1034 		    uchar *to, uchar *end)
1035 {
1036   bzero((char*) to,(uint) (end-to));
1037 }
1038 
uf_blob(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1039 static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
1040 		    uchar *to, uchar *end)
1041 {
1042   if (get_bit(bit_buff))
1043     bzero((uchar*) to,(end-to));
1044   else
1045   {
1046     ulong length=get_bits(bit_buff,rec->space_length_bits);
1047     uint pack_length=(uint) (end-to)-portable_sizeof_char_ptr;
1048     if (bit_buff->blob_pos+length > bit_buff->blob_end)
1049     {
1050       bit_buff->error=1;
1051       bzero((uchar*) to,(end-to));
1052       return;
1053     }
1054     decode_bytes(rec,bit_buff,bit_buff->blob_pos,bit_buff->blob_pos+length);
1055     _mi_store_blob_length((uchar*) to,pack_length,length);
1056     memcpy(to+pack_length, &bit_buff->blob_pos, sizeof(char*));
1057     bit_buff->blob_pos+=length;
1058   }
1059 }
1060 
1061 
uf_varchar1(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1062 static void uf_varchar1(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
1063 		       uchar *to, uchar *end __attribute__((unused)))
1064 {
1065   if (get_bit(bit_buff))
1066     to[0]= 0;				/* Zero lengths */
1067   else
1068   {
1069     ulong length=get_bits(bit_buff,rec->space_length_bits);
1070     *to= (uchar) length;
1071     decode_bytes(rec,bit_buff,to+1,to+1+length);
1072   }
1073 }
1074 
1075 
uf_varchar2(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1076 static void uf_varchar2(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
1077 		       uchar *to, uchar *end __attribute__((unused)))
1078 {
1079   if (get_bit(bit_buff))
1080     to[0]=to[1]=0;				/* Zero lengths */
1081   else
1082   {
1083     ulong length=get_bits(bit_buff,rec->space_length_bits);
1084     int2store(to,length);
1085     decode_bytes(rec,bit_buff,to+2,to+2+length);
1086   }
1087 }
1088 
1089 	/* Functions to decode of buffer of bits */
1090 
1091 #if BITS_SAVED == 64
1092 
decode_bytes(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1093 static void decode_bytes(MI_COLUMNDEF *rec,MI_BIT_BUFF *bit_buff,uchar *to,
1094 			 uchar *end)
1095 {
1096   reg1 uint bits,low_byte;
1097   reg3 uint16 *pos;
1098   reg4 uint table_bits,table_and;
1099   MI_DECODE_TREE *decode_tree;
1100 
1101   decode_tree=rec->decode_tree;
1102   bits=bit_buff->bits;			/* Save in reg for quicker access */
1103   table_bits=decode_tree->quick_table_bits;
1104   table_and= (1 << table_bits)-1;
1105 
1106   do
1107   {
1108     if (bits <= 32)
1109     {
1110       if (bit_buff->pos > bit_buff->end+4)
1111       {
1112 	bit_buff->error=1;
1113 	return;				/* Can't be right */
1114       }
1115       bit_buff->current_byte= (bit_buff->current_byte << 32) |
1116 	((((uint) bit_buff->pos[3])) |
1117 	 (((uint) bit_buff->pos[2]) << 8) |
1118 	 (((uint) bit_buff->pos[1]) << 16) |
1119 	 (((uint) bit_buff->pos[0]) << 24));
1120       bit_buff->pos+=4;
1121       bits+=32;
1122     }
1123     /*
1124       First use info in quick_table.
1125 
1126       The quick table is an array of 16-bit values. There exists one
1127       value for each possible code representable by table_bits bits.
1128       In most cases table_bits is 9. So there are 512 16-bit values.
1129 
1130       If the high-order bit (16) is set (IS_CHAR) then the array slot
1131       for this value is a valid Huffman code for a resulting byte value.
1132 
1133       The low-order 8 bits (1..8) are the resulting byte value.
1134 
1135       Bits 9..14 are the length of the Huffman code for this byte value.
1136       This means so many bits from the input stream were needed to
1137       represent this byte value. The remaining bits belong to later
1138       Huffman codes. This also means that for every Huffman code shorter
1139       than table_bits there are multiple entires in the array, which
1140       differ just in the unused bits.
1141 
1142       If the high-order bit (16) is clear (0) then the remaining bits are
1143       the position of the remaining Huffman decode tree segment behind the
1144       quick table.
1145     */
1146     low_byte=(uint) (bit_buff->current_byte >> (bits - table_bits)) & table_and;
1147     low_byte=decode_tree->table[low_byte];
1148     if (low_byte & IS_CHAR)
1149     {
1150       /*
1151         All Huffman codes of less or equal table_bits length are in the
1152         quick table. This is one of them.
1153       */
1154       *to++ = (low_byte & 255);		/* Found char in quick table */
1155       bits-=  ((low_byte >> 8) & 31);	/* Remove bits used */
1156     }
1157     else
1158     {					/* Map through rest of decode-table */
1159       /* This means that the Huffman code must be longer than table_bits. */
1160       pos=decode_tree->table+low_byte;
1161       bits-=table_bits;
1162       /* NOTE: decode_bytes_test_bit() is a macro which contains a break !!! */
1163       for (;;)
1164       {
1165 	low_byte=(uint) (bit_buff->current_byte >> (bits-8));
1166 	decode_bytes_test_bit(0);
1167 	decode_bytes_test_bit(1);
1168 	decode_bytes_test_bit(2);
1169 	decode_bytes_test_bit(3);
1170 	decode_bytes_test_bit(4);
1171 	decode_bytes_test_bit(5);
1172 	decode_bytes_test_bit(6);
1173 	decode_bytes_test_bit(7);
1174 	bits-=8;
1175       }
1176       *to++ = *pos;
1177     }
1178   } while (to != end);
1179 
1180   bit_buff->bits=bits;
1181   return;
1182 }
1183 
1184 #else
1185 
decode_bytes(MI_COLUMNDEF * rec,MI_BIT_BUFF * bit_buff,uchar * to,uchar * end)1186 static void decode_bytes(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, uchar *to,
1187 			 uchar *end)
1188 {
1189   reg1 uint bits,low_byte;
1190   reg3 uint16 *pos;
1191   reg4 uint table_bits,table_and;
1192   MI_DECODE_TREE *decode_tree;
1193 
1194   decode_tree=rec->huff_tree;
1195   bits=bit_buff->bits;			/* Save in reg for quicker access */
1196   table_bits=decode_tree->quick_table_bits;
1197   table_and= (1 << table_bits)-1;
1198 
1199   do
1200   {
1201     if (bits < table_bits)
1202     {
1203       if (bit_buff->pos > bit_buff->end+1)
1204       {
1205 	bit_buff->error=1;
1206 	return;				/* Can't be right */
1207       }
1208 #if BITS_SAVED == 32
1209       bit_buff->current_byte= (bit_buff->current_byte << 24) |
1210 	(((uint) ((uchar) bit_buff->pos[2]))) |
1211 	  (((uint) ((uchar) bit_buff->pos[1])) << 8) |
1212 	    (((uint) ((uchar) bit_buff->pos[0])) << 16);
1213       bit_buff->pos+=3;
1214       bits+=24;
1215 #else
1216       if (bits)				/* We must have at leasts 9 bits */
1217       {
1218 	bit_buff->current_byte=  (bit_buff->current_byte << 8) |
1219 	  (uint) ((uchar) bit_buff->pos[0]);
1220 	bit_buff->pos++;
1221 	bits+=8;
1222       }
1223       else
1224       {
1225 	bit_buff->current_byte= ((uint) ((uchar) bit_buff->pos[0]) << 8) |
1226 	  ((uint) ((uchar) bit_buff->pos[1]));
1227 	bit_buff->pos+=2;
1228 	bits+=16;
1229       }
1230 #endif
1231     }
1232 	/* First use info in quick_table */
1233     low_byte=(bit_buff->current_byte >> (bits - table_bits)) & table_and;
1234     low_byte=decode_tree->table[low_byte];
1235     if (low_byte & IS_CHAR)
1236     {
1237       *to++ = (low_byte & 255);		/* Found char in quick table */
1238       bits-=  ((low_byte >> 8) & 31);	/* Remove bits used */
1239     }
1240     else
1241     {					/* Map through rest of decode-table */
1242       pos=decode_tree->table+low_byte;
1243       bits-=table_bits;
1244       for (;;)
1245       {
1246 	if (bits < 8)
1247 	{				/* We don't need to check end */
1248 #if BITS_SAVED == 32
1249 	  bit_buff->current_byte= (bit_buff->current_byte << 24) |
1250 	    (((uint) ((uchar) bit_buff->pos[2]))) |
1251 	      (((uint) ((uchar) bit_buff->pos[1])) << 8) |
1252 		(((uint) ((uchar) bit_buff->pos[0])) << 16);
1253 	  bit_buff->pos+=3;
1254 	  bits+=24;
1255 #else
1256 	  bit_buff->current_byte=  (bit_buff->current_byte << 8) |
1257 	    (uint) ((uchar) bit_buff->pos[0]);
1258 	  bit_buff->pos+=1;
1259 	  bits+=8;
1260 #endif
1261 	}
1262 	low_byte=(uint) (bit_buff->current_byte >> (bits-8));
1263 	decode_bytes_test_bit(0);
1264 	decode_bytes_test_bit(1);
1265 	decode_bytes_test_bit(2);
1266 	decode_bytes_test_bit(3);
1267 	decode_bytes_test_bit(4);
1268 	decode_bytes_test_bit(5);
1269 	decode_bytes_test_bit(6);
1270 	decode_bytes_test_bit(7);
1271 	bits-=8;
1272       }
1273       *to++ = (uchar) *pos;
1274     }
1275   } while (to != end);
1276 
1277   bit_buff->bits=bits;
1278   return;
1279 }
1280 #endif /* BIT_SAVED == 64 */
1281 
1282 
decode_pos(MI_BIT_BUFF * bit_buff,MI_DECODE_TREE * decode_tree)1283 static uint decode_pos(MI_BIT_BUFF *bit_buff, MI_DECODE_TREE *decode_tree)
1284 {
1285   uint16 *pos=decode_tree->table;
1286   for (;;)
1287   {
1288     if (get_bit(bit_buff))
1289       pos++;
1290     if (*pos & IS_CHAR)
1291       return (uint) (*pos & ~IS_CHAR);
1292     pos+= *pos;
1293   }
1294 }
1295 
1296 
_mi_read_rnd_pack_record(MI_INFO * info,uchar * buf,register my_off_t filepos,my_bool skip_deleted_blocks)1297 int _mi_read_rnd_pack_record(MI_INFO *info, uchar *buf,
1298 			     register my_off_t filepos,
1299 			     my_bool skip_deleted_blocks)
1300 {
1301   uint b_type;
1302   MI_BLOCK_INFO block_info;
1303   MYISAM_SHARE *share=info->s;
1304   DBUG_ENTER("_mi_read_rnd_pack_record");
1305 
1306   if (filepos >= info->state->data_file_length)
1307   {
1308     my_errno= HA_ERR_END_OF_FILE;
1309     goto err;
1310   }
1311 
1312   if (info->opt_flag & READ_CACHE_USED)
1313   {
1314     if (_mi_read_cache(&info->rec_cache, (uchar*) block_info.header,
1315                        filepos, share->pack.ref_length,
1316                        skip_deleted_blocks ? READING_NEXT : 0))
1317       goto err;
1318     b_type=_mi_pack_get_block_info(info, &info->bit_buff, &block_info,
1319                                    &info->rec_buff, -1, filepos);
1320   }
1321   else
1322     b_type=_mi_pack_get_block_info(info, &info->bit_buff, &block_info,
1323                                    &info->rec_buff, info->dfile, filepos);
1324   if (b_type)
1325     goto err;					/* Error code is already set */
1326 #ifndef DBUG_OFF
1327   if (block_info.rec_len > share->max_pack_length)
1328   {
1329     my_errno=HA_ERR_WRONG_IN_RECORD;
1330     goto err;
1331   }
1332 #endif
1333 
1334   if (info->opt_flag & READ_CACHE_USED)
1335   {
1336     if (_mi_read_cache(&info->rec_cache, (uchar*) info->rec_buff,
1337                        block_info.filepos, block_info.rec_len,
1338                        skip_deleted_blocks ? READING_NEXT : 0))
1339       goto err;
1340   }
1341   else
1342   {
1343     if (mysql_file_read(info->dfile,
1344                         (uchar*) info->rec_buff + block_info.offset,
1345                         block_info.rec_len-block_info.offset, MYF(MY_NABP)))
1346       goto err;
1347   }
1348   info->packed_length=block_info.rec_len;
1349   info->lastpos=filepos;
1350   info->nextpos=block_info.filepos+block_info.rec_len;
1351   info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
1352 
1353   DBUG_RETURN (_mi_pack_rec_unpack(info, &info->bit_buff, buf,
1354                                    info->rec_buff, block_info.rec_len));
1355  err:
1356   DBUG_RETURN(my_errno);
1357 }
1358 
1359 
1360 	/* Read and process header from a huff-record-file */
1361 
_mi_pack_get_block_info(MI_INFO * myisam,MI_BIT_BUFF * bit_buff,MI_BLOCK_INFO * info,uchar ** rec_buff_p,File file,my_off_t filepos)1362 uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
1363                              MI_BLOCK_INFO *info, uchar **rec_buff_p,
1364                              File file, my_off_t filepos)
1365 {
1366   uchar *header=info->header;
1367   uint head_length, UNINIT_VAR(ref_length);
1368 
1369   if (file >= 0)
1370   {
1371     ref_length=myisam->s->pack.ref_length;
1372     /*
1373       We can't use mysql_file_pread() here because mi_read_rnd_pack_record assumes
1374       position is ok
1375     */
1376     mysql_file_seek(file, filepos, MY_SEEK_SET, MYF(0));
1377     if (mysql_file_read(file, header, ref_length, MYF(MY_NABP)))
1378       return BLOCK_FATAL_ERROR;
1379     DBUG_DUMP("header",(uchar*) header,ref_length);
1380   }
1381   head_length= read_pack_length((uint) myisam->s->pack.version, header,
1382                                 &info->rec_len);
1383   if (myisam->s->base.blobs)
1384   {
1385     head_length+= read_pack_length((uint) myisam->s->pack.version,
1386                                    header + head_length, &info->blob_len);
1387     /*
1388       Ensure that the record buffer is big enough for the compressed
1389       record plus all expanded blobs. [We do not have an extra buffer
1390       for the resulting blobs. Sigh.]
1391     */
1392     if (!(mi_alloc_rec_buff(myisam,info->rec_len + info->blob_len,
1393 			    rec_buff_p)))
1394       return BLOCK_FATAL_ERROR;			/* not enough memory */
1395     bit_buff->blob_pos= (uchar*) *rec_buff_p + info->rec_len;
1396     bit_buff->blob_end= bit_buff->blob_pos + info->blob_len;
1397     myisam->blob_length=info->blob_len;
1398   }
1399   info->filepos=filepos+head_length;
1400   if (file > 0)
1401   {
1402     info->offset=MY_MIN(info->rec_len, ref_length - head_length);
1403     memcpy(*rec_buff_p, header + head_length, info->offset);
1404   }
1405   return 0;
1406 }
1407 
1408 
1409 	/* rutines for bit buffer */
1410 	/* Note buffer must be 6 byte bigger than longest row */
1411 
init_bit_buffer(MI_BIT_BUFF * bit_buff,uchar * buffer,uint length)1412 static void init_bit_buffer(MI_BIT_BUFF *bit_buff, uchar *buffer, uint length)
1413 {
1414   bit_buff->pos=buffer;
1415   bit_buff->end=buffer+length;
1416   bit_buff->bits=bit_buff->error=0;
1417   bit_buff->current_byte=0;			/* Avoid purify errors */
1418 }
1419 
fill_and_get_bits(MI_BIT_BUFF * bit_buff,uint count)1420 static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff, uint count)
1421 {
1422   uint tmp;
1423   count-=bit_buff->bits;
1424   tmp=(bit_buff->current_byte & mask[bit_buff->bits]) << count;
1425   fill_buffer(bit_buff);
1426   bit_buff->bits=BITS_SAVED - count;
1427   return tmp+(bit_buff->current_byte >> (BITS_SAVED - count));
1428 }
1429 
1430 	/* Fill in empty bit_buff->current_byte from buffer */
1431 	/* Sets bit_buff->error if buffer is exhausted */
1432 
fill_buffer(MI_BIT_BUFF * bit_buff)1433 static void fill_buffer(MI_BIT_BUFF *bit_buff)
1434 {
1435   if (bit_buff->pos >= bit_buff->end)
1436   {
1437     bit_buff->error= 1;
1438     bit_buff->current_byte=0;
1439     return;
1440   }
1441 
1442 #if BITS_SAVED == 64
1443   bit_buff->current_byte=  ((((uint) ((uchar) bit_buff->pos[7]))) |
1444 			     (((uint) ((uchar) bit_buff->pos[6])) << 8) |
1445 			     (((uint) ((uchar) bit_buff->pos[5])) << 16) |
1446 			     (((uint) ((uchar) bit_buff->pos[4])) << 24) |
1447 			     ((ulonglong)
1448 			      ((((uint) ((uchar) bit_buff->pos[3]))) |
1449 			       (((uint) ((uchar) bit_buff->pos[2])) << 8) |
1450 			       (((uint) ((uchar) bit_buff->pos[1])) << 16) |
1451 			       (((uint) ((uchar) bit_buff->pos[0])) << 24)) << 32));
1452   bit_buff->pos+=8;
1453 #else
1454 #if BITS_SAVED == 32
1455   bit_buff->current_byte=  (((uint) ((uchar) bit_buff->pos[3])) |
1456 			     (((uint) ((uchar) bit_buff->pos[2])) << 8) |
1457 			     (((uint) ((uchar) bit_buff->pos[1])) << 16) |
1458 			     (((uint) ((uchar) bit_buff->pos[0])) << 24));
1459   bit_buff->pos+=4;
1460 #else
1461   bit_buff->current_byte=  (uint) (((uint) ((uchar) bit_buff->pos[1])) |
1462 				    (((uint) ((uchar) bit_buff->pos[0])) << 8));
1463   bit_buff->pos+=2;
1464 #endif
1465 #endif
1466 }
1467 
1468 	/* Get number of bits neaded to represent value */
1469 
max_bit(register uint value)1470 static uint max_bit(register uint value)
1471 {
1472   reg2 uint power=1;
1473 
1474   while ((value>>=1))
1475     power++;
1476   return (power);
1477 }
1478 
1479 
1480 /*****************************************************************************
1481 	Some redefined functions to handle files when we are using memmap
1482 *****************************************************************************/
1483 
1484 #ifdef HAVE_MMAP
1485 
1486 static int _mi_read_mempack_record(MI_INFO *info,my_off_t filepos,uchar *buf);
1487 static int _mi_read_rnd_mempack_record(MI_INFO*, uchar *,my_off_t, my_bool);
1488 
_mi_memmap_file(MI_INFO * info)1489 my_bool _mi_memmap_file(MI_INFO *info)
1490 {
1491   MYISAM_SHARE *share=info->s;
1492   my_bool eom;
1493 
1494   DBUG_ENTER("mi_memmap_file");
1495 
1496   if (!info->s->file_map)
1497   {
1498     my_off_t data_file_length= share->state.state.data_file_length;
1499 
1500     if (myisam_mmap_size != SIZE_T_MAX)
1501     {
1502       mysql_mutex_lock(&THR_LOCK_myisam_mmap);
1503       eom= data_file_length > myisam_mmap_size - myisam_mmap_used - MEMMAP_EXTRA_MARGIN;
1504       if (!eom)
1505         myisam_mmap_used+= data_file_length + MEMMAP_EXTRA_MARGIN;
1506       mysql_mutex_unlock(&THR_LOCK_myisam_mmap);
1507     }
1508     else
1509       eom= data_file_length > myisam_mmap_size - MEMMAP_EXTRA_MARGIN;
1510 
1511     if (eom)
1512     {
1513       DBUG_PRINT("warning", ("File is too large for mmap"));
1514       DBUG_RETURN(0);
1515     }
1516     if (mysql_file_seek(info->dfile, 0L, MY_SEEK_END, MYF(0)) <
1517         share->state.state.data_file_length+MEMMAP_EXTRA_MARGIN)
1518     {
1519       DBUG_PRINT("warning",("File isn't extended for memmap"));
1520       if (myisam_mmap_size != SIZE_T_MAX)
1521       {
1522         mysql_mutex_lock(&THR_LOCK_myisam_mmap);
1523         myisam_mmap_used-= data_file_length + MEMMAP_EXTRA_MARGIN;
1524         mysql_mutex_unlock(&THR_LOCK_myisam_mmap);
1525       }
1526       DBUG_RETURN(0);
1527     }
1528     if (mi_dynmap_file(info,
1529                        share->state.state.data_file_length +
1530                          MEMMAP_EXTRA_MARGIN))
1531     {
1532       if (myisam_mmap_size != SIZE_T_MAX)
1533       {
1534         mysql_mutex_lock(&THR_LOCK_myisam_mmap);
1535         myisam_mmap_used-= data_file_length + MEMMAP_EXTRA_MARGIN;
1536         mysql_mutex_unlock(&THR_LOCK_myisam_mmap);
1537       }
1538       DBUG_RETURN(0);
1539     }
1540   }
1541   info->opt_flag|= MEMMAP_USED;
1542   info->read_record= share->read_record= _mi_read_mempack_record;
1543   share->read_rnd= _mi_read_rnd_mempack_record;
1544   DBUG_RETURN(1);
1545 }
1546 
1547 
_mi_unmap_file(MI_INFO * info)1548 void _mi_unmap_file(MI_INFO *info)
1549 {
1550   DBUG_ASSERT(info->s->options & HA_OPTION_COMPRESS_RECORD);
1551 
1552   (void) my_munmap((char*) info->s->file_map, info->s->mmaped_length);
1553 
1554   if (myisam_mmap_size != SIZE_T_MAX)
1555   {
1556     mysql_mutex_lock(&THR_LOCK_myisam_mmap);
1557     myisam_mmap_used-= info->s->mmaped_length;
1558     mysql_mutex_unlock(&THR_LOCK_myisam_mmap);
1559   }
1560 }
1561 
1562 
_mi_mempack_get_block_info(MI_INFO * myisam,MI_BIT_BUFF * bit_buff,MI_BLOCK_INFO * info,uchar ** rec_buff_p,uchar * header)1563 static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
1564                                          MI_BLOCK_INFO *info, uchar **rec_buff_p,
1565 					 uchar *header)
1566 {
1567   header+= read_pack_length((uint) myisam->s->pack.version, header,
1568                             &info->rec_len);
1569   if (myisam->s->base.blobs)
1570   {
1571     header+= read_pack_length((uint) myisam->s->pack.version, header,
1572                               &info->blob_len);
1573     /* mi_alloc_rec_buff sets my_errno on error */
1574     if (!(mi_alloc_rec_buff(myisam, info->blob_len,
1575 			    rec_buff_p)))
1576       return 0;				/* not enough memory */
1577     bit_buff->blob_pos= (uchar*) *rec_buff_p;
1578     bit_buff->blob_end= (uchar*) *rec_buff_p + info->blob_len;
1579   }
1580   return header;
1581 }
1582 
1583 
_mi_read_mempack_record(MI_INFO * info,my_off_t filepos,uchar * buf)1584 static int _mi_read_mempack_record(MI_INFO *info, my_off_t filepos, uchar *buf)
1585 {
1586   MI_BLOCK_INFO block_info;
1587   MYISAM_SHARE *share=info->s;
1588   uchar *pos;
1589   DBUG_ENTER("mi_read_mempack_record");
1590 
1591   if (filepos == HA_OFFSET_ERROR)
1592     DBUG_RETURN(-1);			/* _search() didn't find record */
1593 
1594   if (!(pos= (uchar*) _mi_mempack_get_block_info(info, &info->bit_buff,
1595                                                 &block_info, &info->rec_buff,
1596 						(uchar*) share->file_map+
1597 						filepos)))
1598     DBUG_RETURN(-1);
1599   DBUG_RETURN(_mi_pack_rec_unpack(info, &info->bit_buff, buf,
1600                                   pos, block_info.rec_len));
1601 }
1602 
1603 
1604 /*ARGSUSED*/
_mi_read_rnd_mempack_record(MI_INFO * info,uchar * buf,register my_off_t filepos,my_bool skip_deleted_blocks)1605 static int _mi_read_rnd_mempack_record(MI_INFO *info, uchar *buf,
1606 				       register my_off_t filepos,
1607 				       my_bool skip_deleted_blocks
1608 				       __attribute__((unused)))
1609 {
1610   MI_BLOCK_INFO block_info;
1611   MYISAM_SHARE *share=info->s;
1612   uchar *pos,*start;
1613   DBUG_ENTER("_mi_read_rnd_mempack_record");
1614 
1615   if (filepos >= share->state.state.data_file_length)
1616   {
1617     my_errno=HA_ERR_END_OF_FILE;
1618     goto err;
1619   }
1620   if (!(pos= (uchar*) _mi_mempack_get_block_info(info, &info->bit_buff,
1621                                                 &block_info, &info->rec_buff,
1622 						(uchar*)
1623 						(start=share->file_map+
1624 						 filepos))))
1625     goto err;
1626 #ifndef DBUG_OFF
1627   if (block_info.rec_len > info->s->max_pack_length)
1628   {
1629     my_errno=HA_ERR_WRONG_IN_RECORD;
1630     goto err;
1631   }
1632 #endif
1633   info->packed_length=block_info.rec_len;
1634   info->lastpos=filepos;
1635   info->nextpos=filepos+(uint) (pos-start)+block_info.rec_len;
1636   info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
1637 
1638   DBUG_RETURN (_mi_pack_rec_unpack(info, &info->bit_buff, buf,
1639                                    pos, block_info.rec_len));
1640  err:
1641   DBUG_RETURN(my_errno);
1642 }
1643 
1644 #endif /* HAVE_MMAP */
1645 
1646 	/* Save length of row */
1647 
save_pack_length(uint version,uchar * block_buff,ulong length)1648 uint save_pack_length(uint version, uchar *block_buff, ulong length)
1649 {
1650   if (length < 254)
1651   {
1652     *(uchar*) block_buff= (uchar) length;
1653     return 1;
1654   }
1655   if (length <= 65535)
1656   {
1657     *(uchar*) block_buff=254;
1658     int2store(block_buff+1,(uint) length);
1659     return 3;
1660   }
1661   *(uchar*) block_buff=255;
1662   if (version == 1) /* old format */
1663   {
1664     DBUG_ASSERT(length <= 0xFFFFFF);
1665     int3store(block_buff + 1, (ulong) length);
1666     return 4;
1667   }
1668   else
1669   {
1670     int4store(block_buff + 1, (ulong) length);
1671     return 5;
1672   }
1673 }
1674 
1675 
read_pack_length(uint version,const uchar * buf,ulong * length)1676 static uint read_pack_length(uint version, const uchar *buf, ulong *length)
1677 {
1678   if (buf[0] < 254)
1679   {
1680     *length= buf[0];
1681     return 1;
1682   }
1683   else if (buf[0] == 254)
1684   {
1685     *length= uint2korr(buf + 1);
1686     return 3;
1687   }
1688   if (version == 1) /* old format */
1689   {
1690     *length= uint3korr(buf + 1);
1691     return 4;
1692   }
1693   else
1694   {
1695     *length= uint4korr(buf + 1);
1696     return 5;
1697   }
1698 }
1699 
1700 
calc_pack_length(uint version,ulong length)1701 uint calc_pack_length(uint version, ulong length)
1702 {
1703   return (length < 254) ? 1 : (length < 65536) ? 3 : (version == 1) ? 4 : 5;
1704 }
1705