/**

Copyright (c) 2021, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
**/
/**
@file include/fts0vlc.h
Full text variable length integer encoding/decoding.

Created 2021-10-19 Thirunarayanan Balathandayuthapani
**/
23 
/** Return length of val if it were encoded using our VLC scheme.
The scheme stores 7 payload bits per byte, so the result is the number of
7-bit groups needed to hold val: 1 byte for values below 2^7, and 10 bytes
for values of 2^63 and above.
@param	val	value to encode
@return length of value encoded, in bytes (1 to 10) */
inline size_t fts_get_encoded_len(doc_id_t val)
{
  /* Every additional byte widens the representable range by 7 bits;
  nine bytes cover everything below 2^63. The shift count never exceeds
  63, so the comparison operand is always well defined. */
  size_t len= 1;
  for (unsigned bits= 7; bits < 64; bits+= 7, len++)
    if (val < static_cast<doc_id_t>(1) << bits)
      return len;

  /* val >= 2^63: the topmost bit needs a tenth byte (len is 10 here). */
  return len;
}
49 
/** Encode an integer using our VLC scheme.
The value is written one 7-bit group per byte, most significant non-empty
group first. Only the final byte (the least significant group) has its
high bit (0x80) set, which marks the end of the encoded integer.
@param	val	value to encode
@param	buf	buffer, must have enough space (at most 10 bytes are written)
@return pointer to the byte just past the encoded value; subtracting the
original buf from it gives the encoded length in bytes */
inline byte *fts_encode_int(doc_id_t val, byte *buf)
{
  /* Locate the most significant non-empty 7-bit group. The shift is capped
  at 63, so a value with its top bit set starts with a single-bit group
  and no shift count ever reaches 64. */
  unsigned shift= 0;
  while (shift < 63 && val >= static_cast<doc_id_t>(1) << (shift + 7))
    shift+= 7;

  /* Emit the leading groups with the high bit clear ("more bytes follow"). */
  for (; shift; shift-= 7)
    *buf++= static_cast<byte>(val >> shift) & 0x7F;

  /* The last byte carries the low 7 bits plus the terminator flag. */
  *buf++= static_cast<byte>(val) | 0x80;
  return buf;
}
97 
/** Decode and return the integer that was encoded using
our VLC scheme.
Bytes are consumed until one with the high bit (0x80) set is found; each
byte contributes its low 7 bits, most significant group first. No bounds
checking is performed: the caller must guarantee that *ptr points to a
complete encoded value.
@param	ptr	pointer to decode from, this ptr is
		incremented by the number of bytes decoded
@return value decoded */
inline doc_id_t fts_decode_vlc(const byte **ptr)
{
  /* NOTE(review): ut_d()/ut_ad() are defined elsewhere; presumably they
  are active in debug builds only - confirm. */
  ut_d(const byte *const start= *ptr);
  /* An encoded value produced by fts_encode_int() never begins with a
  zero byte. */
  ut_ad(*start);

  doc_id_t val= 0;
  for (;;)
  {
    const byte b= *(*ptr)++;
    val|= (b & 0x7F);

    /* High-bit on means "last byte in the encoded integer". */
    if (b & 0x80)
      break;
    /* Making room for the next group must not shift bits off the top. */
    ut_ad(val < static_cast<doc_id_t>(1) << (64 - 7));
    val<<= 7;
  }

  /* The longest encoding fts_encode_int() produces is 10 bytes. */
  ut_ad(*ptr - start <= 10);

  return val;
}
125