1 /*-------------------------------------------------------------------------
2 *
3 * toast_compression.c
4 * Functions for toast compression.
5 *
6 * Copyright (c) 2021, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/access/common/toast_compression.c
11 *
12 *-------------------------------------------------------------------------
13 */
14 #include "postgres.h"
15
16 #ifdef USE_LZ4
17 #include <lz4.h>
18 #endif
19
20 #include "access/detoast.h"
21 #include "access/toast_compression.h"
22 #include "common/pg_lzcompress.h"
23 #include "fmgr.h"
24 #include "utils/builtins.h"
25
26 /* GUC */
27 int default_toast_compression = TOAST_PGLZ_COMPRESSION;
28
29 #define NO_LZ4_SUPPORT() \
30 ereport(ERROR, \
31 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
32 errmsg("compression method lz4 not supported"), \
33 errdetail("This functionality requires the server to be built with lz4 support."), \
34 errhint("You need to rebuild PostgreSQL using %s.", "--with-lz4")))
35
36 /*
37 * Compress a varlena using PGLZ.
38 *
39 * Returns the compressed varlena, or NULL if compression fails.
40 */
41 struct varlena *
pglz_compress_datum(const struct varlena * value)42 pglz_compress_datum(const struct varlena *value)
43 {
44 int32 valsize,
45 len;
46 struct varlena *tmp = NULL;
47
48 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
49
50 /*
51 * No point in wasting a palloc cycle if value size is outside the allowed
52 * range for compression.
53 */
54 if (valsize < PGLZ_strategy_default->min_input_size ||
55 valsize > PGLZ_strategy_default->max_input_size)
56 return NULL;
57
58 /*
59 * Figure out the maximum possible size of the pglz output, add the bytes
60 * that will be needed for varlena overhead, and allocate that amount.
61 */
62 tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
63 VARHDRSZ_COMPRESSED);
64
65 len = pglz_compress(VARDATA_ANY(value),
66 valsize,
67 (char *) tmp + VARHDRSZ_COMPRESSED,
68 NULL);
69 if (len < 0)
70 {
71 pfree(tmp);
72 return NULL;
73 }
74
75 SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED);
76
77 return tmp;
78 }
79
80 /*
81 * Decompress a varlena that was compressed using PGLZ.
82 */
83 struct varlena *
pglz_decompress_datum(const struct varlena * value)84 pglz_decompress_datum(const struct varlena *value)
85 {
86 struct varlena *result;
87 int32 rawsize;
88
89 /* allocate memory for the uncompressed data */
90 result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ);
91
92 /* decompress the data */
93 rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED,
94 VARSIZE(value) - VARHDRSZ_COMPRESSED,
95 VARDATA(result),
96 VARDATA_COMPRESSED_GET_EXTSIZE(value), true);
97 if (rawsize < 0)
98 ereport(ERROR,
99 (errcode(ERRCODE_DATA_CORRUPTED),
100 errmsg_internal("compressed pglz data is corrupt")));
101
102 SET_VARSIZE(result, rawsize + VARHDRSZ);
103
104 return result;
105 }
106
107 /*
108 * Decompress part of a varlena that was compressed using PGLZ.
109 */
110 struct varlena *
pglz_decompress_datum_slice(const struct varlena * value,int32 slicelength)111 pglz_decompress_datum_slice(const struct varlena *value,
112 int32 slicelength)
113 {
114 struct varlena *result;
115 int32 rawsize;
116
117 /* allocate memory for the uncompressed data */
118 result = (struct varlena *) palloc(slicelength + VARHDRSZ);
119
120 /* decompress the data */
121 rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED,
122 VARSIZE(value) - VARHDRSZ_COMPRESSED,
123 VARDATA(result),
124 slicelength, false);
125 if (rawsize < 0)
126 ereport(ERROR,
127 (errcode(ERRCODE_DATA_CORRUPTED),
128 errmsg_internal("compressed pglz data is corrupt")));
129
130 SET_VARSIZE(result, rawsize + VARHDRSZ);
131
132 return result;
133 }
134
135 /*
136 * Compress a varlena using LZ4.
137 *
138 * Returns the compressed varlena, or NULL if compression fails.
139 */
140 struct varlena *
lz4_compress_datum(const struct varlena * value)141 lz4_compress_datum(const struct varlena *value)
142 {
143 #ifndef USE_LZ4
144 NO_LZ4_SUPPORT();
145 return NULL; /* keep compiler quiet */
146 #else
147 int32 valsize;
148 int32 len;
149 int32 max_size;
150 struct varlena *tmp = NULL;
151
152 valsize = VARSIZE_ANY_EXHDR(value);
153
154 /*
155 * Figure out the maximum possible size of the LZ4 output, add the bytes
156 * that will be needed for varlena overhead, and allocate that amount.
157 */
158 max_size = LZ4_compressBound(valsize);
159 tmp = (struct varlena *) palloc(max_size + VARHDRSZ_COMPRESSED);
160
161 len = LZ4_compress_default(VARDATA_ANY(value),
162 (char *) tmp + VARHDRSZ_COMPRESSED,
163 valsize, max_size);
164 if (len <= 0)
165 elog(ERROR, "lz4 compression failed");
166
167 /* data is incompressible so just free the memory and return NULL */
168 if (len > valsize)
169 {
170 pfree(tmp);
171 return NULL;
172 }
173
174 SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED);
175
176 return tmp;
177 #endif
178 }
179
180 /*
181 * Decompress a varlena that was compressed using LZ4.
182 */
183 struct varlena *
lz4_decompress_datum(const struct varlena * value)184 lz4_decompress_datum(const struct varlena *value)
185 {
186 #ifndef USE_LZ4
187 NO_LZ4_SUPPORT();
188 return NULL; /* keep compiler quiet */
189 #else
190 int32 rawsize;
191 struct varlena *result;
192
193 /* allocate memory for the uncompressed data */
194 result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ);
195
196 /* decompress the data */
197 rawsize = LZ4_decompress_safe((char *) value + VARHDRSZ_COMPRESSED,
198 VARDATA(result),
199 VARSIZE(value) - VARHDRSZ_COMPRESSED,
200 VARDATA_COMPRESSED_GET_EXTSIZE(value));
201 if (rawsize < 0)
202 ereport(ERROR,
203 (errcode(ERRCODE_DATA_CORRUPTED),
204 errmsg_internal("compressed lz4 data is corrupt")));
205
206
207 SET_VARSIZE(result, rawsize + VARHDRSZ);
208
209 return result;
210 #endif
211 }
212
213 /*
214 * Decompress part of a varlena that was compressed using LZ4.
215 */
216 struct varlena *
lz4_decompress_datum_slice(const struct varlena * value,int32 slicelength)217 lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
218 {
219 #ifndef USE_LZ4
220 NO_LZ4_SUPPORT();
221 return NULL; /* keep compiler quiet */
222 #else
223 int32 rawsize;
224 struct varlena *result;
225
226 /* slice decompression not supported prior to 1.8.3 */
227 if (LZ4_versionNumber() < 10803)
228 return lz4_decompress_datum(value);
229
230 /* allocate memory for the uncompressed data */
231 result = (struct varlena *) palloc(slicelength + VARHDRSZ);
232
233 /* decompress the data */
234 rawsize = LZ4_decompress_safe_partial((char *) value + VARHDRSZ_COMPRESSED,
235 VARDATA(result),
236 VARSIZE(value) - VARHDRSZ_COMPRESSED,
237 slicelength,
238 slicelength);
239 if (rawsize < 0)
240 ereport(ERROR,
241 (errcode(ERRCODE_DATA_CORRUPTED),
242 errmsg_internal("compressed lz4 data is corrupt")));
243
244 SET_VARSIZE(result, rawsize + VARHDRSZ);
245
246 return result;
247 #endif
248 }
249
250 /*
251 * Extract compression ID from a varlena.
252 *
253 * Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed.
254 */
255 ToastCompressionId
toast_get_compression_id(struct varlena * attr)256 toast_get_compression_id(struct varlena *attr)
257 {
258 ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID;
259
260 /*
261 * If it is stored externally then fetch the compression method id from
262 * the external toast pointer. If compressed inline, fetch it from the
263 * toast compression header.
264 */
265 if (VARATT_IS_EXTERNAL_ONDISK(attr))
266 {
267 struct varatt_external toast_pointer;
268
269 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
270
271 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
272 cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer);
273 }
274 else if (VARATT_IS_COMPRESSED(attr))
275 cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr);
276
277 return cmid;
278 }
279
280 /*
281 * CompressionNameToMethod - Get compression method from compression name
282 *
283 * Search in the available built-in methods. If the compression not found
284 * in the built-in methods then return InvalidCompressionMethod.
285 */
286 char
CompressionNameToMethod(const char * compression)287 CompressionNameToMethod(const char *compression)
288 {
289 if (strcmp(compression, "pglz") == 0)
290 return TOAST_PGLZ_COMPRESSION;
291 else if (strcmp(compression, "lz4") == 0)
292 {
293 #ifndef USE_LZ4
294 NO_LZ4_SUPPORT();
295 #endif
296 return TOAST_LZ4_COMPRESSION;
297 }
298
299 return InvalidCompressionMethod;
300 }
301
302 /*
303 * GetCompressionMethodName - Get compression method name
304 */
305 const char *
GetCompressionMethodName(char method)306 GetCompressionMethodName(char method)
307 {
308 switch (method)
309 {
310 case TOAST_PGLZ_COMPRESSION:
311 return "pglz";
312 case TOAST_LZ4_COMPRESSION:
313 return "lz4";
314 default:
315 elog(ERROR, "invalid compression method %c", method);
316 return NULL; /* keep compiler quiet */
317 }
318 }
319