1 /*-------------------------------------------------------------------------
2  *
3  * toast_compression.c
4  *	  Functions for toast compression.
5  *
6  * Copyright (c) 2021, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/access/common/toast_compression.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #ifdef USE_LZ4
17 #include <lz4.h>
18 #endif
19 
20 #include "access/detoast.h"
21 #include "access/toast_compression.h"
22 #include "common/pg_lzcompress.h"
23 #include "fmgr.h"
24 #include "utils/builtins.h"
25 
26 /* GUC */
27 int			default_toast_compression = TOAST_PGLZ_COMPRESSION;
28 
29 #define NO_LZ4_SUPPORT() \
30 	ereport(ERROR, \
31 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
32 			 errmsg("compression method lz4 not supported"), \
33 			 errdetail("This functionality requires the server to be built with lz4 support."), \
34 			 errhint("You need to rebuild PostgreSQL using %s.", "--with-lz4")))
35 
36 /*
37  * Compress a varlena using PGLZ.
38  *
39  * Returns the compressed varlena, or NULL if compression fails.
40  */
41 struct varlena *
pglz_compress_datum(const struct varlena * value)42 pglz_compress_datum(const struct varlena *value)
43 {
44 	int32		valsize,
45 				len;
46 	struct varlena *tmp = NULL;
47 
48 	valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
49 
50 	/*
51 	 * No point in wasting a palloc cycle if value size is outside the allowed
52 	 * range for compression.
53 	 */
54 	if (valsize < PGLZ_strategy_default->min_input_size ||
55 		valsize > PGLZ_strategy_default->max_input_size)
56 		return NULL;
57 
58 	/*
59 	 * Figure out the maximum possible size of the pglz output, add the bytes
60 	 * that will be needed for varlena overhead, and allocate that amount.
61 	 */
62 	tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
63 									VARHDRSZ_COMPRESSED);
64 
65 	len = pglz_compress(VARDATA_ANY(value),
66 						valsize,
67 						(char *) tmp + VARHDRSZ_COMPRESSED,
68 						NULL);
69 	if (len < 0)
70 	{
71 		pfree(tmp);
72 		return NULL;
73 	}
74 
75 	SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED);
76 
77 	return tmp;
78 }
79 
80 /*
81  * Decompress a varlena that was compressed using PGLZ.
82  */
83 struct varlena *
pglz_decompress_datum(const struct varlena * value)84 pglz_decompress_datum(const struct varlena *value)
85 {
86 	struct varlena *result;
87 	int32		rawsize;
88 
89 	/* allocate memory for the uncompressed data */
90 	result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ);
91 
92 	/* decompress the data */
93 	rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED,
94 							  VARSIZE(value) - VARHDRSZ_COMPRESSED,
95 							  VARDATA(result),
96 							  VARDATA_COMPRESSED_GET_EXTSIZE(value), true);
97 	if (rawsize < 0)
98 		ereport(ERROR,
99 				(errcode(ERRCODE_DATA_CORRUPTED),
100 				 errmsg_internal("compressed pglz data is corrupt")));
101 
102 	SET_VARSIZE(result, rawsize + VARHDRSZ);
103 
104 	return result;
105 }
106 
107 /*
108  * Decompress part of a varlena that was compressed using PGLZ.
109  */
110 struct varlena *
pglz_decompress_datum_slice(const struct varlena * value,int32 slicelength)111 pglz_decompress_datum_slice(const struct varlena *value,
112 							int32 slicelength)
113 {
114 	struct varlena *result;
115 	int32		rawsize;
116 
117 	/* allocate memory for the uncompressed data */
118 	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
119 
120 	/* decompress the data */
121 	rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED,
122 							  VARSIZE(value) - VARHDRSZ_COMPRESSED,
123 							  VARDATA(result),
124 							  slicelength, false);
125 	if (rawsize < 0)
126 		ereport(ERROR,
127 				(errcode(ERRCODE_DATA_CORRUPTED),
128 				 errmsg_internal("compressed pglz data is corrupt")));
129 
130 	SET_VARSIZE(result, rawsize + VARHDRSZ);
131 
132 	return result;
133 }
134 
/*
 * Compress a varlena using LZ4.
 *
 * Returns the compressed varlena, or NULL if the LZ4 output came out larger
 * than the input payload.  Raises an ERROR if LZ4_compress_default() returns
 * a non-positive length, or (via NO_LZ4_SUPPORT) if USE_LZ4 is not defined.
 */
struct varlena *
lz4_compress_datum(const struct varlena *value)
{
#ifdef USE_LZ4
	int32		srclen = VARSIZE_ANY_EXHDR(value);
	int32		bound = LZ4_compressBound(srclen);
	int32		written;
	struct varlena *out;

	/*
	 * Allocate the worst-case LZ4 output size plus the bytes needed for the
	 * compressed varlena header.
	 */
	out = (struct varlena *) palloc(bound + VARHDRSZ_COMPRESSED);

	written = LZ4_compress_default(VARDATA_ANY(value),
								   (char *) out + VARHDRSZ_COMPRESSED,
								   srclen, bound);
	if (written <= 0)
		elog(ERROR, "lz4 compression failed");

	/* data is incompressible so just free the memory and return NULL */
	if (written > srclen)
	{
		pfree(out);
		return NULL;
	}

	SET_VARSIZE_COMPRESSED(out, written + VARHDRSZ_COMPRESSED);

	return out;
#else
	NO_LZ4_SUPPORT();
	return NULL;				/* keep compiler quiet */
#endif
}
179 
/*
 * Decompress a varlena that was compressed using LZ4.
 *
 * The output buffer is sized from VARDATA_COMPRESSED_GET_EXTSIZE(value) plus
 * a regular varlena header.  A negative result from LZ4_decompress_safe() is
 * reported as ERRCODE_DATA_CORRUPTED.  Raises an ERROR via NO_LZ4_SUPPORT
 * when USE_LZ4 is not defined.
 */
struct varlena *
lz4_decompress_datum(const struct varlena *value)
{
#ifdef USE_LZ4
	const char *src = (const char *) value + VARHDRSZ_COMPRESSED;
	int32		srclen = VARSIZE(value) - VARHDRSZ_COMPRESSED;
	int32		extsize = VARDATA_COMPRESSED_GET_EXTSIZE(value);
	int32		rawsize;
	struct varlena *result;

	/* room for the uncompressed data and its header */
	result = (struct varlena *) palloc(extsize + VARHDRSZ);

	/* expand the whole datum into the payload area of the result */
	rawsize = LZ4_decompress_safe(src, VARDATA(result), srclen, extsize);
	if (rawsize < 0)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg_internal("compressed lz4 data is corrupt")));

	SET_VARSIZE(result, rawsize + VARHDRSZ);

	return result;
#else
	NO_LZ4_SUPPORT();
	return NULL;				/* keep compiler quiet */
#endif
}
212 
213 /*
214  * Decompress part of a varlena that was compressed using LZ4.
215  */
216 struct varlena *
lz4_decompress_datum_slice(const struct varlena * value,int32 slicelength)217 lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
218 {
219 #ifndef USE_LZ4
220 	NO_LZ4_SUPPORT();
221 	return NULL;				/* keep compiler quiet */
222 #else
223 	int32		rawsize;
224 	struct varlena *result;
225 
226 	/* slice decompression not supported prior to 1.8.3 */
227 	if (LZ4_versionNumber() < 10803)
228 		return lz4_decompress_datum(value);
229 
230 	/* allocate memory for the uncompressed data */
231 	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
232 
233 	/* decompress the data */
234 	rawsize = LZ4_decompress_safe_partial((char *) value + VARHDRSZ_COMPRESSED,
235 										  VARDATA(result),
236 										  VARSIZE(value) - VARHDRSZ_COMPRESSED,
237 										  slicelength,
238 										  slicelength);
239 	if (rawsize < 0)
240 		ereport(ERROR,
241 				(errcode(ERRCODE_DATA_CORRUPTED),
242 				 errmsg_internal("compressed lz4 data is corrupt")));
243 
244 	SET_VARSIZE(result, rawsize + VARHDRSZ);
245 
246 	return result;
247 #endif
248 }
249 
250 /*
251  * Extract compression ID from a varlena.
252  *
253  * Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed.
254  */
255 ToastCompressionId
toast_get_compression_id(struct varlena * attr)256 toast_get_compression_id(struct varlena *attr)
257 {
258 	ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID;
259 
260 	/*
261 	 * If it is stored externally then fetch the compression method id from
262 	 * the external toast pointer.  If compressed inline, fetch it from the
263 	 * toast compression header.
264 	 */
265 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
266 	{
267 		struct varatt_external toast_pointer;
268 
269 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
270 
271 		if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
272 			cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer);
273 	}
274 	else if (VARATT_IS_COMPRESSED(attr))
275 		cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr);
276 
277 	return cmid;
278 }
279 
280 /*
281  * CompressionNameToMethod - Get compression method from compression name
282  *
283  * Search in the available built-in methods.  If the compression not found
284  * in the built-in methods then return InvalidCompressionMethod.
285  */
286 char
CompressionNameToMethod(const char * compression)287 CompressionNameToMethod(const char *compression)
288 {
289 	if (strcmp(compression, "pglz") == 0)
290 		return TOAST_PGLZ_COMPRESSION;
291 	else if (strcmp(compression, "lz4") == 0)
292 	{
293 #ifndef USE_LZ4
294 		NO_LZ4_SUPPORT();
295 #endif
296 		return TOAST_LZ4_COMPRESSION;
297 	}
298 
299 	return InvalidCompressionMethod;
300 }
301 
302 /*
303  * GetCompressionMethodName - Get compression method name
304  */
305 const char *
GetCompressionMethodName(char method)306 GetCompressionMethodName(char method)
307 {
308 	switch (method)
309 	{
310 		case TOAST_PGLZ_COMPRESSION:
311 			return "pglz";
312 		case TOAST_LZ4_COMPRESSION:
313 			return "lz4";
314 		default:
315 			elog(ERROR, "invalid compression method %c", method);
316 			return NULL;		/* keep compiler quiet */
317 	}
318 }
319