1 /*-------------------------------------------------------------------------
2  *
3  * uuid.c
4  *	  Functions for the built-in type "uuid".
5  *
6  * Copyright (c) 2007-2020, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/backend/utils/adt/uuid.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 
16 #include "common/hashfn.h"
17 #include "lib/hyperloglog.h"
18 #include "libpq/pqformat.h"
19 #include "port/pg_bswap.h"
20 #include "utils/builtins.h"
21 #include "utils/guc.h"
22 #include "utils/sortsupport.h"
23 #include "utils/uuid.h"
24 
25 /* sortsupport for uuid */
26 typedef struct
27 {
28 	int64		input_count;	/* number of non-null values seen */
29 	bool		estimating;		/* true if estimating cardinality */
30 
31 	hyperLogLogState abbr_card; /* cardinality estimator */
32 } uuid_sortsupport_state;
33 
34 static void string_to_uuid(const char *source, pg_uuid_t *uuid);
35 static int	uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
36 static int	uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
37 static int	uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
38 static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
39 static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
40 
41 Datum
uuid_in(PG_FUNCTION_ARGS)42 uuid_in(PG_FUNCTION_ARGS)
43 {
44 	char	   *uuid_str = PG_GETARG_CSTRING(0);
45 	pg_uuid_t  *uuid;
46 
47 	uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
48 	string_to_uuid(uuid_str, uuid);
49 	PG_RETURN_UUID_P(uuid);
50 }
51 
52 Datum
uuid_out(PG_FUNCTION_ARGS)53 uuid_out(PG_FUNCTION_ARGS)
54 {
55 	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
56 	static const char hex_chars[] = "0123456789abcdef";
57 	StringInfoData buf;
58 	int			i;
59 
60 	initStringInfo(&buf);
61 	for (i = 0; i < UUID_LEN; i++)
62 	{
63 		int			hi;
64 		int			lo;
65 
66 		/*
67 		 * We print uuid values as a string of 8, 4, 4, 4, and then 12
68 		 * hexadecimal characters, with each group is separated by a hyphen
69 		 * ("-"). Therefore, add the hyphens at the appropriate places here.
70 		 */
71 		if (i == 4 || i == 6 || i == 8 || i == 10)
72 			appendStringInfoChar(&buf, '-');
73 
74 		hi = uuid->data[i] >> 4;
75 		lo = uuid->data[i] & 0x0F;
76 
77 		appendStringInfoChar(&buf, hex_chars[hi]);
78 		appendStringInfoChar(&buf, hex_chars[lo]);
79 	}
80 
81 	PG_RETURN_CSTRING(buf.data);
82 }
83 
84 /*
85  * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
86  * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
87  * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
88  * digits, is the only one used for output.)
89  */
90 static void
string_to_uuid(const char * source,pg_uuid_t * uuid)91 string_to_uuid(const char *source, pg_uuid_t *uuid)
92 {
93 	const char *src = source;
94 	bool		braces = false;
95 	int			i;
96 
97 	if (src[0] == '{')
98 	{
99 		src++;
100 		braces = true;
101 	}
102 
103 	for (i = 0; i < UUID_LEN; i++)
104 	{
105 		char		str_buf[3];
106 
107 		if (src[0] == '\0' || src[1] == '\0')
108 			goto syntax_error;
109 		memcpy(str_buf, src, 2);
110 		if (!isxdigit((unsigned char) str_buf[0]) ||
111 			!isxdigit((unsigned char) str_buf[1]))
112 			goto syntax_error;
113 
114 		str_buf[2] = '\0';
115 		uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
116 		src += 2;
117 		if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
118 			src++;
119 	}
120 
121 	if (braces)
122 	{
123 		if (*src != '}')
124 			goto syntax_error;
125 		src++;
126 	}
127 
128 	if (*src != '\0')
129 		goto syntax_error;
130 
131 	return;
132 
133 syntax_error:
134 	ereport(ERROR,
135 			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
136 			 errmsg("invalid input syntax for type %s: \"%s\"",
137 					"uuid", source)));
138 }
139 
140 Datum
uuid_recv(PG_FUNCTION_ARGS)141 uuid_recv(PG_FUNCTION_ARGS)
142 {
143 	StringInfo	buffer = (StringInfo) PG_GETARG_POINTER(0);
144 	pg_uuid_t  *uuid;
145 
146 	uuid = (pg_uuid_t *) palloc(UUID_LEN);
147 	memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
148 	PG_RETURN_POINTER(uuid);
149 }
150 
151 Datum
uuid_send(PG_FUNCTION_ARGS)152 uuid_send(PG_FUNCTION_ARGS)
153 {
154 	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
155 	StringInfoData buffer;
156 
157 	pq_begintypsend(&buffer);
158 	pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
159 	PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
160 }
161 
162 /* internal uuid compare function */
163 static int
uuid_internal_cmp(const pg_uuid_t * arg1,const pg_uuid_t * arg2)164 uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
165 {
166 	return memcmp(arg1->data, arg2->data, UUID_LEN);
167 }
168 
169 Datum
uuid_lt(PG_FUNCTION_ARGS)170 uuid_lt(PG_FUNCTION_ARGS)
171 {
172 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
173 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
174 
175 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
176 }
177 
178 Datum
uuid_le(PG_FUNCTION_ARGS)179 uuid_le(PG_FUNCTION_ARGS)
180 {
181 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
182 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
183 
184 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
185 }
186 
187 Datum
uuid_eq(PG_FUNCTION_ARGS)188 uuid_eq(PG_FUNCTION_ARGS)
189 {
190 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
191 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
192 
193 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
194 }
195 
196 Datum
uuid_ge(PG_FUNCTION_ARGS)197 uuid_ge(PG_FUNCTION_ARGS)
198 {
199 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
200 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
201 
202 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
203 }
204 
205 Datum
uuid_gt(PG_FUNCTION_ARGS)206 uuid_gt(PG_FUNCTION_ARGS)
207 {
208 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
209 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
210 
211 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
212 }
213 
214 Datum
uuid_ne(PG_FUNCTION_ARGS)215 uuid_ne(PG_FUNCTION_ARGS)
216 {
217 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
218 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
219 
220 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
221 }
222 
223 /* handler for btree index operator */
224 Datum
uuid_cmp(PG_FUNCTION_ARGS)225 uuid_cmp(PG_FUNCTION_ARGS)
226 {
227 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
228 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
229 
230 	PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
231 }
232 
233 /*
234  * Sort support strategy routine
235  */
236 Datum
uuid_sortsupport(PG_FUNCTION_ARGS)237 uuid_sortsupport(PG_FUNCTION_ARGS)
238 {
239 	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
240 
241 	ssup->comparator = uuid_fast_cmp;
242 	ssup->ssup_extra = NULL;
243 
244 	if (ssup->abbreviate)
245 	{
246 		uuid_sortsupport_state *uss;
247 		MemoryContext oldcontext;
248 
249 		oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
250 
251 		uss = palloc(sizeof(uuid_sortsupport_state));
252 		uss->input_count = 0;
253 		uss->estimating = true;
254 		initHyperLogLog(&uss->abbr_card, 10);
255 
256 		ssup->ssup_extra = uss;
257 
258 		ssup->comparator = uuid_cmp_abbrev;
259 		ssup->abbrev_converter = uuid_abbrev_convert;
260 		ssup->abbrev_abort = uuid_abbrev_abort;
261 		ssup->abbrev_full_comparator = uuid_fast_cmp;
262 
263 		MemoryContextSwitchTo(oldcontext);
264 	}
265 
266 	PG_RETURN_VOID();
267 }
268 
269 /*
270  * SortSupport comparison func
271  */
272 static int
uuid_fast_cmp(Datum x,Datum y,SortSupport ssup)273 uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
274 {
275 	pg_uuid_t  *arg1 = DatumGetUUIDP(x);
276 	pg_uuid_t  *arg2 = DatumGetUUIDP(y);
277 
278 	return uuid_internal_cmp(arg1, arg2);
279 }
280 
281 /*
282  * Abbreviated key comparison func
283  */
284 static int
uuid_cmp_abbrev(Datum x,Datum y,SortSupport ssup)285 uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
286 {
287 	if (x > y)
288 		return 1;
289 	else if (x == y)
290 		return 0;
291 	else
292 		return -1;
293 }
294 
295 /*
296  * Callback for estimating effectiveness of abbreviated key optimization.
297  *
298  * We pay no attention to the cardinality of the non-abbreviated data, because
299  * there is no equality fast-path within authoritative uuid comparator.
300  */
301 static bool
uuid_abbrev_abort(int memtupcount,SortSupport ssup)302 uuid_abbrev_abort(int memtupcount, SortSupport ssup)
303 {
304 	uuid_sortsupport_state *uss = ssup->ssup_extra;
305 	double		abbr_card;
306 
307 	if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
308 		return false;
309 
310 	abbr_card = estimateHyperLogLog(&uss->abbr_card);
311 
312 	/*
313 	 * If we have >100k distinct values, then even if we were sorting many
314 	 * billion rows we'd likely still break even, and the penalty of undoing
315 	 * that many rows of abbrevs would probably not be worth it.  Stop even
316 	 * counting at that point.
317 	 */
318 	if (abbr_card > 100000.0)
319 	{
320 #ifdef TRACE_SORT
321 		if (trace_sort)
322 			elog(LOG,
323 				 "uuid_abbrev: estimation ends at cardinality %f"
324 				 " after " INT64_FORMAT " values (%d rows)",
325 				 abbr_card, uss->input_count, memtupcount);
326 #endif
327 		uss->estimating = false;
328 		return false;
329 	}
330 
331 	/*
332 	 * Target minimum cardinality is 1 per ~2k of non-null inputs.  0.5 row
333 	 * fudge factor allows us to abort earlier on genuinely pathological data
334 	 * where we've had exactly one abbreviated value in the first 2k
335 	 * (non-null) rows.
336 	 */
337 	if (abbr_card < uss->input_count / 2000.0 + 0.5)
338 	{
339 #ifdef TRACE_SORT
340 		if (trace_sort)
341 			elog(LOG,
342 				 "uuid_abbrev: aborting abbreviation at cardinality %f"
343 				 " below threshold %f after " INT64_FORMAT " values (%d rows)",
344 				 abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
345 				 memtupcount);
346 #endif
347 		return true;
348 	}
349 
350 #ifdef TRACE_SORT
351 	if (trace_sort)
352 		elog(LOG,
353 			 "uuid_abbrev: cardinality %f after " INT64_FORMAT
354 			 " values (%d rows)", abbr_card, uss->input_count, memtupcount);
355 #endif
356 
357 	return false;
358 }
359 
360 /*
361  * Conversion routine for sortsupport.  Converts original uuid representation
362  * to abbreviated key representation.  Our encoding strategy is simple -- pack
363  * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
364  * machines, the bytes are stored in reverse order), and treat it as an
365  * unsigned integer.
366  */
367 static Datum
uuid_abbrev_convert(Datum original,SortSupport ssup)368 uuid_abbrev_convert(Datum original, SortSupport ssup)
369 {
370 	uuid_sortsupport_state *uss = ssup->ssup_extra;
371 	pg_uuid_t  *authoritative = DatumGetUUIDP(original);
372 	Datum		res;
373 
374 	memcpy(&res, authoritative->data, sizeof(Datum));
375 	uss->input_count += 1;
376 
377 	if (uss->estimating)
378 	{
379 		uint32		tmp;
380 
381 #if SIZEOF_DATUM == 8
382 		tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
383 #else							/* SIZEOF_DATUM != 8 */
384 		tmp = (uint32) res;
385 #endif
386 
387 		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
388 	}
389 
390 	/*
391 	 * Byteswap on little-endian machines.
392 	 *
393 	 * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
394 	 * comparator) works correctly on all platforms.  If we didn't do this,
395 	 * the comparator would have to call memcmp() with a pair of pointers to
396 	 * the first byte of each abbreviated key, which is slower.
397 	 */
398 	res = DatumBigEndianToNative(res);
399 
400 	return res;
401 }
402 
403 /* hash index support */
404 Datum
uuid_hash(PG_FUNCTION_ARGS)405 uuid_hash(PG_FUNCTION_ARGS)
406 {
407 	pg_uuid_t  *key = PG_GETARG_UUID_P(0);
408 
409 	return hash_any(key->data, UUID_LEN);
410 }
411 
412 Datum
uuid_hash_extended(PG_FUNCTION_ARGS)413 uuid_hash_extended(PG_FUNCTION_ARGS)
414 {
415 	pg_uuid_t  *key = PG_GETARG_UUID_P(0);
416 
417 	return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
418 }
419 
420 Datum
gen_random_uuid(PG_FUNCTION_ARGS)421 gen_random_uuid(PG_FUNCTION_ARGS)
422 {
423 	pg_uuid_t  *uuid = palloc(UUID_LEN);
424 
425 	if (!pg_strong_random(uuid, UUID_LEN))
426 		ereport(ERROR,
427 				(errcode(ERRCODE_INTERNAL_ERROR),
428 				 errmsg("could not generate random values")));
429 
430 	/*
431 	 * Set magic numbers for a "version 4" (pseudorandom) UUID, see
432 	 * http://tools.ietf.org/html/rfc4122#section-4.4
433 	 */
434 	uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40;	/* time_hi_and_version */
435 	uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;	/* clock_seq_hi_and_reserved */
436 
437 	PG_RETURN_UUID_P(uuid);
438 }
439