1 /*-------------------------------------------------------------------------
2  *
3  * uuid.c
4  *	  Functions for the built-in type "uuid".
5  *
6  * Copyright (c) 2007-2016, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/backend/utils/adt/uuid.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 
16 #include "access/hash.h"
17 #include "lib/hyperloglog.h"
18 #include "libpq/pqformat.h"
19 #include "port/pg_bswap.h"
20 #include "utils/builtins.h"
21 #include "utils/guc.h"
22 #include "utils/sortsupport.h"
23 #include "utils/uuid.h"
24 
25 /* uuid size in bytes */
26 #define UUID_LEN 16
27 
28 /* pg_uuid_t is declared to be struct pg_uuid_t in uuid.h */
29 struct pg_uuid_t
30 {
31 	unsigned char data[UUID_LEN];
32 };
33 
34 /* sortsupport for uuid */
35 typedef struct
36 {
37 	int64		input_count;	/* number of non-null values seen */
38 	bool		estimating;		/* true if estimating cardinality */
39 
40 	hyperLogLogState abbr_card; /* cardinality estimator */
41 } uuid_sortsupport_state;
42 
43 static void string_to_uuid(const char *source, pg_uuid_t *uuid);
44 static int	uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
45 static int	uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
46 static int	uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
47 static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
48 static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
49 
50 Datum
uuid_in(PG_FUNCTION_ARGS)51 uuid_in(PG_FUNCTION_ARGS)
52 {
53 	char	   *uuid_str = PG_GETARG_CSTRING(0);
54 	pg_uuid_t  *uuid;
55 
56 	uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
57 	string_to_uuid(uuid_str, uuid);
58 	PG_RETURN_UUID_P(uuid);
59 }
60 
61 Datum
uuid_out(PG_FUNCTION_ARGS)62 uuid_out(PG_FUNCTION_ARGS)
63 {
64 	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
65 	static const char hex_chars[] = "0123456789abcdef";
66 	StringInfoData buf;
67 	int			i;
68 
69 	initStringInfo(&buf);
70 	for (i = 0; i < UUID_LEN; i++)
71 	{
72 		int			hi;
73 		int			lo;
74 
75 		/*
76 		 * We print uuid values as a string of 8, 4, 4, 4, and then 12
77 		 * hexadecimal characters, with each group is separated by a hyphen
78 		 * ("-"). Therefore, add the hyphens at the appropriate places here.
79 		 */
80 		if (i == 4 || i == 6 || i == 8 || i == 10)
81 			appendStringInfoChar(&buf, '-');
82 
83 		hi = uuid->data[i] >> 4;
84 		lo = uuid->data[i] & 0x0F;
85 
86 		appendStringInfoChar(&buf, hex_chars[hi]);
87 		appendStringInfoChar(&buf, hex_chars[lo]);
88 	}
89 
90 	PG_RETURN_CSTRING(buf.data);
91 }
92 
93 /*
94  * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
95  * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
96  * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
97  * digits, is the only one used for output.)
98  */
99 static void
string_to_uuid(const char * source,pg_uuid_t * uuid)100 string_to_uuid(const char *source, pg_uuid_t *uuid)
101 {
102 	const char *src = source;
103 	bool		braces = false;
104 	int			i;
105 
106 	if (src[0] == '{')
107 	{
108 		src++;
109 		braces = true;
110 	}
111 
112 	for (i = 0; i < UUID_LEN; i++)
113 	{
114 		char		str_buf[3];
115 
116 		if (src[0] == '\0' || src[1] == '\0')
117 			goto syntax_error;
118 		memcpy(str_buf, src, 2);
119 		if (!isxdigit((unsigned char) str_buf[0]) ||
120 			!isxdigit((unsigned char) str_buf[1]))
121 			goto syntax_error;
122 
123 		str_buf[2] = '\0';
124 		uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
125 		src += 2;
126 		if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
127 			src++;
128 	}
129 
130 	if (braces)
131 	{
132 		if (*src != '}')
133 			goto syntax_error;
134 		src++;
135 	}
136 
137 	if (*src != '\0')
138 		goto syntax_error;
139 
140 	return;
141 
142 syntax_error:
143 	ereport(ERROR,
144 			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
145 			 errmsg("invalid input syntax for uuid: \"%s\"",
146 					source)));
147 }
148 
149 Datum
uuid_recv(PG_FUNCTION_ARGS)150 uuid_recv(PG_FUNCTION_ARGS)
151 {
152 	StringInfo	buffer = (StringInfo) PG_GETARG_POINTER(0);
153 	pg_uuid_t  *uuid;
154 
155 	uuid = (pg_uuid_t *) palloc(UUID_LEN);
156 	memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
157 	PG_RETURN_POINTER(uuid);
158 }
159 
160 Datum
uuid_send(PG_FUNCTION_ARGS)161 uuid_send(PG_FUNCTION_ARGS)
162 {
163 	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
164 	StringInfoData buffer;
165 
166 	pq_begintypsend(&buffer);
167 	pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
168 	PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
169 }
170 
171 /* internal uuid compare function */
172 static int
uuid_internal_cmp(const pg_uuid_t * arg1,const pg_uuid_t * arg2)173 uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
174 {
175 	return memcmp(arg1->data, arg2->data, UUID_LEN);
176 }
177 
178 Datum
uuid_lt(PG_FUNCTION_ARGS)179 uuid_lt(PG_FUNCTION_ARGS)
180 {
181 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
182 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
183 
184 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
185 }
186 
187 Datum
uuid_le(PG_FUNCTION_ARGS)188 uuid_le(PG_FUNCTION_ARGS)
189 {
190 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
191 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
192 
193 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
194 }
195 
196 Datum
uuid_eq(PG_FUNCTION_ARGS)197 uuid_eq(PG_FUNCTION_ARGS)
198 {
199 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
200 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
201 
202 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
203 }
204 
205 Datum
uuid_ge(PG_FUNCTION_ARGS)206 uuid_ge(PG_FUNCTION_ARGS)
207 {
208 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
209 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
210 
211 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
212 }
213 
214 Datum
uuid_gt(PG_FUNCTION_ARGS)215 uuid_gt(PG_FUNCTION_ARGS)
216 {
217 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
218 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
219 
220 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
221 }
222 
223 Datum
uuid_ne(PG_FUNCTION_ARGS)224 uuid_ne(PG_FUNCTION_ARGS)
225 {
226 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
227 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
228 
229 	PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
230 }
231 
232 /* handler for btree index operator */
233 Datum
uuid_cmp(PG_FUNCTION_ARGS)234 uuid_cmp(PG_FUNCTION_ARGS)
235 {
236 	pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
237 	pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
238 
239 	PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
240 }
241 
242 /*
243  * Sort support strategy routine
244  */
245 Datum
uuid_sortsupport(PG_FUNCTION_ARGS)246 uuid_sortsupport(PG_FUNCTION_ARGS)
247 {
248 	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
249 
250 	ssup->comparator = uuid_fast_cmp;
251 	ssup->ssup_extra = NULL;
252 
253 	if (ssup->abbreviate)
254 	{
255 		uuid_sortsupport_state *uss;
256 		MemoryContext oldcontext;
257 
258 		oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
259 
260 		uss = palloc(sizeof(uuid_sortsupport_state));
261 		uss->input_count = 0;
262 		uss->estimating = true;
263 		initHyperLogLog(&uss->abbr_card, 10);
264 
265 		ssup->ssup_extra = uss;
266 
267 		ssup->comparator = uuid_cmp_abbrev;
268 		ssup->abbrev_converter = uuid_abbrev_convert;
269 		ssup->abbrev_abort = uuid_abbrev_abort;
270 		ssup->abbrev_full_comparator = uuid_fast_cmp;
271 
272 		MemoryContextSwitchTo(oldcontext);
273 	}
274 
275 	PG_RETURN_VOID();
276 }
277 
278 /*
279  * SortSupport comparison func
280  */
281 static int
uuid_fast_cmp(Datum x,Datum y,SortSupport ssup)282 uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
283 {
284 	pg_uuid_t  *arg1 = DatumGetUUIDP(x);
285 	pg_uuid_t  *arg2 = DatumGetUUIDP(y);
286 
287 	return uuid_internal_cmp(arg1, arg2);
288 }
289 
290 /*
291  * Abbreviated key comparison func
292  */
293 static int
uuid_cmp_abbrev(Datum x,Datum y,SortSupport ssup)294 uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
295 {
296 	if (x > y)
297 		return 1;
298 	else if (x == y)
299 		return 0;
300 	else
301 		return -1;
302 }
303 
304 /*
305  * Callback for estimating effectiveness of abbreviated key optimization.
306  *
307  * We pay no attention to the cardinality of the non-abbreviated data, because
308  * there is no equality fast-path within authoritative uuid comparator.
309  */
310 static bool
uuid_abbrev_abort(int memtupcount,SortSupport ssup)311 uuid_abbrev_abort(int memtupcount, SortSupport ssup)
312 {
313 	uuid_sortsupport_state *uss = ssup->ssup_extra;
314 	double		abbr_card;
315 
316 	if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
317 		return false;
318 
319 	abbr_card = estimateHyperLogLog(&uss->abbr_card);
320 
321 	/*
322 	 * If we have >100k distinct values, then even if we were sorting many
323 	 * billion rows we'd likely still break even, and the penalty of undoing
324 	 * that many rows of abbrevs would probably not be worth it.  Stop even
325 	 * counting at that point.
326 	 */
327 	if (abbr_card > 100000.0)
328 	{
329 #ifdef TRACE_SORT
330 		if (trace_sort)
331 			elog(LOG,
332 				 "uuid_abbrev: estimation ends at cardinality %f"
333 				 " after " INT64_FORMAT " values (%d rows)",
334 				 abbr_card, uss->input_count, memtupcount);
335 #endif
336 		uss->estimating = false;
337 		return false;
338 	}
339 
340 	/*
341 	 * Target minimum cardinality is 1 per ~2k of non-null inputs.  0.5 row
342 	 * fudge factor allows us to abort earlier on genuinely pathological data
343 	 * where we've had exactly one abbreviated value in the first 2k
344 	 * (non-null) rows.
345 	 */
346 	if (abbr_card < uss->input_count / 2000.0 + 0.5)
347 	{
348 #ifdef TRACE_SORT
349 		if (trace_sort)
350 			elog(LOG,
351 				 "uuid_abbrev: aborting abbreviation at cardinality %f"
352 			   " below threshold %f after " INT64_FORMAT " values (%d rows)",
353 				 abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
354 				 memtupcount);
355 #endif
356 		return true;
357 	}
358 
359 #ifdef TRACE_SORT
360 	if (trace_sort)
361 		elog(LOG,
362 			 "uuid_abbrev: cardinality %f after " INT64_FORMAT
363 			 " values (%d rows)", abbr_card, uss->input_count, memtupcount);
364 #endif
365 
366 	return false;
367 }
368 
369 /*
370  * Conversion routine for sortsupport.  Converts original uuid representation
371  * to abbreviated key representation.  Our encoding strategy is simple -- pack
372  * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
373  * machines, the bytes are stored in reverse order), and treat it as an
374  * unsigned integer.
375  */
376 static Datum
uuid_abbrev_convert(Datum original,SortSupport ssup)377 uuid_abbrev_convert(Datum original, SortSupport ssup)
378 {
379 	uuid_sortsupport_state *uss = ssup->ssup_extra;
380 	pg_uuid_t  *authoritative = DatumGetUUIDP(original);
381 	Datum		res;
382 
383 	memcpy(&res, authoritative->data, sizeof(Datum));
384 	uss->input_count += 1;
385 
386 	if (uss->estimating)
387 	{
388 		uint32		tmp;
389 
390 #if SIZEOF_DATUM == 8
391 		tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
392 #else							/* SIZEOF_DATUM != 8 */
393 		tmp = (uint32) res;
394 #endif
395 
396 		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
397 	}
398 
399 	/*
400 	 * Byteswap on little-endian machines.
401 	 *
402 	 * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
403 	 * comparator) works correctly on all platforms.  If we didn't do this,
404 	 * the comparator would have to call memcmp() with a pair of pointers to
405 	 * the first byte of each abbreviated key, which is slower.
406 	 */
407 	res = DatumBigEndianToNative(res);
408 
409 	return res;
410 }
411 
412 /* hash index support */
413 Datum
uuid_hash(PG_FUNCTION_ARGS)414 uuid_hash(PG_FUNCTION_ARGS)
415 {
416 	pg_uuid_t  *key = PG_GETARG_UUID_P(0);
417 
418 	return hash_any(key->data, UUID_LEN);
419 }
420