1 /*-------------------------------------------------------------------------
2 *
3 * uuid.c
4 * Functions for the built-in type "uuid".
5 *
6 * Copyright (c) 2007-2016, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/utils/adt/uuid.c
10 *
11 *-------------------------------------------------------------------------
12 */
13
14 #include "postgres.h"
15
16 #include "access/hash.h"
17 #include "lib/hyperloglog.h"
18 #include "libpq/pqformat.h"
19 #include "port/pg_bswap.h"
20 #include "utils/builtins.h"
21 #include "utils/guc.h"
22 #include "utils/sortsupport.h"
23 #include "utils/uuid.h"
24
25 /* uuid size in bytes */
26 #define UUID_LEN 16
27
28 /* pg_uuid_t is declared to be struct pg_uuid_t in uuid.h */
29 struct pg_uuid_t
30 {
31 unsigned char data[UUID_LEN];
32 };
33
34 /* sortsupport for uuid */
35 typedef struct
36 {
37 int64 input_count; /* number of non-null values seen */
38 bool estimating; /* true if estimating cardinality */
39
40 hyperLogLogState abbr_card; /* cardinality estimator */
41 } uuid_sortsupport_state;
42
43 static void string_to_uuid(const char *source, pg_uuid_t *uuid);
44 static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
45 static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
46 static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
47 static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
48 static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
49
50 Datum
uuid_in(PG_FUNCTION_ARGS)51 uuid_in(PG_FUNCTION_ARGS)
52 {
53 char *uuid_str = PG_GETARG_CSTRING(0);
54 pg_uuid_t *uuid;
55
56 uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
57 string_to_uuid(uuid_str, uuid);
58 PG_RETURN_UUID_P(uuid);
59 }
60
61 Datum
uuid_out(PG_FUNCTION_ARGS)62 uuid_out(PG_FUNCTION_ARGS)
63 {
64 pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
65 static const char hex_chars[] = "0123456789abcdef";
66 StringInfoData buf;
67 int i;
68
69 initStringInfo(&buf);
70 for (i = 0; i < UUID_LEN; i++)
71 {
72 int hi;
73 int lo;
74
75 /*
76 * We print uuid values as a string of 8, 4, 4, 4, and then 12
77 * hexadecimal characters, with each group is separated by a hyphen
78 * ("-"). Therefore, add the hyphens at the appropriate places here.
79 */
80 if (i == 4 || i == 6 || i == 8 || i == 10)
81 appendStringInfoChar(&buf, '-');
82
83 hi = uuid->data[i] >> 4;
84 lo = uuid->data[i] & 0x0F;
85
86 appendStringInfoChar(&buf, hex_chars[hi]);
87 appendStringInfoChar(&buf, hex_chars[lo]);
88 }
89
90 PG_RETURN_CSTRING(buf.data);
91 }
92
93 /*
94 * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
95 * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
96 * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
97 * digits, is the only one used for output.)
98 */
99 static void
string_to_uuid(const char * source,pg_uuid_t * uuid)100 string_to_uuid(const char *source, pg_uuid_t *uuid)
101 {
102 const char *src = source;
103 bool braces = false;
104 int i;
105
106 if (src[0] == '{')
107 {
108 src++;
109 braces = true;
110 }
111
112 for (i = 0; i < UUID_LEN; i++)
113 {
114 char str_buf[3];
115
116 if (src[0] == '\0' || src[1] == '\0')
117 goto syntax_error;
118 memcpy(str_buf, src, 2);
119 if (!isxdigit((unsigned char) str_buf[0]) ||
120 !isxdigit((unsigned char) str_buf[1]))
121 goto syntax_error;
122
123 str_buf[2] = '\0';
124 uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
125 src += 2;
126 if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
127 src++;
128 }
129
130 if (braces)
131 {
132 if (*src != '}')
133 goto syntax_error;
134 src++;
135 }
136
137 if (*src != '\0')
138 goto syntax_error;
139
140 return;
141
142 syntax_error:
143 ereport(ERROR,
144 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
145 errmsg("invalid input syntax for uuid: \"%s\"",
146 source)));
147 }
148
149 Datum
uuid_recv(PG_FUNCTION_ARGS)150 uuid_recv(PG_FUNCTION_ARGS)
151 {
152 StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0);
153 pg_uuid_t *uuid;
154
155 uuid = (pg_uuid_t *) palloc(UUID_LEN);
156 memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
157 PG_RETURN_POINTER(uuid);
158 }
159
160 Datum
uuid_send(PG_FUNCTION_ARGS)161 uuid_send(PG_FUNCTION_ARGS)
162 {
163 pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
164 StringInfoData buffer;
165
166 pq_begintypsend(&buffer);
167 pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
168 PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
169 }
170
171 /* internal uuid compare function */
172 static int
uuid_internal_cmp(const pg_uuid_t * arg1,const pg_uuid_t * arg2)173 uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
174 {
175 return memcmp(arg1->data, arg2->data, UUID_LEN);
176 }
177
178 Datum
uuid_lt(PG_FUNCTION_ARGS)179 uuid_lt(PG_FUNCTION_ARGS)
180 {
181 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
182 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
183
184 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
185 }
186
187 Datum
uuid_le(PG_FUNCTION_ARGS)188 uuid_le(PG_FUNCTION_ARGS)
189 {
190 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
191 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
192
193 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
194 }
195
196 Datum
uuid_eq(PG_FUNCTION_ARGS)197 uuid_eq(PG_FUNCTION_ARGS)
198 {
199 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
200 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
201
202 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
203 }
204
205 Datum
uuid_ge(PG_FUNCTION_ARGS)206 uuid_ge(PG_FUNCTION_ARGS)
207 {
208 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
209 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
210
211 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
212 }
213
214 Datum
uuid_gt(PG_FUNCTION_ARGS)215 uuid_gt(PG_FUNCTION_ARGS)
216 {
217 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
218 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
219
220 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
221 }
222
223 Datum
uuid_ne(PG_FUNCTION_ARGS)224 uuid_ne(PG_FUNCTION_ARGS)
225 {
226 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
227 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
228
229 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
230 }
231
232 /* handler for btree index operator */
233 Datum
uuid_cmp(PG_FUNCTION_ARGS)234 uuid_cmp(PG_FUNCTION_ARGS)
235 {
236 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
237 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
238
239 PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
240 }
241
242 /*
243 * Sort support strategy routine
244 */
245 Datum
uuid_sortsupport(PG_FUNCTION_ARGS)246 uuid_sortsupport(PG_FUNCTION_ARGS)
247 {
248 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
249
250 ssup->comparator = uuid_fast_cmp;
251 ssup->ssup_extra = NULL;
252
253 if (ssup->abbreviate)
254 {
255 uuid_sortsupport_state *uss;
256 MemoryContext oldcontext;
257
258 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
259
260 uss = palloc(sizeof(uuid_sortsupport_state));
261 uss->input_count = 0;
262 uss->estimating = true;
263 initHyperLogLog(&uss->abbr_card, 10);
264
265 ssup->ssup_extra = uss;
266
267 ssup->comparator = uuid_cmp_abbrev;
268 ssup->abbrev_converter = uuid_abbrev_convert;
269 ssup->abbrev_abort = uuid_abbrev_abort;
270 ssup->abbrev_full_comparator = uuid_fast_cmp;
271
272 MemoryContextSwitchTo(oldcontext);
273 }
274
275 PG_RETURN_VOID();
276 }
277
278 /*
279 * SortSupport comparison func
280 */
281 static int
uuid_fast_cmp(Datum x,Datum y,SortSupport ssup)282 uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
283 {
284 pg_uuid_t *arg1 = DatumGetUUIDP(x);
285 pg_uuid_t *arg2 = DatumGetUUIDP(y);
286
287 return uuid_internal_cmp(arg1, arg2);
288 }
289
290 /*
291 * Abbreviated key comparison func
292 */
293 static int
uuid_cmp_abbrev(Datum x,Datum y,SortSupport ssup)294 uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
295 {
296 if (x > y)
297 return 1;
298 else if (x == y)
299 return 0;
300 else
301 return -1;
302 }
303
304 /*
305 * Callback for estimating effectiveness of abbreviated key optimization.
306 *
307 * We pay no attention to the cardinality of the non-abbreviated data, because
308 * there is no equality fast-path within authoritative uuid comparator.
309 */
310 static bool
uuid_abbrev_abort(int memtupcount,SortSupport ssup)311 uuid_abbrev_abort(int memtupcount, SortSupport ssup)
312 {
313 uuid_sortsupport_state *uss = ssup->ssup_extra;
314 double abbr_card;
315
316 if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
317 return false;
318
319 abbr_card = estimateHyperLogLog(&uss->abbr_card);
320
321 /*
322 * If we have >100k distinct values, then even if we were sorting many
323 * billion rows we'd likely still break even, and the penalty of undoing
324 * that many rows of abbrevs would probably not be worth it. Stop even
325 * counting at that point.
326 */
327 if (abbr_card > 100000.0)
328 {
329 #ifdef TRACE_SORT
330 if (trace_sort)
331 elog(LOG,
332 "uuid_abbrev: estimation ends at cardinality %f"
333 " after " INT64_FORMAT " values (%d rows)",
334 abbr_card, uss->input_count, memtupcount);
335 #endif
336 uss->estimating = false;
337 return false;
338 }
339
340 /*
341 * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
342 * fudge factor allows us to abort earlier on genuinely pathological data
343 * where we've had exactly one abbreviated value in the first 2k
344 * (non-null) rows.
345 */
346 if (abbr_card < uss->input_count / 2000.0 + 0.5)
347 {
348 #ifdef TRACE_SORT
349 if (trace_sort)
350 elog(LOG,
351 "uuid_abbrev: aborting abbreviation at cardinality %f"
352 " below threshold %f after " INT64_FORMAT " values (%d rows)",
353 abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
354 memtupcount);
355 #endif
356 return true;
357 }
358
359 #ifdef TRACE_SORT
360 if (trace_sort)
361 elog(LOG,
362 "uuid_abbrev: cardinality %f after " INT64_FORMAT
363 " values (%d rows)", abbr_card, uss->input_count, memtupcount);
364 #endif
365
366 return false;
367 }
368
369 /*
370 * Conversion routine for sortsupport. Converts original uuid representation
371 * to abbreviated key representation. Our encoding strategy is simple -- pack
372 * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
373 * machines, the bytes are stored in reverse order), and treat it as an
374 * unsigned integer.
375 */
376 static Datum
uuid_abbrev_convert(Datum original,SortSupport ssup)377 uuid_abbrev_convert(Datum original, SortSupport ssup)
378 {
379 uuid_sortsupport_state *uss = ssup->ssup_extra;
380 pg_uuid_t *authoritative = DatumGetUUIDP(original);
381 Datum res;
382
383 memcpy(&res, authoritative->data, sizeof(Datum));
384 uss->input_count += 1;
385
386 if (uss->estimating)
387 {
388 uint32 tmp;
389
390 #if SIZEOF_DATUM == 8
391 tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
392 #else /* SIZEOF_DATUM != 8 */
393 tmp = (uint32) res;
394 #endif
395
396 addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
397 }
398
399 /*
400 * Byteswap on little-endian machines.
401 *
402 * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
403 * comparator) works correctly on all platforms. If we didn't do this,
404 * the comparator would have to call memcmp() with a pair of pointers to
405 * the first byte of each abbreviated key, which is slower.
406 */
407 res = DatumBigEndianToNative(res);
408
409 return res;
410 }
411
412 /* hash index support */
413 Datum
uuid_hash(PG_FUNCTION_ARGS)414 uuid_hash(PG_FUNCTION_ARGS)
415 {
416 pg_uuid_t *key = PG_GETARG_UUID_P(0);
417
418 return hash_any(key->data, UUID_LEN);
419 }
420