1 /*-------------------------------------------------------------------------
2  *
3  * mac.c
4  *	  PostgreSQL type definitions for 6 byte, EUI-48, MAC addresses.
5  *
6  * Portions Copyright (c) 1998-2018, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *		  src/backend/utils/adt/mac.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 
16 #include "access/hash.h"
17 #include "lib/hyperloglog.h"
18 #include "libpq/pqformat.h"
19 #include "port/pg_bswap.h"
20 #include "utils/builtins.h"
21 #include "utils/guc.h"
22 #include "utils/inet.h"
23 #include "utils/sortsupport.h"
24 
25 
26 /*
27  *	Utility macros used for sorting and comparing:
28  */
29 
30 #define hibits(addr) \
31   ((unsigned long)(((addr)->a<<16)|((addr)->b<<8)|((addr)->c)))
32 
33 #define lobits(addr) \
34   ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f)))
35 
36 /* sortsupport for macaddr */
37 typedef struct
38 {
39 	int64		input_count;	/* number of non-null values seen */
40 	bool		estimating;		/* true if estimating cardinality */
41 
42 	hyperLogLogState abbr_card; /* cardinality estimator */
43 } macaddr_sortsupport_state;
44 
45 static int	macaddr_cmp_internal(macaddr *a1, macaddr *a2);
46 static int	macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup);
47 static int	macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
48 static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup);
49 static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup);
50 
51 /*
52  *	MAC address reader.  Accepts several common notations.
53  */
54 
55 Datum
56 macaddr_in(PG_FUNCTION_ARGS)
57 {
58 	char	   *str = PG_GETARG_CSTRING(0);
59 	macaddr    *result;
60 	int			a,
61 				b,
62 				c,
63 				d,
64 				e,
65 				f;
66 	char		junk[2];
67 	int			count;
68 
69 	/* %1s matches iff there is trailing non-whitespace garbage */
70 
71 	count = sscanf(str, "%x:%x:%x:%x:%x:%x%1s",
72 				   &a, &b, &c, &d, &e, &f, junk);
73 	if (count != 6)
74 		count = sscanf(str, "%x-%x-%x-%x-%x-%x%1s",
75 					   &a, &b, &c, &d, &e, &f, junk);
76 	if (count != 6)
77 		count = sscanf(str, "%2x%2x%2x:%2x%2x%2x%1s",
78 					   &a, &b, &c, &d, &e, &f, junk);
79 	if (count != 6)
80 		count = sscanf(str, "%2x%2x%2x-%2x%2x%2x%1s",
81 					   &a, &b, &c, &d, &e, &f, junk);
82 	if (count != 6)
83 		count = sscanf(str, "%2x%2x.%2x%2x.%2x%2x%1s",
84 					   &a, &b, &c, &d, &e, &f, junk);
85 	if (count != 6)
86 		count = sscanf(str, "%2x%2x-%2x%2x-%2x%2x%1s",
87 					   &a, &b, &c, &d, &e, &f, junk);
88 	if (count != 6)
89 		count = sscanf(str, "%2x%2x%2x%2x%2x%2x%1s",
90 					   &a, &b, &c, &d, &e, &f, junk);
91 	if (count != 6)
92 		ereport(ERROR,
93 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
94 				 errmsg("invalid input syntax for type %s: \"%s\"", "macaddr",
95 						str)));
96 
97 	if ((a < 0) || (a > 255) || (b < 0) || (b > 255) ||
98 		(c < 0) || (c > 255) || (d < 0) || (d > 255) ||
99 		(e < 0) || (e > 255) || (f < 0) || (f > 255))
100 		ereport(ERROR,
101 				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
102 				 errmsg("invalid octet value in \"macaddr\" value: \"%s\"", str)));
103 
104 	result = (macaddr *) palloc(sizeof(macaddr));
105 
106 	result->a = a;
107 	result->b = b;
108 	result->c = c;
109 	result->d = d;
110 	result->e = e;
111 	result->f = f;
112 
113 	PG_RETURN_MACADDR_P(result);
114 }
115 
116 /*
117  *	MAC address output function.  Fixed format.
118  */
119 
120 Datum
121 macaddr_out(PG_FUNCTION_ARGS)
122 {
123 	macaddr    *addr = PG_GETARG_MACADDR_P(0);
124 	char	   *result;
125 
126 	result = (char *) palloc(32);
127 
128 	snprintf(result, 32, "%02x:%02x:%02x:%02x:%02x:%02x",
129 			 addr->a, addr->b, addr->c, addr->d, addr->e, addr->f);
130 
131 	PG_RETURN_CSTRING(result);
132 }
133 
134 /*
135  *		macaddr_recv			- converts external binary format to macaddr
136  *
137  * The external representation is just the six bytes, MSB first.
138  */
139 Datum
140 macaddr_recv(PG_FUNCTION_ARGS)
141 {
142 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
143 	macaddr    *addr;
144 
145 	addr = (macaddr *) palloc(sizeof(macaddr));
146 
147 	addr->a = pq_getmsgbyte(buf);
148 	addr->b = pq_getmsgbyte(buf);
149 	addr->c = pq_getmsgbyte(buf);
150 	addr->d = pq_getmsgbyte(buf);
151 	addr->e = pq_getmsgbyte(buf);
152 	addr->f = pq_getmsgbyte(buf);
153 
154 	PG_RETURN_MACADDR_P(addr);
155 }
156 
157 /*
158  *		macaddr_send			- converts macaddr to binary format
159  */
160 Datum
161 macaddr_send(PG_FUNCTION_ARGS)
162 {
163 	macaddr    *addr = PG_GETARG_MACADDR_P(0);
164 	StringInfoData buf;
165 
166 	pq_begintypsend(&buf);
167 	pq_sendbyte(&buf, addr->a);
168 	pq_sendbyte(&buf, addr->b);
169 	pq_sendbyte(&buf, addr->c);
170 	pq_sendbyte(&buf, addr->d);
171 	pq_sendbyte(&buf, addr->e);
172 	pq_sendbyte(&buf, addr->f);
173 	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
174 }
175 
176 
177 /*
178  *	Comparison function for sorting:
179  */
180 
181 static int
182 macaddr_cmp_internal(macaddr *a1, macaddr *a2)
183 {
184 	if (hibits(a1) < hibits(a2))
185 		return -1;
186 	else if (hibits(a1) > hibits(a2))
187 		return 1;
188 	else if (lobits(a1) < lobits(a2))
189 		return -1;
190 	else if (lobits(a1) > lobits(a2))
191 		return 1;
192 	else
193 		return 0;
194 }
195 
196 Datum
197 macaddr_cmp(PG_FUNCTION_ARGS)
198 {
199 	macaddr    *a1 = PG_GETARG_MACADDR_P(0);
200 	macaddr    *a2 = PG_GETARG_MACADDR_P(1);
201 
202 	PG_RETURN_INT32(macaddr_cmp_internal(a1, a2));
203 }
204 
205 /*
206  *	Boolean comparisons.
207  */
208 
209 Datum
210 macaddr_lt(PG_FUNCTION_ARGS)
211 {
212 	macaddr    *a1 = PG_GETARG_MACADDR_P(0);
213 	macaddr    *a2 = PG_GETARG_MACADDR_P(1);
214 
215 	PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) < 0);
216 }
217 
218 Datum
219 macaddr_le(PG_FUNCTION_ARGS)
220 {
221 	macaddr    *a1 = PG_GETARG_MACADDR_P(0);
222 	macaddr    *a2 = PG_GETARG_MACADDR_P(1);
223 
224 	PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) <= 0);
225 }
226 
227 Datum
228 macaddr_eq(PG_FUNCTION_ARGS)
229 {
230 	macaddr    *a1 = PG_GETARG_MACADDR_P(0);
231 	macaddr    *a2 = PG_GETARG_MACADDR_P(1);
232 
233 	PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) == 0);
234 }
235 
236 Datum
237 macaddr_ge(PG_FUNCTION_ARGS)
238 {
239 	macaddr    *a1 = PG_GETARG_MACADDR_P(0);
240 	macaddr    *a2 = PG_GETARG_MACADDR_P(1);
241 
242 	PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) >= 0);
243 }
244 
245 Datum
246 macaddr_gt(PG_FUNCTION_ARGS)
247 {
248 	macaddr    *a1 = PG_GETARG_MACADDR_P(0);
249 	macaddr    *a2 = PG_GETARG_MACADDR_P(1);
250 
251 	PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) > 0);
252 }
253 
254 Datum
255 macaddr_ne(PG_FUNCTION_ARGS)
256 {
257 	macaddr    *a1 = PG_GETARG_MACADDR_P(0);
258 	macaddr    *a2 = PG_GETARG_MACADDR_P(1);
259 
260 	PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) != 0);
261 }
262 
263 /*
264  * Support function for hash indexes on macaddr.
265  */
266 Datum
267 hashmacaddr(PG_FUNCTION_ARGS)
268 {
269 	macaddr    *key = PG_GETARG_MACADDR_P(0);
270 
271 	return hash_any((unsigned char *) key, sizeof(macaddr));
272 }
273 
274 Datum
275 hashmacaddrextended(PG_FUNCTION_ARGS)
276 {
277 	macaddr    *key = PG_GETARG_MACADDR_P(0);
278 
279 	return hash_any_extended((unsigned char *) key, sizeof(macaddr),
280 							 PG_GETARG_INT64(1));
281 }
282 
283 /*
284  * Arithmetic functions: bitwise NOT, AND, OR.
285  */
286 Datum
287 macaddr_not(PG_FUNCTION_ARGS)
288 {
289 	macaddr    *addr = PG_GETARG_MACADDR_P(0);
290 	macaddr    *result;
291 
292 	result = (macaddr *) palloc(sizeof(macaddr));
293 	result->a = ~addr->a;
294 	result->b = ~addr->b;
295 	result->c = ~addr->c;
296 	result->d = ~addr->d;
297 	result->e = ~addr->e;
298 	result->f = ~addr->f;
299 	PG_RETURN_MACADDR_P(result);
300 }
301 
302 Datum
303 macaddr_and(PG_FUNCTION_ARGS)
304 {
305 	macaddr    *addr1 = PG_GETARG_MACADDR_P(0);
306 	macaddr    *addr2 = PG_GETARG_MACADDR_P(1);
307 	macaddr    *result;
308 
309 	result = (macaddr *) palloc(sizeof(macaddr));
310 	result->a = addr1->a & addr2->a;
311 	result->b = addr1->b & addr2->b;
312 	result->c = addr1->c & addr2->c;
313 	result->d = addr1->d & addr2->d;
314 	result->e = addr1->e & addr2->e;
315 	result->f = addr1->f & addr2->f;
316 	PG_RETURN_MACADDR_P(result);
317 }
318 
319 Datum
320 macaddr_or(PG_FUNCTION_ARGS)
321 {
322 	macaddr    *addr1 = PG_GETARG_MACADDR_P(0);
323 	macaddr    *addr2 = PG_GETARG_MACADDR_P(1);
324 	macaddr    *result;
325 
326 	result = (macaddr *) palloc(sizeof(macaddr));
327 	result->a = addr1->a | addr2->a;
328 	result->b = addr1->b | addr2->b;
329 	result->c = addr1->c | addr2->c;
330 	result->d = addr1->d | addr2->d;
331 	result->e = addr1->e | addr2->e;
332 	result->f = addr1->f | addr2->f;
333 	PG_RETURN_MACADDR_P(result);
334 }
335 
336 /*
337  *	Truncation function to allow comparing mac manufacturers.
338  *	From suggestion by Alex Pilosov <alex@pilosoft.com>
339  */
340 Datum
341 macaddr_trunc(PG_FUNCTION_ARGS)
342 {
343 	macaddr    *addr = PG_GETARG_MACADDR_P(0);
344 	macaddr    *result;
345 
346 	result = (macaddr *) palloc(sizeof(macaddr));
347 
348 	result->a = addr->a;
349 	result->b = addr->b;
350 	result->c = addr->c;
351 	result->d = 0;
352 	result->e = 0;
353 	result->f = 0;
354 
355 	PG_RETURN_MACADDR_P(result);
356 }
357 
358 /*
359  * SortSupport strategy function. Populates a SortSupport struct with the
360  * information necessary to use comparison by abbreviated keys.
361  */
362 Datum
363 macaddr_sortsupport(PG_FUNCTION_ARGS)
364 {
365 	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
366 
367 	ssup->comparator = macaddr_fast_cmp;
368 	ssup->ssup_extra = NULL;
369 
370 	if (ssup->abbreviate)
371 	{
372 		macaddr_sortsupport_state *uss;
373 		MemoryContext oldcontext;
374 
375 		oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
376 
377 		uss = palloc(sizeof(macaddr_sortsupport_state));
378 		uss->input_count = 0;
379 		uss->estimating = true;
380 		initHyperLogLog(&uss->abbr_card, 10);
381 
382 		ssup->ssup_extra = uss;
383 
384 		ssup->comparator = macaddr_cmp_abbrev;
385 		ssup->abbrev_converter = macaddr_abbrev_convert;
386 		ssup->abbrev_abort = macaddr_abbrev_abort;
387 		ssup->abbrev_full_comparator = macaddr_fast_cmp;
388 
389 		MemoryContextSwitchTo(oldcontext);
390 	}
391 
392 	PG_RETURN_VOID();
393 }
394 
395 /*
396  * SortSupport "traditional" comparison function. Pulls two MAC addresses from
397  * the heap and runs a standard comparison on them.
398  */
399 static int
400 macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup)
401 {
402 	macaddr    *arg1 = DatumGetMacaddrP(x);
403 	macaddr    *arg2 = DatumGetMacaddrP(y);
404 
405 	return macaddr_cmp_internal(arg1, arg2);
406 }
407 
408 /*
409  * SortSupport abbreviated key comparison function. Compares two MAC addresses
410  * quickly by treating them like integers, and without having to go to the
411  * heap.
412  */
413 static int
414 macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
415 {
416 	if (x > y)
417 		return 1;
418 	else if (x == y)
419 		return 0;
420 	else
421 		return -1;
422 }
423 
424 /*
425  * Callback for estimating effectiveness of abbreviated key optimization.
426  *
427  * We pay no attention to the cardinality of the non-abbreviated data, because
428  * there is no equality fast-path within authoritative macaddr comparator.
429  */
430 static bool
431 macaddr_abbrev_abort(int memtupcount, SortSupport ssup)
432 {
433 	macaddr_sortsupport_state *uss = ssup->ssup_extra;
434 	double		abbr_card;
435 
436 	if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
437 		return false;
438 
439 	abbr_card = estimateHyperLogLog(&uss->abbr_card);
440 
441 	/*
442 	 * If we have >100k distinct values, then even if we were sorting many
443 	 * billion rows we'd likely still break even, and the penalty of undoing
444 	 * that many rows of abbrevs would probably not be worth it. At this point
445 	 * we stop counting because we know that we're now fully committed.
446 	 */
447 	if (abbr_card > 100000.0)
448 	{
449 #ifdef TRACE_SORT
450 		if (trace_sort)
451 			elog(LOG,
452 				 "macaddr_abbrev: estimation ends at cardinality %f"
453 				 " after " INT64_FORMAT " values (%d rows)",
454 				 abbr_card, uss->input_count, memtupcount);
455 #endif
456 		uss->estimating = false;
457 		return false;
458 	}
459 
460 	/*
461 	 * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
462 	 * fudge factor allows us to abort earlier on genuinely pathological data
463 	 * where we've had exactly one abbreviated value in the first 2k
464 	 * (non-null) rows.
465 	 */
466 	if (abbr_card < uss->input_count / 2000.0 + 0.5)
467 	{
468 #ifdef TRACE_SORT
469 		if (trace_sort)
470 			elog(LOG,
471 				 "macaddr_abbrev: aborting abbreviation at cardinality %f"
472 				 " below threshold %f after " INT64_FORMAT " values (%d rows)",
473 				 abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
474 				 memtupcount);
475 #endif
476 		return true;
477 	}
478 
479 #ifdef TRACE_SORT
480 	if (trace_sort)
481 		elog(LOG,
482 			 "macaddr_abbrev: cardinality %f after " INT64_FORMAT
483 			 " values (%d rows)", abbr_card, uss->input_count, memtupcount);
484 #endif
485 
486 	return false;
487 }
488 
489 /*
490  * SortSupport conversion routine. Converts original macaddr representation
491  * to abbreviated key representation.
492  *
493  * Packs the bytes of a 6-byte MAC address into a Datum and treats it as an
494  * unsigned integer for purposes of comparison. On a 64-bit machine, there
495  * will be two zeroed bytes of padding. The integer is converted to native
496  * endianness to facilitate easy comparison.
497  */
498 static Datum
499 macaddr_abbrev_convert(Datum original, SortSupport ssup)
500 {
501 	macaddr_sortsupport_state *uss = ssup->ssup_extra;
502 	macaddr    *authoritative = DatumGetMacaddrP(original);
503 	Datum		res;
504 
505 	/*
506 	 * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of
507 	 * the MAC address in. There will be two bytes of zero padding on the end
508 	 * of the least significant bits.
509 	 */
510 #if SIZEOF_DATUM == 8
511 	memset(&res, 0, SIZEOF_DATUM);
512 	memcpy(&res, authoritative, sizeof(macaddr));
513 #else							/* SIZEOF_DATUM != 8 */
514 	memcpy(&res, authoritative, SIZEOF_DATUM);
515 #endif
516 	uss->input_count += 1;
517 
518 	/*
519 	 * Cardinality estimation. The estimate uses uint32, so on a 64-bit
520 	 * architecture, XOR the two 32-bit halves together to produce slightly
521 	 * more entropy. The two zeroed bytes won't have any practical impact on
522 	 * this operation.
523 	 */
524 	if (uss->estimating)
525 	{
526 		uint32		tmp;
527 
528 #if SIZEOF_DATUM == 8
529 		tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
530 #else							/* SIZEOF_DATUM != 8 */
531 		tmp = (uint32) res;
532 #endif
533 
534 		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
535 	}
536 
537 	/*
538 	 * Byteswap on little-endian machines.
539 	 *
540 	 * This is needed so that macaddr_cmp_abbrev() (an unsigned integer 3-way
541 	 * comparator) works correctly on all platforms. Without this, the
542 	 * comparator would have to call memcmp() with a pair of pointers to the
543 	 * first byte of each abbreviated key, which is slower.
544 	 */
545 	res = DatumBigEndianToNative(res);
546 
547 	return res;
548 }
549