1 /*------------------------------------------------------------------------- 2 * 3 * mac.c 4 * PostgreSQL type definitions for 6 byte, EUI-48, MAC addresses. 5 * 6 * Portions Copyright (c) 1998-2018, PostgreSQL Global Development Group 7 * 8 * IDENTIFICATION 9 * src/backend/utils/adt/mac.c 10 * 11 *------------------------------------------------------------------------- 12 */ 13 14 #include "postgres.h" 15 16 #include "access/hash.h" 17 #include "lib/hyperloglog.h" 18 #include "libpq/pqformat.h" 19 #include "port/pg_bswap.h" 20 #include "utils/builtins.h" 21 #include "utils/guc.h" 22 #include "utils/inet.h" 23 #include "utils/sortsupport.h" 24 25 26 /* 27 * Utility macros used for sorting and comparing: 28 */ 29 30 #define hibits(addr) \ 31 ((unsigned long)(((addr)->a<<16)|((addr)->b<<8)|((addr)->c))) 32 33 #define lobits(addr) \ 34 ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f))) 35 36 /* sortsupport for macaddr */ 37 typedef struct 38 { 39 int64 input_count; /* number of non-null values seen */ 40 bool estimating; /* true if estimating cardinality */ 41 42 hyperLogLogState abbr_card; /* cardinality estimator */ 43 } macaddr_sortsupport_state; 44 45 static int macaddr_cmp_internal(macaddr *a1, macaddr *a2); 46 static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup); 47 static int macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup); 48 static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup); 49 static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup); 50 51 /* 52 * MAC address reader. Accepts several common notations. 53 */ 54 55 Datum 56 macaddr_in(PG_FUNCTION_ARGS) 57 { 58 char *str = PG_GETARG_CSTRING(0); 59 macaddr *result; 60 int a, 61 b, 62 c, 63 d, 64 e, 65 f; 66 char junk[2]; 67 int count; 68 69 /* %1s matches iff there is trailing non-whitespace garbage */ 70 71 count = sscanf(str, "%x:%x:%x:%x:%x:%x%1s", 72 &a, &b, &c, &d, &e, &f, junk); 73 if (count != 6) 74 count = sscanf(str, "%x-%x-%x-%x-%x-%x%1s", 75 &a, &b, &c, &d, &e, &f, junk); 76 if (count != 6) 77 count = sscanf(str, "%2x%2x%2x:%2x%2x%2x%1s", 78 &a, &b, &c, &d, &e, &f, junk); 79 if (count != 6) 80 count = sscanf(str, "%2x%2x%2x-%2x%2x%2x%1s", 81 &a, &b, &c, &d, &e, &f, junk); 82 if (count != 6) 83 count = sscanf(str, "%2x%2x.%2x%2x.%2x%2x%1s", 84 &a, &b, &c, &d, &e, &f, junk); 85 if (count != 6) 86 count = sscanf(str, "%2x%2x-%2x%2x-%2x%2x%1s", 87 &a, &b, &c, &d, &e, &f, junk); 88 if (count != 6) 89 count = sscanf(str, "%2x%2x%2x%2x%2x%2x%1s", 90 &a, &b, &c, &d, &e, &f, junk); 91 if (count != 6) 92 ereport(ERROR, 93 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 94 errmsg("invalid input syntax for type %s: \"%s\"", "macaddr", 95 str))); 96 97 if ((a < 0) || (a > 255) || (b < 0) || (b > 255) || 98 (c < 0) || (c > 255) || (d < 0) || (d > 255) || 99 (e < 0) || (e > 255) || (f < 0) || (f > 255)) 100 ereport(ERROR, 101 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), 102 errmsg("invalid octet value in \"macaddr\" value: \"%s\"", str))); 103 104 result = (macaddr *) palloc(sizeof(macaddr)); 105 106 result->a = a; 107 result->b = b; 108 result->c = c; 109 result->d = d; 110 result->e = e; 111 result->f = f; 112 113 PG_RETURN_MACADDR_P(result); 114 } 115 116 /* 117 * MAC address output function. Fixed format. 118 */ 119 120 Datum 121 macaddr_out(PG_FUNCTION_ARGS) 122 { 123 macaddr *addr = PG_GETARG_MACADDR_P(0); 124 char *result; 125 126 result = (char *) palloc(32); 127 128 snprintf(result, 32, "%02x:%02x:%02x:%02x:%02x:%02x", 129 addr->a, addr->b, addr->c, addr->d, addr->e, addr->f); 130 131 PG_RETURN_CSTRING(result); 132 } 133 134 /* 135 * macaddr_recv - converts external binary format to macaddr 136 * 137 * The external representation is just the six bytes, MSB first. 138 */ 139 Datum 140 macaddr_recv(PG_FUNCTION_ARGS) 141 { 142 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); 143 macaddr *addr; 144 145 addr = (macaddr *) palloc(sizeof(macaddr)); 146 147 addr->a = pq_getmsgbyte(buf); 148 addr->b = pq_getmsgbyte(buf); 149 addr->c = pq_getmsgbyte(buf); 150 addr->d = pq_getmsgbyte(buf); 151 addr->e = pq_getmsgbyte(buf); 152 addr->f = pq_getmsgbyte(buf); 153 154 PG_RETURN_MACADDR_P(addr); 155 } 156 157 /* 158 * macaddr_send - converts macaddr to binary format 159 */ 160 Datum 161 macaddr_send(PG_FUNCTION_ARGS) 162 { 163 macaddr *addr = PG_GETARG_MACADDR_P(0); 164 StringInfoData buf; 165 166 pq_begintypsend(&buf); 167 pq_sendbyte(&buf, addr->a); 168 pq_sendbyte(&buf, addr->b); 169 pq_sendbyte(&buf, addr->c); 170 pq_sendbyte(&buf, addr->d); 171 pq_sendbyte(&buf, addr->e); 172 pq_sendbyte(&buf, addr->f); 173 PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); 174 } 175 176 177 /* 178 * Comparison function for sorting: 179 */ 180 181 static int 182 macaddr_cmp_internal(macaddr *a1, macaddr *a2) 183 { 184 if (hibits(a1) < hibits(a2)) 185 return -1; 186 else if (hibits(a1) > hibits(a2)) 187 return 1; 188 else if (lobits(a1) < lobits(a2)) 189 return -1; 190 else if (lobits(a1) > lobits(a2)) 191 return 1; 192 else 193 return 0; 194 } 195 196 Datum 197 macaddr_cmp(PG_FUNCTION_ARGS) 198 { 199 macaddr *a1 = PG_GETARG_MACADDR_P(0); 200 macaddr *a2 = PG_GETARG_MACADDR_P(1); 201 202 PG_RETURN_INT32(macaddr_cmp_internal(a1, a2)); 203 } 204 205 /* 206 * Boolean comparisons. 207 */ 208 209 Datum 210 macaddr_lt(PG_FUNCTION_ARGS) 211 { 212 macaddr *a1 = PG_GETARG_MACADDR_P(0); 213 macaddr *a2 = PG_GETARG_MACADDR_P(1); 214 215 PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) < 0); 216 } 217 218 Datum 219 macaddr_le(PG_FUNCTION_ARGS) 220 { 221 macaddr *a1 = PG_GETARG_MACADDR_P(0); 222 macaddr *a2 = PG_GETARG_MACADDR_P(1); 223 224 PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) <= 0); 225 } 226 227 Datum 228 macaddr_eq(PG_FUNCTION_ARGS) 229 { 230 macaddr *a1 = PG_GETARG_MACADDR_P(0); 231 macaddr *a2 = PG_GETARG_MACADDR_P(1); 232 233 PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) == 0); 234 } 235 236 Datum 237 macaddr_ge(PG_FUNCTION_ARGS) 238 { 239 macaddr *a1 = PG_GETARG_MACADDR_P(0); 240 macaddr *a2 = PG_GETARG_MACADDR_P(1); 241 242 PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) >= 0); 243 } 244 245 Datum 246 macaddr_gt(PG_FUNCTION_ARGS) 247 { 248 macaddr *a1 = PG_GETARG_MACADDR_P(0); 249 macaddr *a2 = PG_GETARG_MACADDR_P(1); 250 251 PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) > 0); 252 } 253 254 Datum 255 macaddr_ne(PG_FUNCTION_ARGS) 256 { 257 macaddr *a1 = PG_GETARG_MACADDR_P(0); 258 macaddr *a2 = PG_GETARG_MACADDR_P(1); 259 260 PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) != 0); 261 } 262 263 /* 264 * Support function for hash indexes on macaddr. 265 */ 266 Datum 267 hashmacaddr(PG_FUNCTION_ARGS) 268 { 269 macaddr *key = PG_GETARG_MACADDR_P(0); 270 271 return hash_any((unsigned char *) key, sizeof(macaddr)); 272 } 273 274 Datum 275 hashmacaddrextended(PG_FUNCTION_ARGS) 276 { 277 macaddr *key = PG_GETARG_MACADDR_P(0); 278 279 return hash_any_extended((unsigned char *) key, sizeof(macaddr), 280 PG_GETARG_INT64(1)); 281 } 282 283 /* 284 * Arithmetic functions: bitwise NOT, AND, OR. 285 */ 286 Datum 287 macaddr_not(PG_FUNCTION_ARGS) 288 { 289 macaddr *addr = PG_GETARG_MACADDR_P(0); 290 macaddr *result; 291 292 result = (macaddr *) palloc(sizeof(macaddr)); 293 result->a = ~addr->a; 294 result->b = ~addr->b; 295 result->c = ~addr->c; 296 result->d = ~addr->d; 297 result->e = ~addr->e; 298 result->f = ~addr->f; 299 PG_RETURN_MACADDR_P(result); 300 } 301 302 Datum 303 macaddr_and(PG_FUNCTION_ARGS) 304 { 305 macaddr *addr1 = PG_GETARG_MACADDR_P(0); 306 macaddr *addr2 = PG_GETARG_MACADDR_P(1); 307 macaddr *result; 308 309 result = (macaddr *) palloc(sizeof(macaddr)); 310 result->a = addr1->a & addr2->a; 311 result->b = addr1->b & addr2->b; 312 result->c = addr1->c & addr2->c; 313 result->d = addr1->d & addr2->d; 314 result->e = addr1->e & addr2->e; 315 result->f = addr1->f & addr2->f; 316 PG_RETURN_MACADDR_P(result); 317 } 318 319 Datum 320 macaddr_or(PG_FUNCTION_ARGS) 321 { 322 macaddr *addr1 = PG_GETARG_MACADDR_P(0); 323 macaddr *addr2 = PG_GETARG_MACADDR_P(1); 324 macaddr *result; 325 326 result = (macaddr *) palloc(sizeof(macaddr)); 327 result->a = addr1->a | addr2->a; 328 result->b = addr1->b | addr2->b; 329 result->c = addr1->c | addr2->c; 330 result->d = addr1->d | addr2->d; 331 result->e = addr1->e | addr2->e; 332 result->f = addr1->f | addr2->f; 333 PG_RETURN_MACADDR_P(result); 334 } 335 336 /* 337 * Truncation function to allow comparing mac manufacturers. 338 * From suggestion by Alex Pilosov <alex@pilosoft.com> 339 */ 340 Datum 341 macaddr_trunc(PG_FUNCTION_ARGS) 342 { 343 macaddr *addr = PG_GETARG_MACADDR_P(0); 344 macaddr *result; 345 346 result = (macaddr *) palloc(sizeof(macaddr)); 347 348 result->a = addr->a; 349 result->b = addr->b; 350 result->c = addr->c; 351 result->d = 0; 352 result->e = 0; 353 result->f = 0; 354 355 PG_RETURN_MACADDR_P(result); 356 } 357 358 /* 359 * SortSupport strategy function. Populates a SortSupport struct with the 360 * information necessary to use comparison by abbreviated keys. 361 */ 362 Datum 363 macaddr_sortsupport(PG_FUNCTION_ARGS) 364 { 365 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); 366 367 ssup->comparator = macaddr_fast_cmp; 368 ssup->ssup_extra = NULL; 369 370 if (ssup->abbreviate) 371 { 372 macaddr_sortsupport_state *uss; 373 MemoryContext oldcontext; 374 375 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); 376 377 uss = palloc(sizeof(macaddr_sortsupport_state)); 378 uss->input_count = 0; 379 uss->estimating = true; 380 initHyperLogLog(&uss->abbr_card, 10); 381 382 ssup->ssup_extra = uss; 383 384 ssup->comparator = macaddr_cmp_abbrev; 385 ssup->abbrev_converter = macaddr_abbrev_convert; 386 ssup->abbrev_abort = macaddr_abbrev_abort; 387 ssup->abbrev_full_comparator = macaddr_fast_cmp; 388 389 MemoryContextSwitchTo(oldcontext); 390 } 391 392 PG_RETURN_VOID(); 393 } 394 395 /* 396 * SortSupport "traditional" comparison function. Pulls two MAC addresses from 397 * the heap and runs a standard comparison on them. 398 */ 399 static int 400 macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup) 401 { 402 macaddr *arg1 = DatumGetMacaddrP(x); 403 macaddr *arg2 = DatumGetMacaddrP(y); 404 405 return macaddr_cmp_internal(arg1, arg2); 406 } 407 408 /* 409 * SortSupport abbreviated key comparison function. Compares two MAC addresses 410 * quickly by treating them like integers, and without having to go to the 411 * heap. 412 */ 413 static int 414 macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup) 415 { 416 if (x > y) 417 return 1; 418 else if (x == y) 419 return 0; 420 else 421 return -1; 422 } 423 424 /* 425 * Callback for estimating effectiveness of abbreviated key optimization. 426 * 427 * We pay no attention to the cardinality of the non-abbreviated data, because 428 * there is no equality fast-path within authoritative macaddr comparator. 429 */ 430 static bool 431 macaddr_abbrev_abort(int memtupcount, SortSupport ssup) 432 { 433 macaddr_sortsupport_state *uss = ssup->ssup_extra; 434 double abbr_card; 435 436 if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) 437 return false; 438 439 abbr_card = estimateHyperLogLog(&uss->abbr_card); 440 441 /* 442 * If we have >100k distinct values, then even if we were sorting many 443 * billion rows we'd likely still break even, and the penalty of undoing 444 * that many rows of abbrevs would probably not be worth it. At this point 445 * we stop counting because we know that we're now fully committed. 446 */ 447 if (abbr_card > 100000.0) 448 { 449 #ifdef TRACE_SORT 450 if (trace_sort) 451 elog(LOG, 452 "macaddr_abbrev: estimation ends at cardinality %f" 453 " after " INT64_FORMAT " values (%d rows)", 454 abbr_card, uss->input_count, memtupcount); 455 #endif 456 uss->estimating = false; 457 return false; 458 } 459 460 /* 461 * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row 462 * fudge factor allows us to abort earlier on genuinely pathological data 463 * where we've had exactly one abbreviated value in the first 2k 464 * (non-null) rows. 465 */ 466 if (abbr_card < uss->input_count / 2000.0 + 0.5) 467 { 468 #ifdef TRACE_SORT 469 if (trace_sort) 470 elog(LOG, 471 "macaddr_abbrev: aborting abbreviation at cardinality %f" 472 " below threshold %f after " INT64_FORMAT " values (%d rows)", 473 abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, 474 memtupcount); 475 #endif 476 return true; 477 } 478 479 #ifdef TRACE_SORT 480 if (trace_sort) 481 elog(LOG, 482 "macaddr_abbrev: cardinality %f after " INT64_FORMAT 483 " values (%d rows)", abbr_card, uss->input_count, memtupcount); 484 #endif 485 486 return false; 487 } 488 489 /* 490 * SortSupport conversion routine. Converts original macaddr representation 491 * to abbreviated key representation. 492 * 493 * Packs the bytes of a 6-byte MAC address into a Datum and treats it as an 494 * unsigned integer for purposes of comparison. On a 64-bit machine, there 495 * will be two zeroed bytes of padding. The integer is converted to native 496 * endianness to facilitate easy comparison. 497 */ 498 static Datum 499 macaddr_abbrev_convert(Datum original, SortSupport ssup) 500 { 501 macaddr_sortsupport_state *uss = ssup->ssup_extra; 502 macaddr *authoritative = DatumGetMacaddrP(original); 503 Datum res; 504 505 /* 506 * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of 507 * the MAC address in. There will be two bytes of zero padding on the end 508 * of the least significant bits. 509 */ 510 #if SIZEOF_DATUM == 8 511 memset(&res, 0, SIZEOF_DATUM); 512 memcpy(&res, authoritative, sizeof(macaddr)); 513 #else /* SIZEOF_DATUM != 8 */ 514 memcpy(&res, authoritative, SIZEOF_DATUM); 515 #endif 516 uss->input_count += 1; 517 518 /* 519 * Cardinality estimation. The estimate uses uint32, so on a 64-bit 520 * architecture, XOR the two 32-bit halves together to produce slightly 521 * more entropy. The two zeroed bytes won't have any practical impact on 522 * this operation. 523 */ 524 if (uss->estimating) 525 { 526 uint32 tmp; 527 528 #if SIZEOF_DATUM == 8 529 tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); 530 #else /* SIZEOF_DATUM != 8 */ 531 tmp = (uint32) res; 532 #endif 533 534 addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); 535 } 536 537 /* 538 * Byteswap on little-endian machines. 539 * 540 * This is needed so that macaddr_cmp_abbrev() (an unsigned integer 3-way 541 * comparator) works correctly on all platforms. Without this, the 542 * comparator would have to call memcmp() with a pair of pointers to the 543 * first byte of each abbreviated key, which is slower. 544 */ 545 res = DatumBigEndianToNative(res); 546 547 return res; 548 } 549