1 /*
2 * PostgreSQL type definitions for the INET and CIDR types.
3 *
4 * src/backend/utils/adt/network.c
5 *
6 * Jon Postel RIP 16 Oct 1998
7 */
8
9 #include "postgres.h"
10
11 #include <sys/socket.h>
12 #include <netinet/in.h>
13 #include <arpa/inet.h>
14
15 #include "access/stratnum.h"
16 #include "catalog/pg_opfamily.h"
17 #include "catalog/pg_type.h"
18 #include "common/hashfn.h"
19 #include "common/ip.h"
20 #include "lib/hyperloglog.h"
21 #include "libpq/libpq-be.h"
22 #include "libpq/pqformat.h"
23 #include "miscadmin.h"
24 #include "nodes/makefuncs.h"
25 #include "nodes/nodeFuncs.h"
26 #include "nodes/supportnodes.h"
27 #include "utils/builtins.h"
28 #include "utils/fmgroids.h"
29 #include "utils/guc.h"
30 #include "utils/inet.h"
31 #include "utils/lsyscache.h"
32 #include "utils/sortsupport.h"
33
34
/*
 * An IPv4 netmask size is a value in the range of 0 - 32, which is
 * represented with 6 bits in inet/cidr abbreviated keys where possible.
 *
 * An IPv4 inet/cidr abbreviated key can use up to 25 bits for subnet
 * component.
 *
 * Both widths are consumed by network_abbrev_convert() when it lays out the
 * IPv4 abbreviated key on builds where SIZEOF_DATUM is 8.
 */
#define ABBREV_BITS_INET4_NETMASK_SIZE	6
#define ABBREV_BITS_INET4_SUBNET		25
44
/*
 * sortsupport for inet/cidr
 *
 * Working state for one sort.  network_sortsupport() allocates it when
 * abbreviation is requested and stores it in ssup->ssup_extra;
 * network_abbrev_convert() updates it and network_abbrev_abort() reads it.
 */
typedef struct
{
	int64		input_count;	/* number of non-null values seen */
	bool		estimating;		/* true if estimating cardinality */

	hyperLogLogState abbr_card; /* cardinality estimator */
} network_sortsupport_state;
53
/* Authoritative three-way comparison shared by all ordering operators */
static int32 network_cmp_internal(inet *a1, inet *a2);

/* SortSupport callbacks installed by network_sortsupport() */
static int	network_fast_cmp(Datum x, Datum y, SortSupport ssup);
static int	network_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
static bool network_abbrev_abort(int memtupcount, SortSupport ssup);
static Datum network_abbrev_convert(Datum original, SortSupport ssup);

/* Helpers behind the network_subset_support() planner support function */
static List *match_network_function(Node *leftop,
									Node *rightop,
									int indexarg,
									Oid funcid,
									Oid opfamily);
static List *match_network_subset(Node *leftop,
								  Node *rightop,
								  bool is_eq,
								  Oid opfamily);

/* Helpers defined later in this file */
static bool addressOK(unsigned char *a, int bits, int family);
static inet *internal_inetpl(inet *ip, int64 addend);
70
71
72 /*
73 * Common INET/CIDR input routine
74 */
75 static inet *
network_in(char * src,bool is_cidr)76 network_in(char *src, bool is_cidr)
77 {
78 int bits;
79 inet *dst;
80
81 dst = (inet *) palloc0(sizeof(inet));
82
83 /*
84 * First, check to see if this is an IPv6 or IPv4 address. IPv6 addresses
85 * will have a : somewhere in them (several, in fact) so if there is one
86 * present, assume it's V6, otherwise assume it's V4.
87 */
88
89 if (strchr(src, ':') != NULL)
90 ip_family(dst) = PGSQL_AF_INET6;
91 else
92 ip_family(dst) = PGSQL_AF_INET;
93
94 bits = pg_inet_net_pton(ip_family(dst), src, ip_addr(dst),
95 is_cidr ? ip_addrsize(dst) : -1);
96 if ((bits < 0) || (bits > ip_maxbits(dst)))
97 ereport(ERROR,
98 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
99 /* translator: first %s is inet or cidr */
100 errmsg("invalid input syntax for type %s: \"%s\"",
101 is_cidr ? "cidr" : "inet", src)));
102
103 /*
104 * Error check: CIDR values must not have any bits set beyond the masklen.
105 */
106 if (is_cidr)
107 {
108 if (!addressOK(ip_addr(dst), bits, ip_family(dst)))
109 ereport(ERROR,
110 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
111 errmsg("invalid cidr value: \"%s\"", src),
112 errdetail("Value has bits set to right of mask.")));
113 }
114
115 ip_bits(dst) = bits;
116 SET_INET_VARSIZE(dst);
117
118 return dst;
119 }
120
121 Datum
inet_in(PG_FUNCTION_ARGS)122 inet_in(PG_FUNCTION_ARGS)
123 {
124 char *src = PG_GETARG_CSTRING(0);
125
126 PG_RETURN_INET_P(network_in(src, false));
127 }
128
129 Datum
cidr_in(PG_FUNCTION_ARGS)130 cidr_in(PG_FUNCTION_ARGS)
131 {
132 char *src = PG_GETARG_CSTRING(0);
133
134 PG_RETURN_INET_P(network_in(src, true));
135 }
136
137
138 /*
139 * Common INET/CIDR output routine
140 */
141 static char *
network_out(inet * src,bool is_cidr)142 network_out(inet *src, bool is_cidr)
143 {
144 char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
145 char *dst;
146 int len;
147
148 dst = pg_inet_net_ntop(ip_family(src), ip_addr(src), ip_bits(src),
149 tmp, sizeof(tmp));
150 if (dst == NULL)
151 ereport(ERROR,
152 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
153 errmsg("could not format inet value: %m")));
154
155 /* For CIDR, add /n if not present */
156 if (is_cidr && strchr(tmp, '/') == NULL)
157 {
158 len = strlen(tmp);
159 snprintf(tmp + len, sizeof(tmp) - len, "/%u", ip_bits(src));
160 }
161
162 return pstrdup(tmp);
163 }
164
165 Datum
inet_out(PG_FUNCTION_ARGS)166 inet_out(PG_FUNCTION_ARGS)
167 {
168 inet *src = PG_GETARG_INET_PP(0);
169
170 PG_RETURN_CSTRING(network_out(src, false));
171 }
172
173 Datum
cidr_out(PG_FUNCTION_ARGS)174 cidr_out(PG_FUNCTION_ARGS)
175 {
176 inet *src = PG_GETARG_INET_PP(0);
177
178 PG_RETURN_CSTRING(network_out(src, true));
179 }
180
181
182 /*
183 * network_recv - converts external binary format to inet
184 *
185 * The external representation is (one byte apiece for)
186 * family, bits, is_cidr, address length, address in network byte order.
187 *
188 * Presence of is_cidr is largely for historical reasons, though it might
189 * allow some code-sharing on the client side. We send it correctly on
190 * output, but ignore the value on input.
191 */
192 static inet *
network_recv(StringInfo buf,bool is_cidr)193 network_recv(StringInfo buf, bool is_cidr)
194 {
195 inet *addr;
196 char *addrptr;
197 int bits;
198 int nb,
199 i;
200
201 /* make sure any unused bits in a CIDR value are zeroed */
202 addr = (inet *) palloc0(sizeof(inet));
203
204 ip_family(addr) = pq_getmsgbyte(buf);
205 if (ip_family(addr) != PGSQL_AF_INET &&
206 ip_family(addr) != PGSQL_AF_INET6)
207 ereport(ERROR,
208 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
209 /* translator: %s is inet or cidr */
210 errmsg("invalid address family in external \"%s\" value",
211 is_cidr ? "cidr" : "inet")));
212 bits = pq_getmsgbyte(buf);
213 if (bits < 0 || bits > ip_maxbits(addr))
214 ereport(ERROR,
215 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
216 /* translator: %s is inet or cidr */
217 errmsg("invalid bits in external \"%s\" value",
218 is_cidr ? "cidr" : "inet")));
219 ip_bits(addr) = bits;
220 i = pq_getmsgbyte(buf); /* ignore is_cidr */
221 nb = pq_getmsgbyte(buf);
222 if (nb != ip_addrsize(addr))
223 ereport(ERROR,
224 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
225 /* translator: %s is inet or cidr */
226 errmsg("invalid length in external \"%s\" value",
227 is_cidr ? "cidr" : "inet")));
228
229 addrptr = (char *) ip_addr(addr);
230 for (i = 0; i < nb; i++)
231 addrptr[i] = pq_getmsgbyte(buf);
232
233 /*
234 * Error check: CIDR values must not have any bits set beyond the masklen.
235 */
236 if (is_cidr)
237 {
238 if (!addressOK(ip_addr(addr), bits, ip_family(addr)))
239 ereport(ERROR,
240 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
241 errmsg("invalid external \"cidr\" value"),
242 errdetail("Value has bits set to right of mask.")));
243 }
244
245 SET_INET_VARSIZE(addr);
246
247 return addr;
248 }
249
250 Datum
inet_recv(PG_FUNCTION_ARGS)251 inet_recv(PG_FUNCTION_ARGS)
252 {
253 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
254
255 PG_RETURN_INET_P(network_recv(buf, false));
256 }
257
258 Datum
cidr_recv(PG_FUNCTION_ARGS)259 cidr_recv(PG_FUNCTION_ARGS)
260 {
261 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
262
263 PG_RETURN_INET_P(network_recv(buf, true));
264 }
265
266
267 /*
268 * network_send - converts inet to binary format
269 */
270 static bytea *
network_send(inet * addr,bool is_cidr)271 network_send(inet *addr, bool is_cidr)
272 {
273 StringInfoData buf;
274 char *addrptr;
275 int nb,
276 i;
277
278 pq_begintypsend(&buf);
279 pq_sendbyte(&buf, ip_family(addr));
280 pq_sendbyte(&buf, ip_bits(addr));
281 pq_sendbyte(&buf, is_cidr);
282 nb = ip_addrsize(addr);
283 if (nb < 0)
284 nb = 0;
285 pq_sendbyte(&buf, nb);
286 addrptr = (char *) ip_addr(addr);
287 for (i = 0; i < nb; i++)
288 pq_sendbyte(&buf, addrptr[i]);
289 return pq_endtypsend(&buf);
290 }
291
292 Datum
inet_send(PG_FUNCTION_ARGS)293 inet_send(PG_FUNCTION_ARGS)
294 {
295 inet *addr = PG_GETARG_INET_PP(0);
296
297 PG_RETURN_BYTEA_P(network_send(addr, false));
298 }
299
300 Datum
cidr_send(PG_FUNCTION_ARGS)301 cidr_send(PG_FUNCTION_ARGS)
302 {
303 inet *addr = PG_GETARG_INET_PP(0);
304
305 PG_RETURN_BYTEA_P(network_send(addr, true));
306 }
307
308
309 Datum
inet_to_cidr(PG_FUNCTION_ARGS)310 inet_to_cidr(PG_FUNCTION_ARGS)
311 {
312 inet *src = PG_GETARG_INET_PP(0);
313 int bits;
314
315 bits = ip_bits(src);
316
317 /* safety check */
318 if ((bits < 0) || (bits > ip_maxbits(src)))
319 elog(ERROR, "invalid inet bit length: %d", bits);
320
321 PG_RETURN_INET_P(cidr_set_masklen_internal(src, bits));
322 }
323
324 Datum
inet_set_masklen(PG_FUNCTION_ARGS)325 inet_set_masklen(PG_FUNCTION_ARGS)
326 {
327 inet *src = PG_GETARG_INET_PP(0);
328 int bits = PG_GETARG_INT32(1);
329 inet *dst;
330
331 if (bits == -1)
332 bits = ip_maxbits(src);
333
334 if ((bits < 0) || (bits > ip_maxbits(src)))
335 ereport(ERROR,
336 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
337 errmsg("invalid mask length: %d", bits)));
338
339 /* clone the original data */
340 dst = (inet *) palloc(VARSIZE_ANY(src));
341 memcpy(dst, src, VARSIZE_ANY(src));
342
343 ip_bits(dst) = bits;
344
345 PG_RETURN_INET_P(dst);
346 }
347
348 Datum
cidr_set_masklen(PG_FUNCTION_ARGS)349 cidr_set_masklen(PG_FUNCTION_ARGS)
350 {
351 inet *src = PG_GETARG_INET_PP(0);
352 int bits = PG_GETARG_INT32(1);
353
354 if (bits == -1)
355 bits = ip_maxbits(src);
356
357 if ((bits < 0) || (bits > ip_maxbits(src)))
358 ereport(ERROR,
359 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
360 errmsg("invalid mask length: %d", bits)));
361
362 PG_RETURN_INET_P(cidr_set_masklen_internal(src, bits));
363 }
364
365 /*
366 * Copy src and set mask length to 'bits' (which must be valid for the family)
367 */
368 inet *
cidr_set_masklen_internal(const inet * src,int bits)369 cidr_set_masklen_internal(const inet *src, int bits)
370 {
371 inet *dst = (inet *) palloc0(sizeof(inet));
372
373 ip_family(dst) = ip_family(src);
374 ip_bits(dst) = bits;
375
376 if (bits > 0)
377 {
378 Assert(bits <= ip_maxbits(dst));
379
380 /* Clone appropriate bytes of the address, leaving the rest 0 */
381 memcpy(ip_addr(dst), ip_addr(src), (bits + 7) / 8);
382
383 /* Clear any unwanted bits in the last partial byte */
384 if (bits % 8)
385 ip_addr(dst)[bits / 8] &= ~(0xFF >> (bits % 8));
386 }
387
388 /* Set varlena header correctly */
389 SET_INET_VARSIZE(dst);
390
391 return dst;
392 }
393
394 /*
395 * Basic comparison function for sorting and inet/cidr comparisons.
396 *
397 * Comparison is first on the common bits of the network part, then on
398 * the length of the network part, and then on the whole unmasked address.
399 * The effect is that the network part is the major sort key, and for
400 * equal network parts we sort on the host part. Note this is only sane
401 * for CIDR if address bits to the right of the mask are guaranteed zero;
402 * otherwise logically-equal CIDRs might compare different.
403 */
404
405 static int32
network_cmp_internal(inet * a1,inet * a2)406 network_cmp_internal(inet *a1, inet *a2)
407 {
408 if (ip_family(a1) == ip_family(a2))
409 {
410 int order;
411
412 order = bitncmp(ip_addr(a1), ip_addr(a2),
413 Min(ip_bits(a1), ip_bits(a2)));
414 if (order != 0)
415 return order;
416 order = ((int) ip_bits(a1)) - ((int) ip_bits(a2));
417 if (order != 0)
418 return order;
419 return bitncmp(ip_addr(a1), ip_addr(a2), ip_maxbits(a1));
420 }
421
422 return ip_family(a1) - ip_family(a2);
423 }
424
425 Datum
network_cmp(PG_FUNCTION_ARGS)426 network_cmp(PG_FUNCTION_ARGS)
427 {
428 inet *a1 = PG_GETARG_INET_PP(0);
429 inet *a2 = PG_GETARG_INET_PP(1);
430
431 PG_RETURN_INT32(network_cmp_internal(a1, a2));
432 }
433
434 /*
435 * SortSupport strategy routine
436 */
437 Datum
network_sortsupport(PG_FUNCTION_ARGS)438 network_sortsupport(PG_FUNCTION_ARGS)
439 {
440 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
441
442 ssup->comparator = network_fast_cmp;
443 ssup->ssup_extra = NULL;
444
445 if (ssup->abbreviate)
446 {
447 network_sortsupport_state *uss;
448 MemoryContext oldcontext;
449
450 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
451
452 uss = palloc(sizeof(network_sortsupport_state));
453 uss->input_count = 0;
454 uss->estimating = true;
455 initHyperLogLog(&uss->abbr_card, 10);
456
457 ssup->ssup_extra = uss;
458
459 ssup->comparator = network_cmp_abbrev;
460 ssup->abbrev_converter = network_abbrev_convert;
461 ssup->abbrev_abort = network_abbrev_abort;
462 ssup->abbrev_full_comparator = network_fast_cmp;
463
464 MemoryContextSwitchTo(oldcontext);
465 }
466
467 PG_RETURN_VOID();
468 }
469
470 /*
471 * SortSupport comparison func
472 */
473 static int
network_fast_cmp(Datum x,Datum y,SortSupport ssup)474 network_fast_cmp(Datum x, Datum y, SortSupport ssup)
475 {
476 inet *arg1 = DatumGetInetPP(x);
477 inet *arg2 = DatumGetInetPP(y);
478
479 return network_cmp_internal(arg1, arg2);
480 }
481
482 /*
483 * Abbreviated key comparison func
484 */
485 static int
network_cmp_abbrev(Datum x,Datum y,SortSupport ssup)486 network_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
487 {
488 if (x > y)
489 return 1;
490 else if (x == y)
491 return 0;
492 else
493 return -1;
494 }
495
496 /*
497 * Callback for estimating effectiveness of abbreviated key optimization.
498 *
499 * We pay no attention to the cardinality of the non-abbreviated data, because
500 * there is no equality fast-path within authoritative inet comparator.
501 */
502 static bool
network_abbrev_abort(int memtupcount,SortSupport ssup)503 network_abbrev_abort(int memtupcount, SortSupport ssup)
504 {
505 network_sortsupport_state *uss = ssup->ssup_extra;
506 double abbr_card;
507
508 if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
509 return false;
510
511 abbr_card = estimateHyperLogLog(&uss->abbr_card);
512
513 /*
514 * If we have >100k distinct values, then even if we were sorting many
515 * billion rows we'd likely still break even, and the penalty of undoing
516 * that many rows of abbrevs would probably not be worth it. At this point
517 * we stop counting because we know that we're now fully committed.
518 */
519 if (abbr_card > 100000.0)
520 {
521 #ifdef TRACE_SORT
522 if (trace_sort)
523 elog(LOG,
524 "network_abbrev: estimation ends at cardinality %f"
525 " after " INT64_FORMAT " values (%d rows)",
526 abbr_card, uss->input_count, memtupcount);
527 #endif
528 uss->estimating = false;
529 return false;
530 }
531
532 /*
533 * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
534 * fudge factor allows us to abort earlier on genuinely pathological data
535 * where we've had exactly one abbreviated value in the first 2k
536 * (non-null) rows.
537 */
538 if (abbr_card < uss->input_count / 2000.0 + 0.5)
539 {
540 #ifdef TRACE_SORT
541 if (trace_sort)
542 elog(LOG,
543 "network_abbrev: aborting abbreviation at cardinality %f"
544 " below threshold %f after " INT64_FORMAT " values (%d rows)",
545 abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
546 memtupcount);
547 #endif
548 return true;
549 }
550
551 #ifdef TRACE_SORT
552 if (trace_sort)
553 elog(LOG,
554 "network_abbrev: cardinality %f after " INT64_FORMAT
555 " values (%d rows)", abbr_card, uss->input_count, memtupcount);
556 #endif
557
558 return false;
559 }
560
561 /*
562 * SortSupport conversion routine. Converts original inet/cidr representation
563 * to abbreviated key representation that works with simple 3-way unsigned int
564 * comparisons. The network_cmp_internal() rules for sorting inet/cidr datums
565 * are followed by abbreviated comparisons by an encoding scheme that
566 * conditions keys through careful use of padding.
567 *
568 * Some background: inet values have three major components (take for example
569 * the address 1.2.3.4/24):
570 *
571 * * A network, or netmasked bits (1.2.3.0).
572 * * A netmask size (/24).
573 * * A subnet, or bits outside of the netmask (0.0.0.4).
574 *
575 * cidr values are the same except that with only the first two components --
576 * all their subnet bits *must* be zero (1.2.3.0/24).
577 *
 * IPv4 and IPv6 are identical in this makeup, with the difference being that
 * IPv4 addresses have a maximum of 32 bits compared to IPv6's 128 bits, so in
 * IPv6 each part may be larger.
581 *
582 * inet/cidr types compare using these sorting rules. If inequality is detected
583 * at any step, comparison is finished. If any rule is a tie, the algorithm
584 * drops through to the next to break it:
585 *
586 * 1. IPv4 always appears before IPv6.
587 * 2. Network bits are compared.
588 * 3. Netmask size is compared.
589 * 4. All bits are compared (having made it here, we know that both
590 * netmasked bits and netmask size are equal, so we're in effect only
591 * comparing subnet bits).
592 *
593 * When generating abbreviated keys for SortSupport, we pack as much as we can
594 * into a datum while ensuring that when comparing those keys as integers,
595 * these rules will be respected. Exact contents depend on IP family and datum
596 * size.
597 *
598 * IPv4
599 * ----
600 *
601 * 4 byte datums:
602 *
603 * Start with 1 bit for the IP family (IPv4 or IPv6; this bit is present in
604 * every case below) followed by all but 1 of the netmasked bits.
605 *
606 * +----------+---------------------+
607 * | 1 bit IP | 31 bits network | (1 bit network
608 * | family | (truncated) | omitted)
609 * +----------+---------------------+
610 *
611 * 8 byte datums:
612 *
613 * We have space to store all netmasked bits, followed by the netmask size,
614 * followed by 25 bits of the subnet (25 bits is usually more than enough in
615 * practice). cidr datums always have all-zero subnet bits.
616 *
617 * +----------+-----------------------+--------------+--------------------+
618 * | 1 bit IP | 32 bits network | 6 bits | 25 bits subnet |
619 * | family | (full) | network size | (truncated) |
620 * +----------+-----------------------+--------------+--------------------+
621 *
622 * IPv6
623 * ----
624 *
625 * 4 byte datums:
626 *
627 * +----------+---------------------+
628 * | 1 bit IP | 31 bits network | (up to 97 bits
629 * | family | (truncated) | network omitted)
630 * +----------+---------------------+
631 *
632 * 8 byte datums:
633 *
634 * +----------+---------------------------------+
635 * | 1 bit IP | 63 bits network | (up to 65 bits
636 * | family | (truncated) | network omitted)
637 * +----------+---------------------------------+
638 */
static Datum
network_abbrev_convert(Datum original, SortSupport ssup)
{
	/* per-sort state installed in ssup_extra by network_sortsupport() */
	network_sortsupport_state *uss = ssup->ssup_extra;
	inet	   *authoritative = DatumGetInetPP(original);
	Datum		res,			/* abbreviated key under construction */
				ipaddr_datum,	/* leading address bytes as a native integer */
				subnet_bitmask, /* ones over the subnet (host) bit positions */
				network;		/* ipaddr_datum with subnet bits cleared */
	int			subnet_size;

	Assert(ip_family(authoritative) == PGSQL_AF_INET ||
		   ip_family(authoritative) == PGSQL_AF_INET6);

	/*
	 * Get an unsigned integer representation of the IP address by taking its
	 * first 4 or 8 bytes. Always take all 4 bytes of an IPv4 address. Take
	 * the first 8 bytes of an IPv6 address with an 8 byte datum and 4 bytes
	 * otherwise.
	 *
	 * We're consuming an array of unsigned char, so byteswap on little endian
	 * systems (an inet's ipaddr field stores the most significant byte
	 * first).
	 */
	if (ip_family(authoritative) == PGSQL_AF_INET)
	{
		uint32		ipaddr_datum32;

		memcpy(&ipaddr_datum32, ip_addr(authoritative), sizeof(uint32));

		/* Must byteswap on little-endian machines */
#ifndef WORDS_BIGENDIAN
		ipaddr_datum = pg_bswap32(ipaddr_datum32);
#else
		ipaddr_datum = ipaddr_datum32;
#endif

		/* Initialize result without setting ipfamily bit */
		res = (Datum) 0;
	}
	else
	{
		/* IPv6: grab as many leading address bytes as a Datum can hold */
		memcpy(&ipaddr_datum, ip_addr(authoritative), sizeof(Datum));

		/* Must byteswap on little-endian machines */
		ipaddr_datum = DatumBigEndianToNative(ipaddr_datum);

		/* Initialize result with ipfamily (most significant) bit set */
		res = ((Datum) 1) << (SIZEOF_DATUM * BITS_PER_BYTE - 1);
	}

	/*
	 * ipaddr_datum must be "split": high order bits go in "network" component
	 * of abbreviated key (often with zeroed bits at the end due to masking),
	 * while low order bits go in "subnet" component when there is space for
	 * one. This is often accomplished by generating a temp datum subnet
	 * bitmask, which we may reuse later when generating the subnet bits
	 * themselves.  (Note that subnet bits are only used with IPv4 datums on
	 * platforms where datum is 8 bytes.)
	 *
	 * The number of bits in subnet is used to generate a datum subnet
	 * bitmask. For example, with a /24 IPv4 datum there are 8 subnet bits
	 * (since 32 - 24 is 8), so the final subnet bitmask is B'1111 1111'. We
	 * need explicit handling for cases where the ipaddr bits cannot all fit
	 * in a datum, though (otherwise we'd incorrectly mask the network
	 * component with IPv6 values).
	 */
	subnet_size = ip_maxbits(authoritative) - ip_bits(authoritative);
	Assert(subnet_size >= 0);
	/* subnet size must work with prefix ipaddr cases */
	subnet_size %= SIZEOF_DATUM * BITS_PER_BYTE;
	if (ip_bits(authoritative) == 0)
	{
		/* Fit as many ipaddr bits as possible into subnet */
		subnet_bitmask = ((Datum) 0) - 1;
		network = 0;
	}
	else if (ip_bits(authoritative) < SIZEOF_DATUM * BITS_PER_BYTE)
	{
		/* Split ipaddr bits between network and subnet */
		subnet_bitmask = (((Datum) 1) << subnet_size) - 1;
		network = ipaddr_datum & ~subnet_bitmask;
	}
	else
	{
		/* Fit as many ipaddr bits as possible into network */
		subnet_bitmask = 0;
		network = ipaddr_datum;
	}

#if SIZEOF_DATUM == 8
	if (ip_family(authoritative) == PGSQL_AF_INET)
	{
		/*
		 * IPv4 with 8 byte datums: keep all 32 netmasked bits, netmask size,
		 * and most significant 25 subnet bits
		 */
		Datum		netmask_size = (Datum) ip_bits(authoritative);
		Datum		subnet;

		/*
		 * Shift left 31 bits: 6 bits netmask size + 25 subnet bits.
		 *
		 * We don't make any distinction between network bits that are zero
		 * due to masking and "true"/non-masked zero bits.  An abbreviated
		 * comparison that is resolved by comparing a non-masked and non-zero
		 * bit to a masked/zeroed bit is effectively resolved based on
		 * ip_bits(), even though the comparison won't reach the netmask_size
		 * bits.
		 */
		network <<= (ABBREV_BITS_INET4_NETMASK_SIZE +
					 ABBREV_BITS_INET4_SUBNET);

		/* Shift size to make room for subnet bits at the end */
		netmask_size <<= ABBREV_BITS_INET4_SUBNET;

		/* Extract subnet bits without shifting them */
		subnet = ipaddr_datum & subnet_bitmask;

		/*
		 * If we have more than 25 subnet bits, we can't fit everything. Shift
		 * subnet down to avoid clobbering bits that are only supposed to be
		 * used for netmask_size.
		 *
		 * Discarding the least significant subnet bits like this is correct
		 * because abbreviated comparisons that are resolved at the subnet
		 * level must have had equal netmask_size/ip_bits() values in order to
		 * get that far.
		 */
		if (subnet_size > ABBREV_BITS_INET4_SUBNET)
			subnet >>= subnet_size - ABBREV_BITS_INET4_SUBNET;

		/*
		 * Assemble the final abbreviated key without clobbering the ipfamily
		 * bit that must remain a zero.
		 */
		res |= network | netmask_size | subnet;
	}
	else
#endif
	{
		/*
		 * 4 byte datums, or IPv6 with 8 byte datums: Use as many of the
		 * netmasked bits as will fit in final abbreviated key. Avoid
		 * clobbering the ipfamily bit that was set earlier.
		 */
		res |= network >> 1;
	}

	/* Tally this value; network_abbrev_abort() reads the running count */
	uss->input_count += 1;

	/* Hash abbreviated key */
	if (uss->estimating)
	{
		uint32		tmp;

		/* Reduce the key to 32 bits before handing it to hash_uint32() */
#if SIZEOF_DATUM == 8
		tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
#else							/* SIZEOF_DATUM != 8 */
		tmp = (uint32) res;
#endif

		/* Feed the hash to the estimator consulted by network_abbrev_abort() */
		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
	}

	return res;
}
806
807 /*
808 * Boolean ordering tests.
809 */
810 Datum
network_lt(PG_FUNCTION_ARGS)811 network_lt(PG_FUNCTION_ARGS)
812 {
813 inet *a1 = PG_GETARG_INET_PP(0);
814 inet *a2 = PG_GETARG_INET_PP(1);
815
816 PG_RETURN_BOOL(network_cmp_internal(a1, a2) < 0);
817 }
818
819 Datum
network_le(PG_FUNCTION_ARGS)820 network_le(PG_FUNCTION_ARGS)
821 {
822 inet *a1 = PG_GETARG_INET_PP(0);
823 inet *a2 = PG_GETARG_INET_PP(1);
824
825 PG_RETURN_BOOL(network_cmp_internal(a1, a2) <= 0);
826 }
827
828 Datum
network_eq(PG_FUNCTION_ARGS)829 network_eq(PG_FUNCTION_ARGS)
830 {
831 inet *a1 = PG_GETARG_INET_PP(0);
832 inet *a2 = PG_GETARG_INET_PP(1);
833
834 PG_RETURN_BOOL(network_cmp_internal(a1, a2) == 0);
835 }
836
837 Datum
network_ge(PG_FUNCTION_ARGS)838 network_ge(PG_FUNCTION_ARGS)
839 {
840 inet *a1 = PG_GETARG_INET_PP(0);
841 inet *a2 = PG_GETARG_INET_PP(1);
842
843 PG_RETURN_BOOL(network_cmp_internal(a1, a2) >= 0);
844 }
845
846 Datum
network_gt(PG_FUNCTION_ARGS)847 network_gt(PG_FUNCTION_ARGS)
848 {
849 inet *a1 = PG_GETARG_INET_PP(0);
850 inet *a2 = PG_GETARG_INET_PP(1);
851
852 PG_RETURN_BOOL(network_cmp_internal(a1, a2) > 0);
853 }
854
855 Datum
network_ne(PG_FUNCTION_ARGS)856 network_ne(PG_FUNCTION_ARGS)
857 {
858 inet *a1 = PG_GETARG_INET_PP(0);
859 inet *a2 = PG_GETARG_INET_PP(1);
860
861 PG_RETURN_BOOL(network_cmp_internal(a1, a2) != 0);
862 }
863
864 /*
865 * MIN/MAX support functions.
866 */
867 Datum
network_smaller(PG_FUNCTION_ARGS)868 network_smaller(PG_FUNCTION_ARGS)
869 {
870 inet *a1 = PG_GETARG_INET_PP(0);
871 inet *a2 = PG_GETARG_INET_PP(1);
872
873 if (network_cmp_internal(a1, a2) < 0)
874 PG_RETURN_INET_P(a1);
875 else
876 PG_RETURN_INET_P(a2);
877 }
878
879 Datum
network_larger(PG_FUNCTION_ARGS)880 network_larger(PG_FUNCTION_ARGS)
881 {
882 inet *a1 = PG_GETARG_INET_PP(0);
883 inet *a2 = PG_GETARG_INET_PP(1);
884
885 if (network_cmp_internal(a1, a2) > 0)
886 PG_RETURN_INET_P(a1);
887 else
888 PG_RETURN_INET_P(a2);
889 }
890
891 /*
892 * Support function for hash indexes on inet/cidr.
893 */
894 Datum
hashinet(PG_FUNCTION_ARGS)895 hashinet(PG_FUNCTION_ARGS)
896 {
897 inet *addr = PG_GETARG_INET_PP(0);
898 int addrsize = ip_addrsize(addr);
899
900 /* XXX this assumes there are no pad bytes in the data structure */
901 return hash_any((unsigned char *) VARDATA_ANY(addr), addrsize + 2);
902 }
903
904 Datum
hashinetextended(PG_FUNCTION_ARGS)905 hashinetextended(PG_FUNCTION_ARGS)
906 {
907 inet *addr = PG_GETARG_INET_PP(0);
908 int addrsize = ip_addrsize(addr);
909
910 return hash_any_extended((unsigned char *) VARDATA_ANY(addr), addrsize + 2,
911 PG_GETARG_INT64(1));
912 }
913
914 /*
915 * Boolean network-inclusion tests.
916 */
917 Datum
network_sub(PG_FUNCTION_ARGS)918 network_sub(PG_FUNCTION_ARGS)
919 {
920 inet *a1 = PG_GETARG_INET_PP(0);
921 inet *a2 = PG_GETARG_INET_PP(1);
922
923 if (ip_family(a1) == ip_family(a2))
924 {
925 PG_RETURN_BOOL(ip_bits(a1) > ip_bits(a2) &&
926 bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a2)) == 0);
927 }
928
929 PG_RETURN_BOOL(false);
930 }
931
932 Datum
network_subeq(PG_FUNCTION_ARGS)933 network_subeq(PG_FUNCTION_ARGS)
934 {
935 inet *a1 = PG_GETARG_INET_PP(0);
936 inet *a2 = PG_GETARG_INET_PP(1);
937
938 if (ip_family(a1) == ip_family(a2))
939 {
940 PG_RETURN_BOOL(ip_bits(a1) >= ip_bits(a2) &&
941 bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a2)) == 0);
942 }
943
944 PG_RETURN_BOOL(false);
945 }
946
947 Datum
network_sup(PG_FUNCTION_ARGS)948 network_sup(PG_FUNCTION_ARGS)
949 {
950 inet *a1 = PG_GETARG_INET_PP(0);
951 inet *a2 = PG_GETARG_INET_PP(1);
952
953 if (ip_family(a1) == ip_family(a2))
954 {
955 PG_RETURN_BOOL(ip_bits(a1) < ip_bits(a2) &&
956 bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a1)) == 0);
957 }
958
959 PG_RETURN_BOOL(false);
960 }
961
962 Datum
network_supeq(PG_FUNCTION_ARGS)963 network_supeq(PG_FUNCTION_ARGS)
964 {
965 inet *a1 = PG_GETARG_INET_PP(0);
966 inet *a2 = PG_GETARG_INET_PP(1);
967
968 if (ip_family(a1) == ip_family(a2))
969 {
970 PG_RETURN_BOOL(ip_bits(a1) <= ip_bits(a2) &&
971 bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a1)) == 0);
972 }
973
974 PG_RETURN_BOOL(false);
975 }
976
977 Datum
network_overlap(PG_FUNCTION_ARGS)978 network_overlap(PG_FUNCTION_ARGS)
979 {
980 inet *a1 = PG_GETARG_INET_PP(0);
981 inet *a2 = PG_GETARG_INET_PP(1);
982
983 if (ip_family(a1) == ip_family(a2))
984 {
985 PG_RETURN_BOOL(bitncmp(ip_addr(a1), ip_addr(a2),
986 Min(ip_bits(a1), ip_bits(a2))) == 0);
987 }
988
989 PG_RETURN_BOOL(false);
990 }
991
992 /*
993 * Planner support function for network subset/superset operators
994 */
995 Datum
network_subset_support(PG_FUNCTION_ARGS)996 network_subset_support(PG_FUNCTION_ARGS)
997 {
998 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
999 Node *ret = NULL;
1000
1001 if (IsA(rawreq, SupportRequestIndexCondition))
1002 {
1003 /* Try to convert operator/function call to index conditions */
1004 SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
1005
1006 if (is_opclause(req->node))
1007 {
1008 OpExpr *clause = (OpExpr *) req->node;
1009
1010 Assert(list_length(clause->args) == 2);
1011 ret = (Node *)
1012 match_network_function((Node *) linitial(clause->args),
1013 (Node *) lsecond(clause->args),
1014 req->indexarg,
1015 req->funcid,
1016 req->opfamily);
1017 }
1018 else if (is_funcclause(req->node)) /* be paranoid */
1019 {
1020 FuncExpr *clause = (FuncExpr *) req->node;
1021
1022 Assert(list_length(clause->args) == 2);
1023 ret = (Node *)
1024 match_network_function((Node *) linitial(clause->args),
1025 (Node *) lsecond(clause->args),
1026 req->indexarg,
1027 req->funcid,
1028 req->opfamily);
1029 }
1030 }
1031
1032 PG_RETURN_POINTER(ret);
1033 }
1034
1035 /*
1036 * match_network_function
1037 * Try to generate an indexqual for a network subset/superset function.
1038 *
1039 * This layer is just concerned with identifying the function and swapping
1040 * the arguments if necessary.
1041 */
1042 static List *
match_network_function(Node * leftop,Node * rightop,int indexarg,Oid funcid,Oid opfamily)1043 match_network_function(Node *leftop,
1044 Node *rightop,
1045 int indexarg,
1046 Oid funcid,
1047 Oid opfamily)
1048 {
1049 switch (funcid)
1050 {
1051 case F_NETWORK_SUB:
1052 /* indexkey must be on the left */
1053 if (indexarg != 0)
1054 return NIL;
1055 return match_network_subset(leftop, rightop, false, opfamily);
1056
1057 case F_NETWORK_SUBEQ:
1058 /* indexkey must be on the left */
1059 if (indexarg != 0)
1060 return NIL;
1061 return match_network_subset(leftop, rightop, true, opfamily);
1062
1063 case F_NETWORK_SUP:
1064 /* indexkey must be on the right */
1065 if (indexarg != 1)
1066 return NIL;
1067 return match_network_subset(rightop, leftop, false, opfamily);
1068
1069 case F_NETWORK_SUPEQ:
1070 /* indexkey must be on the right */
1071 if (indexarg != 1)
1072 return NIL;
1073 return match_network_subset(rightop, leftop, true, opfamily);
1074
1075 default:
1076
1077 /*
1078 * We'd only get here if somebody attached this support function
1079 * to an unexpected function. Maybe we should complain, but for
1080 * now, do nothing.
1081 */
1082 return NIL;
1083 }
1084 }
1085
1086 /*
1087 * match_network_subset
1088 * Try to generate an indexqual for a network subset function.
1089 */
1090 static List *
match_network_subset(Node * leftop,Node * rightop,bool is_eq,Oid opfamily)1091 match_network_subset(Node *leftop,
1092 Node *rightop,
1093 bool is_eq,
1094 Oid opfamily)
1095 {
1096 List *result;
1097 Datum rightopval;
1098 Oid datatype = INETOID;
1099 Oid opr1oid;
1100 Oid opr2oid;
1101 Datum opr1right;
1102 Datum opr2right;
1103 Expr *expr;
1104
1105 /*
1106 * Can't do anything with a non-constant or NULL comparison value.
1107 *
1108 * Note that since we restrict ourselves to cases with a hard constant on
1109 * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
1110 * about verifying that.
1111 */
1112 if (!IsA(rightop, Const) ||
1113 ((Const *) rightop)->constisnull)
1114 return NIL;
1115 rightopval = ((Const *) rightop)->constvalue;
1116
1117 /*
1118 * Must check that index's opfamily supports the operators we will want to
1119 * apply.
1120 *
1121 * We insist on the opfamily being the specific one we expect, else we'd
1122 * do the wrong thing if someone were to make a reverse-sort opfamily with
1123 * the same operators.
1124 */
1125 if (opfamily != NETWORK_BTREE_FAM_OID)
1126 return NIL;
1127
1128 /*
1129 * create clause "key >= network_scan_first( rightopval )", or ">" if the
1130 * operator disallows equality.
1131 *
1132 * Note: seeing that this function supports only fixed values for opfamily
1133 * and datatype, we could just hard-wire the operator OIDs instead of
1134 * looking them up. But for now it seems better to be general.
1135 */
1136 if (is_eq)
1137 {
1138 opr1oid = get_opfamily_member(opfamily, datatype, datatype,
1139 BTGreaterEqualStrategyNumber);
1140 if (opr1oid == InvalidOid)
1141 elog(ERROR, "no >= operator for opfamily %u", opfamily);
1142 }
1143 else
1144 {
1145 opr1oid = get_opfamily_member(opfamily, datatype, datatype,
1146 BTGreaterStrategyNumber);
1147 if (opr1oid == InvalidOid)
1148 elog(ERROR, "no > operator for opfamily %u", opfamily);
1149 }
1150
1151 opr1right = network_scan_first(rightopval);
1152
1153 expr = make_opclause(opr1oid, BOOLOID, false,
1154 (Expr *) leftop,
1155 (Expr *) makeConst(datatype, -1,
1156 InvalidOid, /* not collatable */
1157 -1, opr1right,
1158 false, false),
1159 InvalidOid, InvalidOid);
1160 result = list_make1(expr);
1161
1162 /* create clause "key <= network_scan_last( rightopval )" */
1163
1164 opr2oid = get_opfamily_member(opfamily, datatype, datatype,
1165 BTLessEqualStrategyNumber);
1166 if (opr2oid == InvalidOid)
1167 elog(ERROR, "no <= operator for opfamily %u", opfamily);
1168
1169 opr2right = network_scan_last(rightopval);
1170
1171 expr = make_opclause(opr2oid, BOOLOID, false,
1172 (Expr *) leftop,
1173 (Expr *) makeConst(datatype, -1,
1174 InvalidOid, /* not collatable */
1175 -1, opr2right,
1176 false, false),
1177 InvalidOid, InvalidOid);
1178 result = lappend(result, expr);
1179
1180 return result;
1181 }
1182
1183
1184 /*
1185 * Extract data from a network datatype.
1186 */
1187 Datum
network_host(PG_FUNCTION_ARGS)1188 network_host(PG_FUNCTION_ARGS)
1189 {
1190 inet *ip = PG_GETARG_INET_PP(0);
1191 char *ptr;
1192 char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
1193
1194 /* force display of max bits, regardless of masklen... */
1195 if (pg_inet_net_ntop(ip_family(ip), ip_addr(ip), ip_maxbits(ip),
1196 tmp, sizeof(tmp)) == NULL)
1197 ereport(ERROR,
1198 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
1199 errmsg("could not format inet value: %m")));
1200
1201 /* Suppress /n if present (shouldn't happen now) */
1202 if ((ptr = strchr(tmp, '/')) != NULL)
1203 *ptr = '\0';
1204
1205 PG_RETURN_TEXT_P(cstring_to_text(tmp));
1206 }
1207
1208 /*
1209 * network_show implements the inet and cidr casts to text. This is not
1210 * quite the same behavior as network_out, hence we can't drop it in favor
1211 * of CoerceViaIO.
1212 */
1213 Datum
network_show(PG_FUNCTION_ARGS)1214 network_show(PG_FUNCTION_ARGS)
1215 {
1216 inet *ip = PG_GETARG_INET_PP(0);
1217 int len;
1218 char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
1219
1220 if (pg_inet_net_ntop(ip_family(ip), ip_addr(ip), ip_maxbits(ip),
1221 tmp, sizeof(tmp)) == NULL)
1222 ereport(ERROR,
1223 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
1224 errmsg("could not format inet value: %m")));
1225
1226 /* Add /n if not present (which it won't be) */
1227 if (strchr(tmp, '/') == NULL)
1228 {
1229 len = strlen(tmp);
1230 snprintf(tmp + len, sizeof(tmp) - len, "/%u", ip_bits(ip));
1231 }
1232
1233 PG_RETURN_TEXT_P(cstring_to_text(tmp));
1234 }
1235
1236 Datum
inet_abbrev(PG_FUNCTION_ARGS)1237 inet_abbrev(PG_FUNCTION_ARGS)
1238 {
1239 inet *ip = PG_GETARG_INET_PP(0);
1240 char *dst;
1241 char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
1242
1243 dst = pg_inet_net_ntop(ip_family(ip), ip_addr(ip),
1244 ip_bits(ip), tmp, sizeof(tmp));
1245
1246 if (dst == NULL)
1247 ereport(ERROR,
1248 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
1249 errmsg("could not format inet value: %m")));
1250
1251 PG_RETURN_TEXT_P(cstring_to_text(tmp));
1252 }
1253
1254 Datum
cidr_abbrev(PG_FUNCTION_ARGS)1255 cidr_abbrev(PG_FUNCTION_ARGS)
1256 {
1257 inet *ip = PG_GETARG_INET_PP(0);
1258 char *dst;
1259 char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
1260
1261 dst = pg_inet_cidr_ntop(ip_family(ip), ip_addr(ip),
1262 ip_bits(ip), tmp, sizeof(tmp));
1263
1264 if (dst == NULL)
1265 ereport(ERROR,
1266 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
1267 errmsg("could not format cidr value: %m")));
1268
1269 PG_RETURN_TEXT_P(cstring_to_text(tmp));
1270 }
1271
1272 Datum
network_masklen(PG_FUNCTION_ARGS)1273 network_masklen(PG_FUNCTION_ARGS)
1274 {
1275 inet *ip = PG_GETARG_INET_PP(0);
1276
1277 PG_RETURN_INT32(ip_bits(ip));
1278 }
1279
1280 Datum
network_family(PG_FUNCTION_ARGS)1281 network_family(PG_FUNCTION_ARGS)
1282 {
1283 inet *ip = PG_GETARG_INET_PP(0);
1284
1285 switch (ip_family(ip))
1286 {
1287 case PGSQL_AF_INET:
1288 PG_RETURN_INT32(4);
1289 break;
1290 case PGSQL_AF_INET6:
1291 PG_RETURN_INT32(6);
1292 break;
1293 default:
1294 PG_RETURN_INT32(0);
1295 break;
1296 }
1297 }
1298
1299 Datum
network_broadcast(PG_FUNCTION_ARGS)1300 network_broadcast(PG_FUNCTION_ARGS)
1301 {
1302 inet *ip = PG_GETARG_INET_PP(0);
1303 inet *dst;
1304 int byte;
1305 int bits;
1306 int maxbytes;
1307 unsigned char mask;
1308 unsigned char *a,
1309 *b;
1310
1311 /* make sure any unused bits are zeroed */
1312 dst = (inet *) palloc0(sizeof(inet));
1313
1314 maxbytes = ip_addrsize(ip);
1315 bits = ip_bits(ip);
1316 a = ip_addr(ip);
1317 b = ip_addr(dst);
1318
1319 for (byte = 0; byte < maxbytes; byte++)
1320 {
1321 if (bits >= 8)
1322 {
1323 mask = 0x00;
1324 bits -= 8;
1325 }
1326 else if (bits == 0)
1327 mask = 0xff;
1328 else
1329 {
1330 mask = 0xff >> bits;
1331 bits = 0;
1332 }
1333
1334 b[byte] = a[byte] | mask;
1335 }
1336
1337 ip_family(dst) = ip_family(ip);
1338 ip_bits(dst) = ip_bits(ip);
1339 SET_INET_VARSIZE(dst);
1340
1341 PG_RETURN_INET_P(dst);
1342 }
1343
1344 Datum
network_network(PG_FUNCTION_ARGS)1345 network_network(PG_FUNCTION_ARGS)
1346 {
1347 inet *ip = PG_GETARG_INET_PP(0);
1348 inet *dst;
1349 int byte;
1350 int bits;
1351 unsigned char mask;
1352 unsigned char *a,
1353 *b;
1354
1355 /* make sure any unused bits are zeroed */
1356 dst = (inet *) palloc0(sizeof(inet));
1357
1358 bits = ip_bits(ip);
1359 a = ip_addr(ip);
1360 b = ip_addr(dst);
1361
1362 byte = 0;
1363
1364 while (bits)
1365 {
1366 if (bits >= 8)
1367 {
1368 mask = 0xff;
1369 bits -= 8;
1370 }
1371 else
1372 {
1373 mask = 0xff << (8 - bits);
1374 bits = 0;
1375 }
1376
1377 b[byte] = a[byte] & mask;
1378 byte++;
1379 }
1380
1381 ip_family(dst) = ip_family(ip);
1382 ip_bits(dst) = ip_bits(ip);
1383 SET_INET_VARSIZE(dst);
1384
1385 PG_RETURN_INET_P(dst);
1386 }
1387
1388 Datum
network_netmask(PG_FUNCTION_ARGS)1389 network_netmask(PG_FUNCTION_ARGS)
1390 {
1391 inet *ip = PG_GETARG_INET_PP(0);
1392 inet *dst;
1393 int byte;
1394 int bits;
1395 unsigned char mask;
1396 unsigned char *b;
1397
1398 /* make sure any unused bits are zeroed */
1399 dst = (inet *) palloc0(sizeof(inet));
1400
1401 bits = ip_bits(ip);
1402 b = ip_addr(dst);
1403
1404 byte = 0;
1405
1406 while (bits)
1407 {
1408 if (bits >= 8)
1409 {
1410 mask = 0xff;
1411 bits -= 8;
1412 }
1413 else
1414 {
1415 mask = 0xff << (8 - bits);
1416 bits = 0;
1417 }
1418
1419 b[byte] = mask;
1420 byte++;
1421 }
1422
1423 ip_family(dst) = ip_family(ip);
1424 ip_bits(dst) = ip_maxbits(ip);
1425 SET_INET_VARSIZE(dst);
1426
1427 PG_RETURN_INET_P(dst);
1428 }
1429
1430 Datum
network_hostmask(PG_FUNCTION_ARGS)1431 network_hostmask(PG_FUNCTION_ARGS)
1432 {
1433 inet *ip = PG_GETARG_INET_PP(0);
1434 inet *dst;
1435 int byte;
1436 int bits;
1437 int maxbytes;
1438 unsigned char mask;
1439 unsigned char *b;
1440
1441 /* make sure any unused bits are zeroed */
1442 dst = (inet *) palloc0(sizeof(inet));
1443
1444 maxbytes = ip_addrsize(ip);
1445 bits = ip_maxbits(ip) - ip_bits(ip);
1446 b = ip_addr(dst);
1447
1448 byte = maxbytes - 1;
1449
1450 while (bits)
1451 {
1452 if (bits >= 8)
1453 {
1454 mask = 0xff;
1455 bits -= 8;
1456 }
1457 else
1458 {
1459 mask = 0xff >> (8 - bits);
1460 bits = 0;
1461 }
1462
1463 b[byte] = mask;
1464 byte--;
1465 }
1466
1467 ip_family(dst) = ip_family(ip);
1468 ip_bits(dst) = ip_maxbits(ip);
1469 SET_INET_VARSIZE(dst);
1470
1471 PG_RETURN_INET_P(dst);
1472 }
1473
1474 /*
1475 * Returns true if the addresses are from the same family, or false. Used to
1476 * check that we can create a network which contains both of the networks.
1477 */
1478 Datum
inet_same_family(PG_FUNCTION_ARGS)1479 inet_same_family(PG_FUNCTION_ARGS)
1480 {
1481 inet *a1 = PG_GETARG_INET_PP(0);
1482 inet *a2 = PG_GETARG_INET_PP(1);
1483
1484 PG_RETURN_BOOL(ip_family(a1) == ip_family(a2));
1485 }
1486
1487 /*
1488 * Returns the smallest CIDR which contains both of the inputs.
1489 */
1490 Datum
inet_merge(PG_FUNCTION_ARGS)1491 inet_merge(PG_FUNCTION_ARGS)
1492 {
1493 inet *a1 = PG_GETARG_INET_PP(0),
1494 *a2 = PG_GETARG_INET_PP(1);
1495 int commonbits;
1496
1497 if (ip_family(a1) != ip_family(a2))
1498 ereport(ERROR,
1499 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1500 errmsg("cannot merge addresses from different families")));
1501
1502 commonbits = bitncommon(ip_addr(a1), ip_addr(a2),
1503 Min(ip_bits(a1), ip_bits(a2)));
1504
1505 PG_RETURN_INET_P(cidr_set_masklen_internal(a1, commonbits));
1506 }
1507
1508 /*
1509 * Convert a value of a network datatype to an approximate scalar value.
1510 * This is used for estimating selectivities of inequality operators
1511 * involving network types.
1512 *
1513 * On failure (e.g., unsupported typid), set *failure to true;
1514 * otherwise, that variable is not changed.
1515 */
1516 double
convert_network_to_scalar(Datum value,Oid typid,bool * failure)1517 convert_network_to_scalar(Datum value, Oid typid, bool *failure)
1518 {
1519 switch (typid)
1520 {
1521 case INETOID:
1522 case CIDROID:
1523 {
1524 inet *ip = DatumGetInetPP(value);
1525 int len;
1526 double res;
1527 int i;
1528
1529 /*
1530 * Note that we don't use the full address for IPv6.
1531 */
1532 if (ip_family(ip) == PGSQL_AF_INET)
1533 len = 4;
1534 else
1535 len = 5;
1536
1537 res = ip_family(ip);
1538 for (i = 0; i < len; i++)
1539 {
1540 res *= 256;
1541 res += ip_addr(ip)[i];
1542 }
1543 return res;
1544 }
1545 case MACADDROID:
1546 {
1547 macaddr *mac = DatumGetMacaddrP(value);
1548 double res;
1549
1550 res = (mac->a << 16) | (mac->b << 8) | (mac->c);
1551 res *= 256 * 256 * 256;
1552 res += (mac->d << 16) | (mac->e << 8) | (mac->f);
1553 return res;
1554 }
1555 case MACADDR8OID:
1556 {
1557 macaddr8 *mac = DatumGetMacaddr8P(value);
1558 double res;
1559
1560 res = (mac->a << 24) | (mac->b << 16) | (mac->c << 8) | (mac->d);
1561 res *= ((double) 256) * 256 * 256 * 256;
1562 res += (mac->e << 24) | (mac->f << 16) | (mac->g << 8) | (mac->h);
1563 return res;
1564 }
1565 }
1566
1567 *failure = true;
1568 return 0;
1569 }
1570
/*
 * int
 * bitncmp(l, r, n)
 *		compare the leading n bits of bit strings l and r.
 * return:
 *		<0, >0, or 0 in the libc tradition.
 * note:
 *		network byte order assumed.  this means 192.5.5.240/28 has
 *		0x11110000 in its fourth octet.
 * author:
 *		Paul Vixie (ISC), June 1996
 */
int
bitncmp(const unsigned char *l, const unsigned char *r, int n)
{
	int			whole = n / 8;	/* number of complete bytes to compare */
	int			extra = n % 8;	/* bits to compare in the following byte */
	int			cmp;
	unsigned int mask;
	unsigned int lbits;
	unsigned int rbits;

	cmp = memcmp(l, r, whole);
	if (cmp != 0 || extra == 0)
		return cmp;

	/*
	 * Keep only the high-order "extra" bits of the next byte on each side.
	 * Since the most significant differing bit decides the order, comparing
	 * the masked values numerically gives the same answer as scanning the
	 * bits one at a time from the top.
	 */
	mask = (0xFFu << (8 - extra)) & 0xFFu;
	lbits = l[whole] & mask;
	rbits = r[whole] & mask;

	if (lbits == rbits)
		return 0;
	return (lbits > rbits) ? 1 : -1;
}
1611
/*
 * bitncommon: compare bit strings l and r, for up to n bits.
 *
 * Returns the number of leading bits that match (0 to n).
 */
int
bitncommon(const unsigned char *l, const unsigned char *r, int n)
{
	int			whole = n / 8;	/* complete bytes within the first n bits */
	int			tail = n % 8;	/* bits to examine in the byte after those */
	int			idx = 0;

	/* skip over the leading whole bytes that are identical */
	while (idx < whole && l[idx] == r[idx])
		idx++;

	/*
	 * If we stopped on a differing whole byte, at least one of its bits is
	 * not common, so at most seven of them can match.
	 */
	if (idx < whole)
		tail = 7;

	/* count matching high-order bits in the byte where we stopped */
	if (tail != 0)
	{
		unsigned int diff = l[idx] ^ r[idx];

		/* shrink the candidate prefix until no differing bit lies in it */
		for (; (diff >> (8 - tail)) != 0; tail--)
			;
	}

	return (8 * idx) + tail;
}
1650
1651
1652 /*
1653 * Verify a CIDR address is OK (doesn't have bits set past the masklen)
1654 */
1655 static bool
addressOK(unsigned char * a,int bits,int family)1656 addressOK(unsigned char *a, int bits, int family)
1657 {
1658 int byte;
1659 int nbits;
1660 int maxbits;
1661 int maxbytes;
1662 unsigned char mask;
1663
1664 if (family == PGSQL_AF_INET)
1665 {
1666 maxbits = 32;
1667 maxbytes = 4;
1668 }
1669 else
1670 {
1671 maxbits = 128;
1672 maxbytes = 16;
1673 }
1674 Assert(bits <= maxbits);
1675
1676 if (bits == maxbits)
1677 return true;
1678
1679 byte = bits / 8;
1680
1681 nbits = bits % 8;
1682 mask = 0xff;
1683 if (bits != 0)
1684 mask >>= nbits;
1685
1686 while (byte < maxbytes)
1687 {
1688 if ((a[byte] & mask) != 0)
1689 return false;
1690 mask = 0xff;
1691 byte++;
1692 }
1693
1694 return true;
1695 }
1696
1697
1698 /*
1699 * These functions are used by planner to generate indexscan limits
1700 * for clauses a << b and a <<= b
1701 */
1702
1703 /* return the minimal value for an IP on a given network */
1704 Datum
network_scan_first(Datum in)1705 network_scan_first(Datum in)
1706 {
1707 return DirectFunctionCall1(network_network, in);
1708 }
1709
1710 /*
1711 * return "last" IP on a given network. It's the broadcast address,
1712 * however, masklen has to be set to its max bits, since
1713 * 192.168.0.255/24 is considered less than 192.168.0.255/32
1714 *
1715 * inet_set_masklen() hacked to max out the masklength to 128 for IPv6
1716 * and 32 for IPv4 when given '-1' as argument.
1717 */
1718 Datum
network_scan_last(Datum in)1719 network_scan_last(Datum in)
1720 {
1721 return DirectFunctionCall2(inet_set_masklen,
1722 DirectFunctionCall1(network_broadcast, in),
1723 Int32GetDatum(-1));
1724 }
1725
1726
1727 /*
1728 * IP address that the client is connecting from (NULL if Unix socket)
1729 */
1730 Datum
inet_client_addr(PG_FUNCTION_ARGS)1731 inet_client_addr(PG_FUNCTION_ARGS)
1732 {
1733 Port *port = MyProcPort;
1734 char remote_host[NI_MAXHOST];
1735 int ret;
1736
1737 if (port == NULL)
1738 PG_RETURN_NULL();
1739
1740 switch (port->raddr.addr.ss_family)
1741 {
1742 case AF_INET:
1743 #ifdef HAVE_IPV6
1744 case AF_INET6:
1745 #endif
1746 break;
1747 default:
1748 PG_RETURN_NULL();
1749 }
1750
1751 remote_host[0] = '\0';
1752
1753 ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
1754 remote_host, sizeof(remote_host),
1755 NULL, 0,
1756 NI_NUMERICHOST | NI_NUMERICSERV);
1757 if (ret != 0)
1758 PG_RETURN_NULL();
1759
1760 clean_ipv6_addr(port->raddr.addr.ss_family, remote_host);
1761
1762 PG_RETURN_INET_P(network_in(remote_host, false));
1763 }
1764
1765
1766 /*
1767 * port that the client is connecting from (NULL if Unix socket)
1768 */
1769 Datum
inet_client_port(PG_FUNCTION_ARGS)1770 inet_client_port(PG_FUNCTION_ARGS)
1771 {
1772 Port *port = MyProcPort;
1773 char remote_port[NI_MAXSERV];
1774 int ret;
1775
1776 if (port == NULL)
1777 PG_RETURN_NULL();
1778
1779 switch (port->raddr.addr.ss_family)
1780 {
1781 case AF_INET:
1782 #ifdef HAVE_IPV6
1783 case AF_INET6:
1784 #endif
1785 break;
1786 default:
1787 PG_RETURN_NULL();
1788 }
1789
1790 remote_port[0] = '\0';
1791
1792 ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
1793 NULL, 0,
1794 remote_port, sizeof(remote_port),
1795 NI_NUMERICHOST | NI_NUMERICSERV);
1796 if (ret != 0)
1797 PG_RETURN_NULL();
1798
1799 PG_RETURN_DATUM(DirectFunctionCall1(int4in, CStringGetDatum(remote_port)));
1800 }
1801
1802
1803 /*
1804 * IP address that the server accepted the connection on (NULL if Unix socket)
1805 */
1806 Datum
inet_server_addr(PG_FUNCTION_ARGS)1807 inet_server_addr(PG_FUNCTION_ARGS)
1808 {
1809 Port *port = MyProcPort;
1810 char local_host[NI_MAXHOST];
1811 int ret;
1812
1813 if (port == NULL)
1814 PG_RETURN_NULL();
1815
1816 switch (port->laddr.addr.ss_family)
1817 {
1818 case AF_INET:
1819 #ifdef HAVE_IPV6
1820 case AF_INET6:
1821 #endif
1822 break;
1823 default:
1824 PG_RETURN_NULL();
1825 }
1826
1827 local_host[0] = '\0';
1828
1829 ret = pg_getnameinfo_all(&port->laddr.addr, port->laddr.salen,
1830 local_host, sizeof(local_host),
1831 NULL, 0,
1832 NI_NUMERICHOST | NI_NUMERICSERV);
1833 if (ret != 0)
1834 PG_RETURN_NULL();
1835
1836 clean_ipv6_addr(port->laddr.addr.ss_family, local_host);
1837
1838 PG_RETURN_INET_P(network_in(local_host, false));
1839 }
1840
1841
1842 /*
1843 * port that the server accepted the connection on (NULL if Unix socket)
1844 */
1845 Datum
inet_server_port(PG_FUNCTION_ARGS)1846 inet_server_port(PG_FUNCTION_ARGS)
1847 {
1848 Port *port = MyProcPort;
1849 char local_port[NI_MAXSERV];
1850 int ret;
1851
1852 if (port == NULL)
1853 PG_RETURN_NULL();
1854
1855 switch (port->laddr.addr.ss_family)
1856 {
1857 case AF_INET:
1858 #ifdef HAVE_IPV6
1859 case AF_INET6:
1860 #endif
1861 break;
1862 default:
1863 PG_RETURN_NULL();
1864 }
1865
1866 local_port[0] = '\0';
1867
1868 ret = pg_getnameinfo_all(&port->laddr.addr, port->laddr.salen,
1869 NULL, 0,
1870 local_port, sizeof(local_port),
1871 NI_NUMERICHOST | NI_NUMERICSERV);
1872 if (ret != 0)
1873 PG_RETURN_NULL();
1874
1875 PG_RETURN_DATUM(DirectFunctionCall1(int4in, CStringGetDatum(local_port)));
1876 }
1877
1878
1879 Datum
inetnot(PG_FUNCTION_ARGS)1880 inetnot(PG_FUNCTION_ARGS)
1881 {
1882 inet *ip = PG_GETARG_INET_PP(0);
1883 inet *dst;
1884
1885 dst = (inet *) palloc0(sizeof(inet));
1886
1887 {
1888 int nb = ip_addrsize(ip);
1889 unsigned char *pip = ip_addr(ip);
1890 unsigned char *pdst = ip_addr(dst);
1891
1892 while (nb-- > 0)
1893 pdst[nb] = ~pip[nb];
1894 }
1895 ip_bits(dst) = ip_bits(ip);
1896
1897 ip_family(dst) = ip_family(ip);
1898 SET_INET_VARSIZE(dst);
1899
1900 PG_RETURN_INET_P(dst);
1901 }
1902
1903
1904 Datum
inetand(PG_FUNCTION_ARGS)1905 inetand(PG_FUNCTION_ARGS)
1906 {
1907 inet *ip = PG_GETARG_INET_PP(0);
1908 inet *ip2 = PG_GETARG_INET_PP(1);
1909 inet *dst;
1910
1911 dst = (inet *) palloc0(sizeof(inet));
1912
1913 if (ip_family(ip) != ip_family(ip2))
1914 ereport(ERROR,
1915 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1916 errmsg("cannot AND inet values of different sizes")));
1917 else
1918 {
1919 int nb = ip_addrsize(ip);
1920 unsigned char *pip = ip_addr(ip);
1921 unsigned char *pip2 = ip_addr(ip2);
1922 unsigned char *pdst = ip_addr(dst);
1923
1924 while (nb-- > 0)
1925 pdst[nb] = pip[nb] & pip2[nb];
1926 }
1927 ip_bits(dst) = Max(ip_bits(ip), ip_bits(ip2));
1928
1929 ip_family(dst) = ip_family(ip);
1930 SET_INET_VARSIZE(dst);
1931
1932 PG_RETURN_INET_P(dst);
1933 }
1934
1935
1936 Datum
inetor(PG_FUNCTION_ARGS)1937 inetor(PG_FUNCTION_ARGS)
1938 {
1939 inet *ip = PG_GETARG_INET_PP(0);
1940 inet *ip2 = PG_GETARG_INET_PP(1);
1941 inet *dst;
1942
1943 dst = (inet *) palloc0(sizeof(inet));
1944
1945 if (ip_family(ip) != ip_family(ip2))
1946 ereport(ERROR,
1947 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1948 errmsg("cannot OR inet values of different sizes")));
1949 else
1950 {
1951 int nb = ip_addrsize(ip);
1952 unsigned char *pip = ip_addr(ip);
1953 unsigned char *pip2 = ip_addr(ip2);
1954 unsigned char *pdst = ip_addr(dst);
1955
1956 while (nb-- > 0)
1957 pdst[nb] = pip[nb] | pip2[nb];
1958 }
1959 ip_bits(dst) = Max(ip_bits(ip), ip_bits(ip2));
1960
1961 ip_family(dst) = ip_family(ip);
1962 SET_INET_VARSIZE(dst);
1963
1964 PG_RETURN_INET_P(dst);
1965 }
1966
1967
1968 static inet *
internal_inetpl(inet * ip,int64 addend)1969 internal_inetpl(inet *ip, int64 addend)
1970 {
1971 inet *dst;
1972
1973 dst = (inet *) palloc0(sizeof(inet));
1974
1975 {
1976 int nb = ip_addrsize(ip);
1977 unsigned char *pip = ip_addr(ip);
1978 unsigned char *pdst = ip_addr(dst);
1979 int carry = 0;
1980
1981 while (nb-- > 0)
1982 {
1983 carry = pip[nb] + (int) (addend & 0xFF) + carry;
1984 pdst[nb] = (unsigned char) (carry & 0xFF);
1985 carry >>= 8;
1986
1987 /*
1988 * We have to be careful about right-shifting addend because
1989 * right-shift isn't portable for negative values, while simply
1990 * dividing by 256 doesn't work (the standard rounding is in the
1991 * wrong direction, besides which there may be machines out there
1992 * that round the wrong way). So, explicitly clear the low-order
1993 * byte to remove any doubt about the correct result of the
1994 * division, and then divide rather than shift.
1995 */
1996 addend &= ~((int64) 0xFF);
1997 addend /= 0x100;
1998 }
1999
2000 /*
2001 * At this point we should have addend and carry both zero if original
2002 * addend was >= 0, or addend -1 and carry 1 if original addend was <
2003 * 0. Anything else means overflow.
2004 */
2005 if (!((addend == 0 && carry == 0) ||
2006 (addend == -1 && carry == 1)))
2007 ereport(ERROR,
2008 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2009 errmsg("result is out of range")));
2010 }
2011
2012 ip_bits(dst) = ip_bits(ip);
2013 ip_family(dst) = ip_family(ip);
2014 SET_INET_VARSIZE(dst);
2015
2016 return dst;
2017 }
2018
2019
2020 Datum
inetpl(PG_FUNCTION_ARGS)2021 inetpl(PG_FUNCTION_ARGS)
2022 {
2023 inet *ip = PG_GETARG_INET_PP(0);
2024 int64 addend = PG_GETARG_INT64(1);
2025
2026 PG_RETURN_INET_P(internal_inetpl(ip, addend));
2027 }
2028
2029
2030 Datum
inetmi_int8(PG_FUNCTION_ARGS)2031 inetmi_int8(PG_FUNCTION_ARGS)
2032 {
2033 inet *ip = PG_GETARG_INET_PP(0);
2034 int64 addend = PG_GETARG_INT64(1);
2035
2036 PG_RETURN_INET_P(internal_inetpl(ip, -addend));
2037 }
2038
2039
2040 Datum
inetmi(PG_FUNCTION_ARGS)2041 inetmi(PG_FUNCTION_ARGS)
2042 {
2043 inet *ip = PG_GETARG_INET_PP(0);
2044 inet *ip2 = PG_GETARG_INET_PP(1);
2045 int64 res = 0;
2046
2047 if (ip_family(ip) != ip_family(ip2))
2048 ereport(ERROR,
2049 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2050 errmsg("cannot subtract inet values of different sizes")));
2051 else
2052 {
2053 /*
2054 * We form the difference using the traditional complement, increment,
2055 * and add rule, with the increment part being handled by starting the
2056 * carry off at 1. If you don't think integer arithmetic is done in
2057 * two's complement, too bad.
2058 */
2059 int nb = ip_addrsize(ip);
2060 int byte = 0;
2061 unsigned char *pip = ip_addr(ip);
2062 unsigned char *pip2 = ip_addr(ip2);
2063 int carry = 1;
2064
2065 while (nb-- > 0)
2066 {
2067 int lobyte;
2068
2069 carry = pip[nb] + (~pip2[nb] & 0xFF) + carry;
2070 lobyte = carry & 0xFF;
2071 if (byte < sizeof(int64))
2072 {
2073 res |= ((int64) lobyte) << (byte * 8);
2074 }
2075 else
2076 {
2077 /*
2078 * Input wider than int64: check for overflow. All bytes to
2079 * the left of what will fit should be 0 or 0xFF, depending on
2080 * sign of the now-complete result.
2081 */
2082 if ((res < 0) ? (lobyte != 0xFF) : (lobyte != 0))
2083 ereport(ERROR,
2084 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2085 errmsg("result is out of range")));
2086 }
2087 carry >>= 8;
2088 byte++;
2089 }
2090
2091 /*
2092 * If input is narrower than int64, overflow is not possible, but we
2093 * have to do proper sign extension.
2094 */
2095 if (carry == 0 && byte < sizeof(int64))
2096 res |= ((uint64) (int64) -1) << (byte * 8);
2097 }
2098
2099 PG_RETURN_INT64(res);
2100 }
2101
2102
/*
 * clean_ipv6_addr --- remove any '%zone' part from an IPv6 address string
 *
 * XXX This should go away someday!
 *
 * This kluge exists because stored inet values don't yet support zones.
 * getnameinfo() output may carry a zone spec, so call this wherever such
 * output is about to be fed to network_in.  Beats failing entirely.
 *
 * Letting network_in ignore %-parts by itself would be an alternative, but
 * then zone specs in user input would be dropped silently, which seems not
 * such a good idea.
 *
 * The string is truncated in place at the first '%'.  Without HAVE_IPV6,
 * or for any family other than AF_INET6, this does nothing.
 */
void
clean_ipv6_addr(int addr_family, char *addr)
{
#ifdef HAVE_IPV6
	char	   *zone;

	if (addr_family != AF_INET6)
		return;

	zone = strchr(addr, '%');
	if (zone != NULL)
		*zone = '\0';
#endif
}
2130