1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright 2009 Paul Ramsey <pramsey@cleverelephant.ca>
22  * Copyright 2017-2019 Darafei Praliaskouski <me@komzpa.net>
23  *
24  **********************************************************************/
25 
26 /*
27 ** R-Tree Bibliography
28 **
29 ** [1] A. Guttman. R-tree: a dynamic index structure for spatial searching.
30 **     Proceedings of the ACM SIGMOD Conference, pp 47-57, June 1984.
31 ** [2] C.-H. Ang and T. C. Tan. New linear node splitting algorithm for
32 **     R-Trees. Advances in Spatial Databases - 5th International Symposium,
33 **     1997
34 ** [3] N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger. The R*tree: an
35 **     efficient and robust access method for points and rectangles.
36 **     Proceedings of the ACM SIGMOD Conference. June 1990.
37 */
38 
39 #include "postgres.h"
40 #include "access/gist.h" /* For GiST */
41 #include "access/itup.h"
42 #include "access/skey.h"
43 
44 #include "../postgis_config.h"
45 
46 #include "liblwgeom.h"        /* For standard geometry types. */
47 #include "lwgeom_pg.h"        /* For debugging macros. */
48 #include "gserialized_gist.h" /* For utility functions. */
49 #include "geography.h"
50 
51 #include <assert.h>
52 #include <math.h>
53 
54 /*
55 ** When is a node split not so good? If more than 90% of the entries
56 ** end up in one of the children.
57 */
58 #define LIMIT_RATIO 0.1
59 
60 /*
61 ** For debugging
62 */
63 #if POSTGIS_DEBUG_LEVEL > 0
64 static int geog_counter_leaf = 0;
65 static int geog_counter_internal = 0;
66 #endif
67 
68 /*
69 ** ND Index key type stub prototypes
70 */
71 Datum gidx_out(PG_FUNCTION_ARGS);
72 Datum gidx_in(PG_FUNCTION_ARGS);
73 
74 /*
75 ** ND GiST prototypes
76 */
77 Datum gserialized_gist_consistent(PG_FUNCTION_ARGS);
78 Datum gserialized_gist_compress(PG_FUNCTION_ARGS);
79 Datum gserialized_gist_decompress(PG_FUNCTION_ARGS);
80 Datum gserialized_gist_penalty(PG_FUNCTION_ARGS);
81 Datum gserialized_gist_picksplit(PG_FUNCTION_ARGS);
82 Datum gserialized_gist_union(PG_FUNCTION_ARGS);
83 Datum gserialized_gist_same(PG_FUNCTION_ARGS);
84 Datum gserialized_gist_distance(PG_FUNCTION_ARGS);
85 Datum gserialized_gist_geog_distance(PG_FUNCTION_ARGS);
86 
87 /*
88 ** ND Operator prototypes
89 */
90 Datum gserialized_overlaps(PG_FUNCTION_ARGS);
91 Datum gserialized_gidx_geom_overlaps(PG_FUNCTION_ARGS);
92 Datum gserialized_gidx_gidx_overlaps(PG_FUNCTION_ARGS);
93 Datum gserialized_contains(PG_FUNCTION_ARGS);
94 Datum gserialized_gidx_geom_contains(PG_FUNCTION_ARGS);
95 Datum gserialized_gidx_gidx_contains(PG_FUNCTION_ARGS);
96 Datum gserialized_within(PG_FUNCTION_ARGS);
97 Datum gserialized_gidx_geom_within(PG_FUNCTION_ARGS);
98 Datum gserialized_gidx_gidx_within(PG_FUNCTION_ARGS);
99 Datum gserialized_same(PG_FUNCTION_ARGS);
100 Datum gserialized_gidx_geom_same(PG_FUNCTION_ARGS);
101 Datum gserialized_gidx_gidx_same(PG_FUNCTION_ARGS);
102 Datum gserialized_distance_nd(PG_FUNCTION_ARGS);
103 
104 /*
105 ** GIDX true/false test function type
106 */
107 typedef bool (*gidx_predicate)(GIDX *a, GIDX *b);
108 
109 /* Allocate a new copy of GIDX */
110 GIDX *
gidx_copy(GIDX * b)111 gidx_copy(GIDX *b)
112 {
113 	GIDX *c = (GIDX *)palloc(VARSIZE(b));
114 	POSTGIS_DEBUGF(5, "copied gidx (%p) to gidx (%p)", b, c);
115 	memcpy((void *)c, (void *)b, VARSIZE(b));
116 	return c;
117 }
118 
119 /* Ensure all minimums are below maximums. */
120 void
gidx_validate(GIDX * b)121 gidx_validate(GIDX *b)
122 {
123 	uint32_t i;
124 	Assert(b);
125 	POSTGIS_DEBUGF(5, "validating gidx (%s)", gidx_to_string(b));
126 	for (i = 0; i < GIDX_NDIMS(b); i++)
127 	{
128 		if (GIDX_GET_MIN(b, i) > GIDX_GET_MAX(b, i))
129 		{
130 			float tmp;
131 			tmp = GIDX_GET_MIN(b, i);
132 			GIDX_SET_MIN(b, i, GIDX_GET_MAX(b, i));
133 			GIDX_SET_MAX(b, i, tmp);
134 		}
135 	}
136 	return;
137 }
138 
139 /* An "unknown" GIDX is used to represent the bounds of an EMPTY
140    geometry or other-wise unindexable geometry (like one with NaN
141    or Inf bounds) */
142 inline bool
gidx_is_unknown(const GIDX * a)143 gidx_is_unknown(const GIDX *a)
144 {
145 	size_t size = VARSIZE_ANY_EXHDR(a);
146 	/* "unknown" gidx objects have a too-small size of one float */
147 	if (size <= 0.0)
148 		return true;
149 	return false;
150 }
151 
152 void
gidx_set_unknown(GIDX * a)153 gidx_set_unknown(GIDX *a)
154 {
155 	SET_VARSIZE(a, VARHDRSZ);
156 }
157 
158 /* Enlarge b_union to contain b_new. */
159 void
gidx_merge(GIDX ** b_union,GIDX * b_new)160 gidx_merge(GIDX **b_union, GIDX *b_new)
161 {
162 	int i, dims_union, dims_new;
163 	Assert(b_union);
164 	Assert(*b_union);
165 	Assert(b_new);
166 
167 	/* Can't merge an unknown into any thing */
168 	/* Q: Unknown is 0 dimensions. Should we reset result to unknown instead? (ticket #4232) */
169 	if (gidx_is_unknown(b_new))
170 		return;
171 
172 	/* Merge of unknown and known is known */
173 	/* Q: Unknown is 0 dimensions. Should we never modify unknown instead? (ticket #4232) */
174 	if (gidx_is_unknown(*b_union))
175 	{
176 		pfree(*b_union);
177 		*b_union = gidx_copy(b_new);
178 		return;
179 	}
180 
181 	dims_union = GIDX_NDIMS(*b_union);
182 	dims_new = GIDX_NDIMS(b_new);
183 
184 	/* Shrink unshared dimensions.
185 	 * Unset dimension is essentially [-FLT_MAX, FLT_MAX], so we can either trim it or reset to that range.*/
186 	if (dims_new < dims_union)
187 	{
188 		*b_union = (GIDX *)repalloc(*b_union, GIDX_SIZE(dims_new));
189 		SET_VARSIZE(*b_union, VARSIZE(b_new));
190 		dims_union = dims_new;
191 	}
192 
193 	for (i = 0; i < dims_union; i++)
194 	{
195 		/* Adjust minimums */
196 		GIDX_SET_MIN(*b_union, i, Min(GIDX_GET_MIN(*b_union, i), GIDX_GET_MIN(b_new, i)));
197 		/* Adjust maximums */
198 		GIDX_SET_MAX(*b_union, i, Max(GIDX_GET_MAX(*b_union, i), GIDX_GET_MAX(b_new, i)));
199 	}
200 }
201 
202 /* Calculate the volume (in n-d units) of the GIDX */
203 static float
gidx_volume(GIDX * a)204 gidx_volume(GIDX *a)
205 {
206 	float result;
207 	uint32_t i;
208 	if (!a || gidx_is_unknown(a))
209 		return 0.0;
210 	result = GIDX_GET_MAX(a, 0) - GIDX_GET_MIN(a, 0);
211 	for (i = 1; i < GIDX_NDIMS(a); i++)
212 		result *= (GIDX_GET_MAX(a, i) - GIDX_GET_MIN(a, i));
213 	POSTGIS_DEBUGF(5, "calculated volume of %s as %.8g", gidx_to_string(a), result);
214 	return result;
215 }
216 
217 /* Calculate the edge of the GIDX */
218 static float
gidx_edge(GIDX * a)219 gidx_edge(GIDX *a)
220 {
221 	float result;
222 	uint32_t i;
223 	if (!a || gidx_is_unknown(a))
224 		return 0.0;
225 	result = GIDX_GET_MAX(a, 0) - GIDX_GET_MIN(a, 0);
226 	for (i = 1; i < GIDX_NDIMS(a); i++)
227 		result += (GIDX_GET_MAX(a, i) - GIDX_GET_MIN(a, i));
228 	POSTGIS_DEBUGF(5, "calculated edge of %s as %.8g", gidx_to_string(a), result);
229 	return result;
230 }
231 
232 /* Ensure the first argument has the higher dimensionality. */
233 static void
gidx_dimensionality_check(GIDX ** a,GIDX ** b)234 gidx_dimensionality_check(GIDX **a, GIDX **b)
235 {
236 	if (GIDX_NDIMS(*a) < GIDX_NDIMS(*b))
237 	{
238 		GIDX *tmp = *b;
239 		*b = *a;
240 		*a = tmp;
241 	}
242 }
243 
244 /* Calculate the volume of the union of the boxes. Avoids creating an intermediate box. */
245 static float
gidx_union_volume(GIDX * a,GIDX * b)246 gidx_union_volume(GIDX *a, GIDX *b)
247 {
248 	float result;
249 	int i;
250 	int ndims_a, ndims_b;
251 
252 	POSTGIS_DEBUG(5, "entered function");
253 
254 	if (!a && !b)
255 	{
256 		elog(ERROR, "gidx_union_volume received two null arguments");
257 		return 0.0;
258 	}
259 
260 	if (!a || gidx_is_unknown(a))
261 		return gidx_volume(b);
262 
263 	if (!b || gidx_is_unknown(b))
264 		return gidx_volume(a);
265 
266 	if (gidx_is_unknown(a) && gidx_is_unknown(b))
267 		return 0.0;
268 
269 	/* Ensure 'a' has the most dimensions. */
270 	gidx_dimensionality_check(&a, &b);
271 
272 	ndims_a = GIDX_NDIMS(a);
273 	ndims_b = GIDX_NDIMS(b);
274 
275 	/* Initialize with maximal length of first dimension. */
276 	result = Max(GIDX_GET_MAX(a, 0), GIDX_GET_MAX(b, 0)) - Min(GIDX_GET_MIN(a, 0), GIDX_GET_MIN(b, 0));
277 
278 	/* Multiply by maximal length of remaining dimensions. */
279 	for (i = 1; i < ndims_b; i++)
280 		result *= (Max(GIDX_GET_MAX(a, i), GIDX_GET_MAX(b, i)) - Min(GIDX_GET_MIN(a, i), GIDX_GET_MIN(b, i)));
281 
282 	/* Add in dimensions of higher dimensional box. */
283 	for (i = ndims_b; i < ndims_a; i++)
284 		result *= (GIDX_GET_MAX(a, i) - GIDX_GET_MIN(a, i));
285 
286 	POSTGIS_DEBUGF(5, "volume( %s union %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result);
287 
288 	return result;
289 }
290 
291 /* Calculate the edge of the union of the boxes. Avoids creating an intermediate box. */
292 static float
gidx_union_edge(GIDX * a,GIDX * b)293 gidx_union_edge(GIDX *a, GIDX *b)
294 {
295 	float result;
296 	int i;
297 	int ndims_a, ndims_b;
298 
299 	POSTGIS_DEBUG(5, "entered function");
300 
301 	if (!a && !b)
302 	{
303 		elog(ERROR, "gidx_union_edge received two null arguments");
304 		return 0.0;
305 	}
306 
307 	if (!a || gidx_is_unknown(a))
308 		return gidx_volume(b);
309 
310 	if (!b || gidx_is_unknown(b))
311 		return gidx_volume(a);
312 
313 	if (gidx_is_unknown(a) && gidx_is_unknown(b))
314 		return 0.0;
315 
316 	/* Ensure 'a' has the most dimensions. */
317 	gidx_dimensionality_check(&a, &b);
318 
319 	ndims_a = GIDX_NDIMS(a);
320 	ndims_b = GIDX_NDIMS(b);
321 
322 	/* Initialize with maximal length of first dimension. */
323 	result = Max(GIDX_GET_MAX(a, 0), GIDX_GET_MAX(b, 0)) - Min(GIDX_GET_MIN(a, 0), GIDX_GET_MIN(b, 0));
324 
325 	/* Add maximal length of remaining dimensions. */
326 	for (i = 1; i < ndims_b; i++)
327 		result += (Max(GIDX_GET_MAX(a, i), GIDX_GET_MAX(b, i)) - Min(GIDX_GET_MIN(a, i), GIDX_GET_MIN(b, i)));
328 
329 	/* Add in dimensions of higher dimensional box. */
330 	for (i = ndims_b; i < ndims_a; i++)
331 		result += (GIDX_GET_MAX(a, i) - GIDX_GET_MIN(a, i));
332 
333 	POSTGIS_DEBUGF(5, "edge( %s union %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result);
334 
335 	return result;
336 }
337 
338 /* Calculate the volume of the intersection of the boxes. */
339 static float
gidx_inter_volume(GIDX * a,GIDX * b)340 gidx_inter_volume(GIDX *a, GIDX *b)
341 {
342 	uint32_t i;
343 	float result;
344 
345 	POSTGIS_DEBUG(5, "entered function");
346 
347 	if (!a || !b)
348 	{
349 		elog(ERROR, "gidx_inter_volume received a null argument");
350 		return 0.0;
351 	}
352 
353 	if (gidx_is_unknown(a) || gidx_is_unknown(b))
354 		return 0.0;
355 
356 	/* Ensure 'a' has the most dimensions. */
357 	gidx_dimensionality_check(&a, &b);
358 
359 	/* Initialize with minimal length of first dimension. */
360 	result = Min(GIDX_GET_MAX(a, 0), GIDX_GET_MAX(b, 0)) - Max(GIDX_GET_MIN(a, 0), GIDX_GET_MIN(b, 0));
361 
362 	/* If they are disjoint (max < min) then return zero. */
363 	if (result < 0.0)
364 		return 0.0;
365 
366 	/* Continue for remaining dimensions. */
367 	for (i = 1; i < GIDX_NDIMS(b); i++)
368 	{
369 		float width = Min(GIDX_GET_MAX(a, i), GIDX_GET_MAX(b, i)) - Max(GIDX_GET_MIN(a, i), GIDX_GET_MIN(b, i));
370 		if (width < 0.0)
371 			return 0.0;
372 		/* Multiply by minimal length of remaining dimensions. */
373 		result *= width;
374 	}
375 	POSTGIS_DEBUGF(5, "volume( %s intersection %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result);
376 	return result;
377 }
378 
379 /*
380 ** Overlapping GIDX box test.
381 **
382 ** Box(A) Overlaps Box(B) IFF for every dimension d:
383 **   min(A,d) <= max(B,d) && max(A,d) => min(B,d)
384 **
385 ** Any missing dimension is assumed by convention to
386 ** overlap whatever finite range available on the
387 ** other operand. See
388 ** http://lists.osgeo.org/pipermail/postgis-devel/2015-February/024757.html
389 **
390 ** Empty boxes never overlap.
391 */
392 bool
gidx_overlaps(GIDX * a,GIDX * b)393 gidx_overlaps(GIDX *a, GIDX *b)
394 {
395 	int i, dims_a, dims_b;
396 
397 	POSTGIS_DEBUG(5, "entered function");
398 
399 	if (!a || !b)
400 		return false;
401 
402 	if (gidx_is_unknown(a) || gidx_is_unknown(b))
403 		return false;
404 
405 	dims_a = GIDX_NDIMS(a);
406 	dims_b = GIDX_NDIMS(b);
407 
408 	/* For all shared dimensions min(a) > max(b) and min(b) > max(a)
409 	   Unshared dimensions do not matter */
410 	for (i = 0; i < Min(dims_a, dims_b); i++)
411 	{
412 		/* If the missing dimension was not padded with -+FLT_MAX */
413 		if (GIDX_GET_MAX(a, i) != FLT_MAX && GIDX_GET_MAX(b, i) != FLT_MAX)
414 		{
415 			if (GIDX_GET_MIN(a, i) > GIDX_GET_MAX(b, i))
416 				return false;
417 			if (GIDX_GET_MIN(b, i) > GIDX_GET_MAX(a, i))
418 				return false;
419 		}
420 	}
421 
422 	return true;
423 }
424 
425 /*
426 ** Containment GIDX test.
427 **
428 ** Box(A) CONTAINS Box(B) IFF (pt(A)LL < pt(B)LL) && (pt(A)UR > pt(B)UR)
429 */
430 bool
gidx_contains(GIDX * a,GIDX * b)431 gidx_contains(GIDX *a, GIDX *b)
432 {
433 	uint32_t i, dims_a, dims_b;
434 
435 	if (!a || !b)
436 		return false;
437 
438 	if (gidx_is_unknown(a) || gidx_is_unknown(b))
439 		return false;
440 
441 	dims_a = GIDX_NDIMS(a);
442 	dims_b = GIDX_NDIMS(b);
443 
444 	/* For all shared dimensions min(a) > min(b) and max(a) < max(b)
445 	   Unshared dimensions do not matter */
446 	for (i = 0; i < Min(dims_a, dims_b); i++)
447 	{
448 		/* If the missing dimension was not padded with -+FLT_MAX */
449 		if (GIDX_GET_MAX(a, i) != FLT_MAX && GIDX_GET_MAX(b, i) != FLT_MAX)
450 		{
451 			if (GIDX_GET_MIN(a, i) > GIDX_GET_MIN(b, i))
452 				return false;
453 			if (GIDX_GET_MAX(a, i) < GIDX_GET_MAX(b, i))
454 				return false;
455 		}
456 	}
457 
458 	return true;
459 }
460 
461 /*
462 ** Equality GIDX test.
463 **
464 ** Box(A) EQUALS Box(B) IFF (pt(A)LL == pt(B)LL) && (pt(A)UR == pt(B)UR)
465 */
466 bool
gidx_equals(GIDX * a,GIDX * b)467 gidx_equals(GIDX *a, GIDX *b)
468 {
469 	uint32_t i, dims_a, dims_b;
470 
471 	if (!a && !b)
472 		return true;
473 	if (!a || !b)
474 		return false;
475 
476 	if (gidx_is_unknown(a) && gidx_is_unknown(b))
477 		return true;
478 
479 	if (gidx_is_unknown(a) || gidx_is_unknown(b))
480 		return false;
481 
482 	dims_a = GIDX_NDIMS(a);
483 	dims_b = GIDX_NDIMS(b);
484 
485 	/* For all shared dimensions min(a) == min(b), max(a) == max(b)
486 	   Unshared dimensions do not matter */
487 	for (i = 0; i < Min(dims_a, dims_b); i++)
488 	{
489 		/* If the missing dimension was not padded with -+FLT_MAX */
490 		if (GIDX_GET_MAX(a, i) != FLT_MAX && GIDX_GET_MAX(b, i) != FLT_MAX)
491 		{
492 			if (GIDX_GET_MIN(a, i) != GIDX_GET_MIN(b, i))
493 				return false;
494 			if (GIDX_GET_MAX(a, i) != GIDX_GET_MAX(b, i))
495 				return false;
496 		}
497 	}
498 	return true;
499 }
500 
501 /**
502  * Support function. Based on two datums return true if
503  * they satisfy the predicate and false otherwise.
504  */
505 static int
gserialized_datum_predicate(Datum gs1,Datum gs2,gidx_predicate predicate)506 gserialized_datum_predicate(Datum gs1, Datum gs2, gidx_predicate predicate)
507 {
508 	/* Put aside some stack memory and use it for GIDX pointers. */
509 	char boxmem1[GIDX_MAX_SIZE];
510 	char boxmem2[GIDX_MAX_SIZE];
511 	GIDX *gidx1 = (GIDX *)boxmem1;
512 	GIDX *gidx2 = (GIDX *)boxmem2;
513 
514 	POSTGIS_DEBUG(3, "entered function");
515 
516 	/* Must be able to build box for each arguement (ie, not empty geometry)
517 	   and predicate function to return true. */
518 	if ((gserialized_datum_get_gidx_p(gs1, gidx1) == LW_SUCCESS) &&
519 	    (gserialized_datum_get_gidx_p(gs2, gidx2) == LW_SUCCESS) && predicate(gidx1, gidx2))
520 	{
521 		POSTGIS_DEBUGF(3, "got boxes %s and %s", gidx_to_string(gidx1), gidx_to_string(gidx2));
522 		return LW_TRUE;
523 	}
524 	return LW_FALSE;
525 }
526 
527 static int
gserialized_datum_predicate_gidx_geom(GIDX * gidx1,Datum gs2,gidx_predicate predicate)528 gserialized_datum_predicate_gidx_geom(GIDX *gidx1, Datum gs2, gidx_predicate predicate)
529 {
530 	/* Put aside some stack memory and use it for GIDX pointers. */
531 	char boxmem2[GIDX_MAX_SIZE];
532 	GIDX *gidx2 = (GIDX *)boxmem2;
533 
534 	POSTGIS_DEBUG(3, "entered function");
535 
536 	/* Must be able to build box for gs2 arguement (ie, not empty geometry)
537 	   and predicate function to return true. */
538 	if ((gserialized_datum_get_gidx_p(gs2, gidx2) == LW_SUCCESS) && predicate(gidx1, gidx2))
539 	{
540 		POSTGIS_DEBUGF(3, "got boxes %s and %s", gidx_to_string(gidx1), gidx_to_string(gidx2));
541 		return LW_TRUE;
542 	}
543 	return LW_FALSE;
544 }
545 
546 static int
gserialized_datum_predicate_geom_gidx(Datum gs1,GIDX * gidx2,gidx_predicate predicate)547 gserialized_datum_predicate_geom_gidx(Datum gs1, GIDX *gidx2, gidx_predicate predicate)
548 {
549 	/* Put aside some stack memory and use it for GIDX pointers. */
550 	char boxmem2[GIDX_MAX_SIZE];
551 	GIDX *gidx1 = (GIDX *)boxmem2;
552 
553 	POSTGIS_DEBUG(3, "entered function");
554 
555 	/* Must be able to build box for gs2 arguement (ie, not empty geometry)
556 	   and predicate function to return true. */
557 	if ((gserialized_datum_get_gidx_p(gs1, gidx1) == LW_SUCCESS) && predicate(gidx1, gidx2))
558 	{
559 		POSTGIS_DEBUGF(3, "got boxes %s and %s", gidx_to_string(gidx1), gidx_to_string(gidx2));
560 		return LW_TRUE;
561 	}
562 	return LW_FALSE;
563 }
564 
565 /**
566  * Calculate the box->box distance.
567  */
568 static double
gidx_distance(const GIDX * a,const GIDX * b,int m_is_time)569 gidx_distance(const GIDX *a, const GIDX *b, int m_is_time)
570 {
571 	int ndims, i;
572 	double sum = 0;
573 
574 	/* Base computation on least available dimensions */
575 	ndims = Min(GIDX_NDIMS(b), GIDX_NDIMS(a));
576 	for (i = 0; i < ndims; ++i)
577 	{
578 		double d;
579 		double amin = GIDX_GET_MIN(a, i);
580 		double amax = GIDX_GET_MAX(a, i);
581 		double bmin = GIDX_GET_MIN(b, i);
582 		double bmax = GIDX_GET_MAX(b, i);
583 		POSTGIS_DEBUGF(3, "A %g - %g", amin, amax);
584 		POSTGIS_DEBUGF(3, "B %g - %g", bmin, bmax);
585 
586 		if ((amin <= bmax && amax >= bmin))
587 		{
588 			/* overlaps */
589 			d = 0;
590 		}
591 		else if (i == 4 && m_is_time)
592 		{
593 			return FLT_MAX;
594 		}
595 		else if (bmax < amin)
596 		{
597 			/* is "left" */
598 			d = amin - bmax;
599 		}
600 		else
601 		{
602 			/* is "right" */
603 			assert(bmin > amax);
604 			d = bmin - amax;
605 		}
606 		if (!isfinite(d))
607 		{
608 			/* Can happen if coordinates are corrupted/NaN */
609 			continue;
610 		}
611 		sum += d * d;
612 		POSTGIS_DEBUGF(3, "dist %g, squared %g, grows sum to %g", d, d * d, sum);
613 	}
614 	return sqrt(sum);
615 }
616 
617 /**
618  * Return a #GSERIALIZED with an expanded bounding box.
619  */
620 GSERIALIZED *
gserialized_expand(GSERIALIZED * g,double distance)621 gserialized_expand(GSERIALIZED *g, double distance)
622 {
623 	GBOX gbox;
624 	gbox_init(&gbox);
625 
626 	/* Get our bounding box out of the geography, return right away if
627 	   input is an EMPTY geometry. */
628 	if (gserialized_get_gbox_p(g, &gbox) == LW_FAILURE)
629 		return g;
630 
631 	gbox_expand(&gbox, distance);
632 
633 	return gserialized_set_gbox(g, &gbox);
634 }
635 
636 /***********************************************************************
637  * GiST N-D Index Operator Functions
638  */
639 
640 /*
641  * do "real" n-d distance
642  */
643 PG_FUNCTION_INFO_V1(gserialized_distance_nd);
gserialized_distance_nd(PG_FUNCTION_ARGS)644 Datum gserialized_distance_nd(PG_FUNCTION_ARGS)
645 {
646 	/* Feature-to-feature distance */
647 	GSERIALIZED *geom1 = PG_GETARG_GSERIALIZED_P(0);
648 	GSERIALIZED *geom2 = PG_GETARG_GSERIALIZED_P(1);
649 	LWGEOM *lw1 = lwgeom_from_gserialized(geom1);
650 	LWGEOM *lw2 = lwgeom_from_gserialized(geom2);
651 	LWGEOM *closest;
652 	double distance;
653 
654 	/* Find an exact shortest line w/ the dimensions we support */
655 	if (lwgeom_has_z(lw1) && lwgeom_has_z(lw2))
656 	{
657 		closest = lwgeom_closest_line_3d(lw1, lw2);
658 		distance = lwgeom_length(closest);
659 	}
660 	else
661 	{
662 		closest = lwgeom_closest_line(lw1, lw2);
663 		distance = lwgeom_length_2d(closest);
664 	}
665 
666 	/* Can only add the M term if both objects have M */
667 	if (lwgeom_has_m(lw1) && lwgeom_has_m(lw2))
668 	{
669 		double m1 = 0, m2 = 0;
670 		int usebox = false;
671 		/* Un-sqrt the distance so we can add extra terms */
672 		distance = distance * distance;
673 
674 		if (lwgeom_get_type(lw1) == POINTTYPE)
675 		{
676 			POINT4D p;
677 			lwpoint_getPoint4d_p((LWPOINT *)lw1, &p);
678 			m1 = p.m;
679 		}
680 		else if (lwgeom_get_type(lw1) == LINETYPE)
681 		{
682 			LWPOINT *lwp1 = lwline_get_lwpoint(lwgeom_as_lwline(closest), 0);
683 			m1 = lwgeom_interpolate_point(lw1, lwp1);
684 			lwpoint_free(lwp1);
685 		}
686 		else
687 			usebox = true;
688 
689 		if (lwgeom_get_type(lw2) == POINTTYPE)
690 		{
691 			POINT4D p;
692 			lwpoint_getPoint4d_p((LWPOINT *)lw2, &p);
693 			m2 = p.m;
694 		}
695 		else if (lwgeom_get_type(lw2) == LINETYPE)
696 		{
697 			LWPOINT *lwp2 = lwline_get_lwpoint(lwgeom_as_lwline(closest), 1);
698 			m2 = lwgeom_interpolate_point(lw2, lwp2);
699 			lwpoint_free(lwp2);
700 		}
701 		else
702 			usebox = true;
703 
704 		if (usebox)
705 		{
706 			GBOX b1, b2;
707 			if (gserialized_get_gbox_p(geom1, &b1) && gserialized_get_gbox_p(geom2, &b2))
708 			{
709 				double d;
710 				/* Disjoint left */
711 				if (b1.mmin > b2.mmax)
712 					d = b1.mmin - b2.mmax;
713 				/* Disjoint right */
714 				else if (b2.mmin > b1.mmax)
715 					d = b2.mmin - b1.mmax;
716 				/* Not Disjoint */
717 				else
718 					d = 0;
719 				distance += d * d;
720 			}
721 		}
722 		else
723 			distance += (m2 - m1) * (m2 - m1);
724 
725 		distance = sqrt(distance);
726 	}
727 
728 	lwgeom_free(closest);
729 
730 	PG_FREE_IF_COPY(geom1, 0);
731 	PG_FREE_IF_COPY(geom2, 1);
732 	PG_RETURN_FLOAT8(distance);
733 }
734 
735 /*
736 ** '~~' and operator function. Based on two serialized return true if
737 ** the first is contained by the second.
738 */
739 PG_FUNCTION_INFO_V1(gserialized_within);
gserialized_within(PG_FUNCTION_ARGS)740 Datum gserialized_within(PG_FUNCTION_ARGS)
741 {
742 	if (gserialized_datum_predicate(PG_GETARG_DATUM(1), PG_GETARG_DATUM(0), gidx_contains))
743 		PG_RETURN_BOOL(true);
744 
745 	PG_RETURN_BOOL(false);
746 }
747 
748 /*
749 ** '~~' and operator function. Based on a GIDX and a serialized return true if
750 ** the first is contained by the second.
751 */
752 PG_FUNCTION_INFO_V1(gserialized_gidx_geom_within);
gserialized_gidx_geom_within(PG_FUNCTION_ARGS)753 Datum gserialized_gidx_geom_within(PG_FUNCTION_ARGS)
754 {
755 	GIDX *gidx = (GIDX *)PG_GETARG_POINTER(0);
756 
757 	if (gserialized_datum_predicate_geom_gidx(PG_GETARG_DATUM(1), gidx, gidx_contains))
758 		PG_RETURN_BOOL(true);
759 
760 	PG_RETURN_BOOL(false);
761 }
762 
763 /*
764 ** '~~' and operator function. Based on two GIDX return true if
765 ** the first is contained by the second.
766 */
767 PG_FUNCTION_INFO_V1(gserialized_gidx_gidx_within);
gserialized_gidx_gidx_within(PG_FUNCTION_ARGS)768 Datum gserialized_gidx_gidx_within(PG_FUNCTION_ARGS)
769 {
770 	if (gidx_contains((GIDX *)PG_GETARG_POINTER(1), (GIDX *)PG_GETARG_POINTER(0)))
771 		PG_RETURN_BOOL(true);
772 
773 	PG_RETURN_BOOL(false);
774 }
775 
776 /*
777 ** '@@' and operator function. Based on two serialized return true if
778 ** the first contains the second.
779 */
780 PG_FUNCTION_INFO_V1(gserialized_contains);
gserialized_contains(PG_FUNCTION_ARGS)781 Datum gserialized_contains(PG_FUNCTION_ARGS)
782 {
783 	if (gserialized_datum_predicate(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), gidx_contains))
784 		PG_RETURN_BOOL(true);
785 
786 	PG_RETURN_BOOL(false);
787 }
788 
789 /*
790 ** '@@' and operator function. Based on a GIDX and a serialized return true if
791 ** the first contains the second.
792 */
793 PG_FUNCTION_INFO_V1(gserialized_gidx_geom_contains);
gserialized_gidx_geom_contains(PG_FUNCTION_ARGS)794 Datum gserialized_gidx_geom_contains(PG_FUNCTION_ARGS)
795 {
796 	GIDX *gidx = (GIDX *)PG_GETARG_POINTER(0);
797 
798 	if (gserialized_datum_predicate_gidx_geom(gidx, PG_GETARG_DATUM(1), gidx_contains))
799 		PG_RETURN_BOOL(true);
800 
801 	PG_RETURN_BOOL(false);
802 }
803 
804 /*
805 ** '@@' and operator function. Based on two GIDX return true if
806 ** the first contains the second.
807 */
808 PG_FUNCTION_INFO_V1(gserialized_gidx_gidx_contains);
gserialized_gidx_gidx_contains(PG_FUNCTION_ARGS)809 Datum gserialized_gidx_gidx_contains(PG_FUNCTION_ARGS)
810 {
811 	if (gidx_contains((GIDX *)PG_GETARG_POINTER(0), (GIDX *)PG_GETARG_POINTER(1)))
812 		PG_RETURN_BOOL(true);
813 
814 	PG_RETURN_BOOL(false);
815 }
816 
817 /*
818 ** '~=' and operator function. Based on two serialized return true if
819 ** the first equals the second.
820 */
821 PG_FUNCTION_INFO_V1(gserialized_same);
gserialized_same(PG_FUNCTION_ARGS)822 Datum gserialized_same(PG_FUNCTION_ARGS)
823 {
824 	if (gserialized_datum_predicate(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), gidx_equals))
825 		PG_RETURN_BOOL(true);
826 
827 	PG_RETURN_BOOL(false);
828 }
829 
830 PG_FUNCTION_INFO_V1(gserialized_gidx_geom_same);
gserialized_gidx_geom_same(PG_FUNCTION_ARGS)831 Datum gserialized_gidx_geom_same(PG_FUNCTION_ARGS)
832 {
833 	GIDX *gidx = (GIDX *)PG_GETARG_POINTER(0);
834 
835 	if (gserialized_datum_predicate_gidx_geom(gidx, PG_GETARG_DATUM(1), gidx_equals))
836 		PG_RETURN_BOOL(true);
837 
838 	PG_RETURN_BOOL(false);
839 }
840 
841 /*
842 ** '~=' and operator function. Based on two GIDX return true if
843 ** the first equals the second.
844 */
845 PG_FUNCTION_INFO_V1(gserialized_gidx_gidx_same);
gserialized_gidx_gidx_same(PG_FUNCTION_ARGS)846 Datum gserialized_gidx_gidx_same(PG_FUNCTION_ARGS)
847 {
848 	if (gidx_equals((GIDX *)PG_GETARG_POINTER(0), (GIDX *)PG_GETARG_POINTER(1)))
849 		PG_RETURN_BOOL(true);
850 
851 	PG_RETURN_BOOL(false);
852 }
853 
854 /*
855 ** '&&&' operator function. Based on two serialized return true if
856 ** they overlap and false otherwise.
857 */
858 PG_FUNCTION_INFO_V1(gserialized_overlaps);
gserialized_overlaps(PG_FUNCTION_ARGS)859 Datum gserialized_overlaps(PG_FUNCTION_ARGS)
860 {
861 	if (gserialized_datum_predicate(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), gidx_overlaps))
862 		PG_RETURN_BOOL(true);
863 
864 	PG_RETURN_BOOL(false);
865 }
866 
867 /*
868  * This is the cross-operator for the geographies
869  */
870 PG_FUNCTION_INFO_V1(gserialized_gidx_geog_overlaps);
gserialized_gidx_geog_overlaps(PG_FUNCTION_ARGS)871 Datum gserialized_gidx_geog_overlaps(PG_FUNCTION_ARGS)
872 {
873 	GIDX *gidx = (GIDX *)PG_GETARG_POINTER(0);
874 
875 	if (gserialized_datum_predicate_gidx_geom(gidx, PG_GETARG_DATUM(1), gidx_overlaps))
876 		PG_RETURN_BOOL(true);
877 
878 	PG_RETURN_BOOL(false);
879 }
880 
881 PG_FUNCTION_INFO_V1(gserialized_gidx_geom_overlaps);
gserialized_gidx_geom_overlaps(PG_FUNCTION_ARGS)882 Datum gserialized_gidx_geom_overlaps(PG_FUNCTION_ARGS)
883 {
884 	GIDX *gidx = (GIDX *)PG_GETARG_POINTER(0);
885 
886 	if (gserialized_datum_predicate_gidx_geom(gidx, PG_GETARG_DATUM(1), gidx_overlaps))
887 		PG_RETURN_BOOL(true);
888 
889 	PG_RETURN_BOOL(false);
890 }
891 
892 PG_FUNCTION_INFO_V1(gserialized_gidx_gidx_overlaps);
gserialized_gidx_gidx_overlaps(PG_FUNCTION_ARGS)893 Datum gserialized_gidx_gidx_overlaps(PG_FUNCTION_ARGS)
894 {
895 	if (gidx_overlaps((GIDX *)PG_GETARG_POINTER(0), (GIDX *)PG_GETARG_POINTER(1)))
896 		PG_RETURN_BOOL(true);
897 
898 	PG_RETURN_BOOL(false);
899 }
900 
901 /***********************************************************************
902  * GiST Index  Support Functions
903  */
904 
905 /*
906 ** GiST support function. Given a geography, return a "compressed"
907 ** version. In this case, we convert the geography into a geocentric
908 ** bounding box. If the geography already has the box embedded in it
909 ** we pull that out and hand it back.
910 */
911 PG_FUNCTION_INFO_V1(gserialized_gist_compress);
gserialized_gist_compress(PG_FUNCTION_ARGS)912 Datum gserialized_gist_compress(PG_FUNCTION_ARGS)
913 {
914 	GISTENTRY *entry_in = (GISTENTRY *)PG_GETARG_POINTER(0);
915 	GISTENTRY *entry_out = NULL;
916 	char gidxmem[GIDX_MAX_SIZE];
917 	GIDX *bbox_out = (GIDX *)gidxmem;
918 	int result = LW_SUCCESS;
919 	uint32_t i;
920 
921 	POSTGIS_DEBUG(4, "[GIST] 'compress' function called");
922 
923 	/*
924 	** Not a leaf key? There's nothing to do.
925 	** Return the input unchanged.
926 	*/
927 	if (!entry_in->leafkey)
928 	{
929 		POSTGIS_DEBUG(4, "[GIST] non-leafkey entry, returning input unaltered");
930 		PG_RETURN_POINTER(entry_in);
931 	}
932 
933 	POSTGIS_DEBUG(4, "[GIST] processing leafkey input");
934 	entry_out = palloc(sizeof(GISTENTRY));
935 
936 	/*
937 	** Null key? Make a copy of the input entry and
938 	** return.
939 	*/
940 	if (!DatumGetPointer(entry_in->key))
941 	{
942 		POSTGIS_DEBUG(4, "[GIST] leafkey is null");
943 		gistentryinit(*entry_out, (Datum)0, entry_in->rel, entry_in->page, entry_in->offset, false);
944 		POSTGIS_DEBUG(4, "[GIST] returning copy of input");
945 		PG_RETURN_POINTER(entry_out);
946 	}
947 
948 	/* Extract our index key from the GiST entry. */
949 	result = gserialized_datum_get_gidx_p(entry_in->key, bbox_out);
950 
951 	/* Is the bounding box valid (non-empty, non-infinite) ?
952 	 * If not, use the "unknown" GIDX. */
953 	if (result == LW_FAILURE)
954 	{
955 		POSTGIS_DEBUG(4, "[GIST] empty geometry!");
956 		gidx_set_unknown(bbox_out);
957 		gistentryinit(*entry_out,
958 			      PointerGetDatum(gidx_copy(bbox_out)),
959 			      entry_in->rel,
960 			      entry_in->page,
961 			      entry_in->offset,
962 			      false);
963 		PG_RETURN_POINTER(entry_out);
964 	}
965 
966 	POSTGIS_DEBUGF(4, "[GIST] got entry_in->key: %s", gidx_to_string(bbox_out));
967 
968 	/* ORIGINAL VERSION */
969 	/* Check all the dimensions for finite values.
970 	 * If not, use the "unknown" GIDX as a key */
971 	for (i = 0; i < GIDX_NDIMS(bbox_out); i++)
972 	{
973 		if (!isfinite(GIDX_GET_MAX(bbox_out, i)) || !isfinite(GIDX_GET_MIN(bbox_out, i)))
974 		{
975 			gidx_set_unknown(bbox_out);
976 			gistentryinit(*entry_out,
977 				      PointerGetDatum(gidx_copy(bbox_out)),
978 				      entry_in->rel,
979 				      entry_in->page,
980 				      entry_in->offset,
981 				      false);
982 			PG_RETURN_POINTER(entry_out);
983 		}
984 	}
985 
986 	/* Ensure bounding box has minimums below maximums. */
987 	gidx_validate(bbox_out);
988 
989 	/* Prepare GISTENTRY for return. */
990 	gistentryinit(
991 	    *entry_out, PointerGetDatum(gidx_copy(bbox_out)), entry_in->rel, entry_in->page, entry_in->offset, false);
992 
993 	/* Return GISTENTRY. */
994 	POSTGIS_DEBUG(4, "[GIST] 'compress' function complete");
995 	PG_RETURN_POINTER(entry_out);
996 }
997 
998 /*
999 ** GiST support function.
1000 ** Decompress an entry. Unused for geography, so we return.
1001 */
1002 PG_FUNCTION_INFO_V1(gserialized_gist_decompress);
gserialized_gist_decompress(PG_FUNCTION_ARGS)1003 Datum gserialized_gist_decompress(PG_FUNCTION_ARGS)
1004 {
1005 	POSTGIS_DEBUG(5, "[GIST] 'decompress' function called");
1006 	/* We don't decompress. Just return the input. */
1007 	PG_RETURN_POINTER(PG_GETARG_POINTER(0));
1008 }
1009 
1010 /*
1011 ** GiST support function. Called from gserialized_gist_consistent below.
1012 */
1013 static inline bool
gserialized_gist_consistent_leaf(GIDX * key,GIDX * query,StrategyNumber strategy)1014 gserialized_gist_consistent_leaf(GIDX *key, GIDX *query, StrategyNumber strategy)
1015 {
1016 	bool retval;
1017 
1018 	POSTGIS_DEBUGF(4, "[GIST] leaf consistent, strategy [%d], count[%i]", strategy, geog_counter_leaf++);
1019 
1020 	switch (strategy)
1021 	{
1022 	case RTOverlapStrategyNumber:
1023 		retval = (bool)gidx_overlaps(key, query);
1024 		break;
1025 	case RTSameStrategyNumber:
1026 		retval = (bool)gidx_equals(key, query);
1027 		break;
1028 	case RTContainsStrategyNumber:
1029 	case RTOldContainsStrategyNumber:
1030 		retval = (bool)gidx_contains(key, query);
1031 		break;
1032 	case RTContainedByStrategyNumber:
1033 	case RTOldContainedByStrategyNumber:
1034 		retval = (bool)gidx_contains(query, key);
1035 		break;
1036 	default:
1037 		retval = false;
1038 	}
1039 
1040 	return retval;
1041 }
1042 
1043 /*
1044 ** GiST support function. Called from gserialized_gist_consistent below.
1045 */
1046 static inline bool
gserialized_gist_consistent_internal(GIDX * key,GIDX * query,StrategyNumber strategy)1047 gserialized_gist_consistent_internal(GIDX *key, GIDX *query, StrategyNumber strategy)
1048 {
1049 	bool retval;
1050 
1051 	POSTGIS_DEBUGF(4,
1052 		       "[GIST] internal consistent, strategy [%d], count[%i], query[%s], key[%s]",
1053 		       strategy,
1054 		       geog_counter_internal++,
1055 		       gidx_to_string(query),
1056 		       gidx_to_string(key));
1057 
1058 	switch (strategy)
1059 	{
1060 	case RTOverlapStrategyNumber:
1061 	case RTContainedByStrategyNumber:
1062 	case RTOldContainedByStrategyNumber:
1063 		retval = (bool)gidx_overlaps(key, query);
1064 		break;
1065 	case RTSameStrategyNumber:
1066 	case RTContainsStrategyNumber:
1067 	case RTOldContainsStrategyNumber:
1068 		retval = (bool)gidx_contains(key, query);
1069 		break;
1070 	default:
1071 		retval = false;
1072 	}
1073 
1074 	return retval;
1075 }
1076 
1077 /*
1078 ** GiST support function. Take in a query and an entry and see what the
1079 ** relationship is, based on the query strategy.
1080 */
1081 PG_FUNCTION_INFO_V1(gserialized_gist_consistent);
gserialized_gist_consistent(PG_FUNCTION_ARGS)1082 Datum gserialized_gist_consistent(PG_FUNCTION_ARGS)
1083 {
1084 	GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
1085 	StrategyNumber strategy = (StrategyNumber)PG_GETARG_UINT16(2);
1086 	bool result;
1087 	char gidxmem[GIDX_MAX_SIZE];
1088 	GIDX *query_gbox_index = (GIDX *)gidxmem;
1089 
1090 	/* PostgreSQL 8.4 and later require the RECHECK flag to be set here,
1091 	   rather than being supplied as part of the operator class definition */
1092 	bool *recheck = (bool *)PG_GETARG_POINTER(4);
1093 
1094 	/* We set recheck to false to avoid repeatedly pulling every "possibly matched" geometry
1095 	   out during index scans. For cases when the geometries are large, rechecking
1096 	   can make things twice as slow. */
1097 	*recheck = false;
1098 
1099 	POSTGIS_DEBUG(4, "[GIST] 'consistent' function called");
1100 
1101 	/* Quick sanity check on query argument. */
1102 	if (!DatumGetPointer(PG_GETARG_DATUM(1)))
1103 	{
1104 		POSTGIS_DEBUG(4, "[GIST] null query pointer (!?!), returning false");
1105 		PG_RETURN_BOOL(false); /* NULL query! This is screwy! */
1106 	}
1107 
1108 	/* Quick sanity check on entry key. */
1109 	if (!DatumGetPointer(entry->key))
1110 	{
1111 		POSTGIS_DEBUG(4, "[GIST] null index entry, returning false");
1112 		PG_RETURN_BOOL(false); /* NULL entry! */
1113 	}
1114 
1115 	/* Null box should never make this far. */
1116 	if (gserialized_datum_get_gidx_p(PG_GETARG_DATUM(1), query_gbox_index) == LW_FAILURE)
1117 	{
1118 		POSTGIS_DEBUG(4, "[GIST] null query_gbox_index!");
1119 		PG_RETURN_BOOL(false);
1120 	}
1121 
1122 	/* Treat leaf node tests different from internal nodes */
1123 	if (GIST_LEAF(entry))
1124 	{
1125 		result =
1126 		    gserialized_gist_consistent_leaf((GIDX *)DatumGetPointer(entry->key), query_gbox_index, strategy);
1127 	}
1128 	else
1129 	{
1130 		result = gserialized_gist_consistent_internal(
1131 		    (GIDX *)DatumGetPointer(entry->key), query_gbox_index, strategy);
1132 	}
1133 
1134 	PG_RETURN_BOOL(result);
1135 }
1136 
1137 /*
1138 ** Function to pack floats of different realms.
1139 ** This function serves to pack bit flags inside float type.
1140 ** Result value represent can be from two different "realms".
1141 ** Every value from realm 1 is greater than any value from realm 0.
1142 ** Values from the same realm loose one bit of precision.
1143 **
1144 ** This technique is possible due to floating point numbers specification
1145 ** according to IEEE 754: exponent bits are highest
1146 ** (excluding sign bits, but here penalty is always positive). If float a is
1147 ** greater than float b, integer A with same bit representation as a is greater
1148 ** than integer B with same bits as b.
1149 */
1150 static inline float
pack_float(const float value,const uint8_t realm)1151 pack_float(const float value, const uint8_t realm)
1152 {
1153 	union {
1154 		float f;
1155 		struct {
1156 			unsigned value : 31, sign : 1;
1157 		} vbits;
1158 		struct {
1159 			unsigned value : 30, realm : 1, sign : 1;
1160 		} rbits;
1161 	} a;
1162 
1163 	a.f = value;
1164 	a.rbits.value = a.vbits.value >> 1;
1165 	a.rbits.realm = realm;
1166 
1167 	return a.f;
1168 }
1169 
1170 /*
1171 ** GiST support function. Calculate the "penalty" cost of adding this entry into an existing entry.
1172 ** Calculate the change in volume of the old entry once the new entry is added.
1173 */
1174 PG_FUNCTION_INFO_V1(gserialized_gist_penalty);
gserialized_gist_penalty(PG_FUNCTION_ARGS)1175 Datum gserialized_gist_penalty(PG_FUNCTION_ARGS)
1176 {
1177 	GISTENTRY *origentry = (GISTENTRY *)PG_GETARG_POINTER(0);
1178 	GISTENTRY *newentry = (GISTENTRY *)PG_GETARG_POINTER(1);
1179 	float *result = (float *)PG_GETARG_POINTER(2);
1180 	GIDX *gbox_index_orig, *gbox_index_new;
1181 
1182 	gbox_index_orig = (GIDX *)DatumGetPointer(origentry->key);
1183 	gbox_index_new = (GIDX *)DatumGetPointer(newentry->key);
1184 
1185 	/* Penalty value of 0 has special code path in Postgres's gistchoose.
1186 	 * It is used as an early exit condition in subtree loop, allowing faster tree descend.
1187 	 * For multi-column index, it lets next column break the tie, possibly more confidently.
1188 	 */
1189 	*result = 0.0;
1190 
1191 	/* Drop out if we're dealing with null inputs. Shouldn't happen. */
1192 	if (gbox_index_orig && gbox_index_new)
1193 	{
1194 		/* Calculate the size difference of the boxes (volume difference in this case). */
1195 		float size_union = gidx_union_volume(gbox_index_orig, gbox_index_new);
1196 		float size_orig = gidx_volume(gbox_index_orig);
1197 		float volume_extension = size_union - size_orig;
1198 
1199 		/* REALM 1: Area extension is nonzero, return it */
1200 		if (volume_extension > FLT_EPSILON)
1201 			*result = pack_float(volume_extension, 1);
1202 		else
1203 		{
1204 			/* REALM 0: Area extension is zero, return nonzero edge extension */
1205 			float edge_union = gidx_union_edge(gbox_index_orig, gbox_index_new);
1206 			float edge_orig = gidx_edge(gbox_index_orig);
1207 			float edge_extension = edge_union - edge_orig;
1208 			if (edge_extension > FLT_EPSILON)
1209 				*result = pack_float(edge_extension, 0);
1210 		}
1211 	}
1212 	PG_RETURN_POINTER(result);
1213 }
1214 
1215 /*
1216 ** GiST support function. Merge all the boxes in a page into one master box.
1217 */
1218 PG_FUNCTION_INFO_V1(gserialized_gist_union);
gserialized_gist_union(PG_FUNCTION_ARGS)1219 Datum gserialized_gist_union(PG_FUNCTION_ARGS)
1220 {
1221 	GistEntryVector *entryvec = (GistEntryVector *)PG_GETARG_POINTER(0);
1222 	int *sizep = (int *)PG_GETARG_POINTER(1); /* Size of the return value */
1223 	int numranges, i;
1224 	GIDX *box_cur, *box_union;
1225 
1226 	POSTGIS_DEBUG(4, "[GIST] 'union' function called");
1227 
1228 	numranges = entryvec->n;
1229 
1230 	box_cur = (GIDX *)DatumGetPointer(entryvec->vector[0].key);
1231 
1232 	box_union = gidx_copy(box_cur);
1233 
1234 	for (i = 1; i < numranges; i++)
1235 	{
1236 		box_cur = (GIDX *)DatumGetPointer(entryvec->vector[i].key);
1237 		gidx_merge(&box_union, box_cur);
1238 	}
1239 
1240 	*sizep = VARSIZE(box_union);
1241 
1242 	POSTGIS_DEBUGF(4, "[GIST] union called, numranges(%i), pageunion %s", numranges, gidx_to_string(box_union));
1243 
1244 	PG_RETURN_POINTER(box_union);
1245 }
1246 
1247 /*
1248 ** GiST support function. Test equality of keys.
1249 */
1250 PG_FUNCTION_INFO_V1(gserialized_gist_same);
gserialized_gist_same(PG_FUNCTION_ARGS)1251 Datum gserialized_gist_same(PG_FUNCTION_ARGS)
1252 {
1253 	GIDX *b1 = (GIDX *)PG_GETARG_POINTER(0);
1254 	GIDX *b2 = (GIDX *)PG_GETARG_POINTER(1);
1255 	bool *result = (bool *)PG_GETARG_POINTER(2);
1256 
1257 	POSTGIS_DEBUG(4, "[GIST] 'same' function called");
1258 
1259 	*result = gidx_equals(b1, b2);
1260 
1261 	PG_RETURN_POINTER(result);
1262 }
1263 
1264 PG_FUNCTION_INFO_V1(gserialized_gist_geog_distance);
gserialized_gist_geog_distance(PG_FUNCTION_ARGS)1265 Datum gserialized_gist_geog_distance(PG_FUNCTION_ARGS)
1266 {
1267 	GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
1268 	Datum query_datum = PG_GETARG_DATUM(1);
1269 	StrategyNumber strategy = (StrategyNumber)PG_GETARG_UINT16(2);
1270 	bool *recheck = (bool *)PG_GETARG_POINTER(4);
1271 	char query_box_mem[GIDX_MAX_SIZE];
1272 	GIDX *query_box = (GIDX *)query_box_mem;
1273 	GIDX *entry_box;
1274 	double distance;
1275 
1276 	POSTGIS_DEBUGF(3, "[GIST] '%s' function called", __func__);
1277 
1278 	/* We are using '13' as the gist geography distance <-> strategy number */
1279 	if (strategy != 13)
1280 	{
1281 		elog(ERROR, "unrecognized strategy number: %d", strategy);
1282 		PG_RETURN_FLOAT8(FLT_MAX);
1283 	}
1284 
1285 	/* Null box should never make this far. */
1286 	if (gserialized_datum_get_gidx_p(query_datum, query_box) == LW_FAILURE)
1287 	{
1288 		POSTGIS_DEBUG(2, "[GIST] null query_gbox_index!");
1289 		PG_RETURN_FLOAT8(FLT_MAX);
1290 	}
1291 
1292 	/* When we hit leaf nodes, it's time to turn on recheck */
1293 	if (GIST_LEAF(entry))
1294 		*recheck = true;
1295 
1296 	/* Get the entry box */
1297 	entry_box = (GIDX *)DatumGetPointer(entry->key);
1298 
1299 	/* Return distances from key-based tests should always be */
1300 	/* the minimum possible distance, box-to-box */
1301 	/* We scale up to "world units" so that the box-to-box distances */
1302 	/* compare reasonably with the over-the-spheroid distances that */
1303 	/* the recheck process will turn up */
1304 	distance = WGS84_RADIUS * gidx_distance(entry_box, query_box, 0);
1305 	POSTGIS_DEBUGF(2, "[GIST] '%s' got distance %g", __func__, distance);
1306 
1307 	PG_RETURN_FLOAT8(distance);
1308 }
1309 
1310 /*
1311 ** GiST support function.
1312 ** Take in a query and an entry and return the "distance" between them.
1313 **
1314 ** Given an index entry p and a query value q, this function determines the
1315 ** index entry's "distance" from the query value. This function must be
1316 ** supplied if the operator class contains any ordering operators. A query
1317 ** using the ordering operator will be implemented by returning index entries
1318 ** with the smallest "distance" values first, so the results must be consistent
1319 ** with the operator's semantics. For a leaf index entry the result just
1320 ** represents the distance to the index entry; for an internal tree node, the
1321 ** result must be the smallest distance that any child entry could have.
1322 **
1323 ** Strategy 13 is centroid-based distance tests <<->>
1324 ** Strategy 20 is cpa based distance tests |=|
1325 */
1326 PG_FUNCTION_INFO_V1(gserialized_gist_distance);
gserialized_gist_distance(PG_FUNCTION_ARGS)1327 Datum gserialized_gist_distance(PG_FUNCTION_ARGS)
1328 {
1329 	GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
1330 	StrategyNumber strategy = (StrategyNumber)PG_GETARG_UINT16(2);
1331 	char query_box_mem[GIDX_MAX_SIZE];
1332 	GIDX *query_box = (GIDX *)query_box_mem;
1333 	GIDX *entry_box;
1334 	bool *recheck = (bool *)PG_GETARG_POINTER(4);
1335 
1336 	double distance;
1337 
1338 	POSTGIS_DEBUG(4, "[GIST] 'distance' function called");
1339 
1340 	/* Strategy 13 is <<->> */
1341 	/* Strategy 20 is |=| */
1342 	if (strategy != 13 && strategy != 20)
1343 	{
1344 		elog(ERROR, "unrecognized strategy number: %d", strategy);
1345 		PG_RETURN_FLOAT8(FLT_MAX);
1346 	}
1347 
1348 	/* Null box should never make this far. */
1349 	if (gserialized_datum_get_gidx_p(PG_GETARG_DATUM(1), query_box) == LW_FAILURE)
1350 	{
1351 		POSTGIS_DEBUG(4, "[GIST] null query_gbox_index!");
1352 		PG_RETURN_FLOAT8(FLT_MAX);
1353 	}
1354 
1355 	/* Get the entry box */
1356 	entry_box = (GIDX *)DatumGetPointer(entry->key);
1357 
1358 	/* Strategy 20 is |=| */
1359 	distance = gidx_distance(entry_box, query_box, strategy == 20);
1360 
1361 	/* Treat leaf node tests different from internal nodes */
1362 	if (GIST_LEAF(entry))
1363 		*recheck = true;
1364 
1365 	PG_RETURN_FLOAT8(distance);
1366 }
1367 
1368 /*
1369 ** Utility function to add entries to the axis partition lists in the
1370 ** picksplit function.
1371 */
1372 static void
gserialized_gist_picksplit_addlist(OffsetNumber * list,GIDX ** box_union,GIDX * box_current,int * pos,int num)1373 gserialized_gist_picksplit_addlist(OffsetNumber *list, GIDX **box_union, GIDX *box_current, int *pos, int num)
1374 {
1375 	if (*pos)
1376 		gidx_merge(box_union, box_current);
1377 	else
1378 	{
1379 		pfree(*box_union);
1380 		*box_union = gidx_copy(box_current);
1381 	}
1382 
1383 	list[*pos] = num;
1384 	(*pos)++;
1385 }
1386 
1387 /*
1388 ** Utility function check whether the number of entries two halves of the
1389 ** space constitute a "bad ratio" (poor balance).
1390 */
1391 static int
gserialized_gist_picksplit_badratio(int x,int y)1392 gserialized_gist_picksplit_badratio(int x, int y)
1393 {
1394 	POSTGIS_DEBUGF(4, "[GIST] checking split ratio (%d, %d)", x, y);
1395 	if ((y == 0) || (((float)x / (float)y) < LIMIT_RATIO) || (x == 0) || (((float)y / (float)x) < LIMIT_RATIO))
1396 		return true;
1397 
1398 	return false;
1399 }
1400 
1401 static bool
gserialized_gist_picksplit_badratios(int * pos,int dims)1402 gserialized_gist_picksplit_badratios(int *pos, int dims)
1403 {
1404 	int i;
1405 	for (i = 0; i < dims; i++)
1406 	{
1407 		if (gserialized_gist_picksplit_badratio(pos[2 * i], pos[2 * i + 1]) == false)
1408 			return false;
1409 	}
1410 	return true;
1411 }
1412 
1413 /*
1414 ** Where the picksplit algorithm cannot find any basis for splitting one way
1415 ** or another, we simply split the overflowing node in half.
1416 */
1417 static void
gserialized_gist_picksplit_fallback(GistEntryVector * entryvec,GIST_SPLITVEC * v)1418 gserialized_gist_picksplit_fallback(GistEntryVector *entryvec, GIST_SPLITVEC *v)
1419 {
1420 	OffsetNumber i, maxoff;
1421 	GIDX *unionL = NULL;
1422 	GIDX *unionR = NULL;
1423 	int nbytes;
1424 
1425 	POSTGIS_DEBUG(4, "[GIST] in fallback picksplit function");
1426 
1427 	maxoff = entryvec->n - 1;
1428 
1429 	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
1430 	v->spl_left = (OffsetNumber *)palloc(nbytes);
1431 	v->spl_right = (OffsetNumber *)palloc(nbytes);
1432 	v->spl_nleft = v->spl_nright = 0;
1433 
1434 	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1435 	{
1436 		GIDX *cur = (GIDX *)DatumGetPointer(entryvec->vector[i].key);
1437 
1438 		if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
1439 		{
1440 			v->spl_left[v->spl_nleft] = i;
1441 			if (!unionL)
1442 				unionL = gidx_copy(cur);
1443 			else
1444 				gidx_merge(&unionL, cur);
1445 			v->spl_nleft++;
1446 		}
1447 		else
1448 		{
1449 			v->spl_right[v->spl_nright] = i;
1450 			if (!unionR)
1451 				unionR = gidx_copy(cur);
1452 			else
1453 				gidx_merge(&unionR, cur);
1454 			v->spl_nright++;
1455 		}
1456 	}
1457 
1458 	if (v->spl_ldatum_exists)
1459 		gidx_merge(&unionL, (GIDX *)DatumGetPointer(v->spl_ldatum));
1460 
1461 	v->spl_ldatum = PointerGetDatum(unionL);
1462 
1463 	if (v->spl_rdatum_exists)
1464 		gidx_merge(&unionR, (GIDX *)DatumGetPointer(v->spl_rdatum));
1465 
1466 	v->spl_rdatum = PointerGetDatum(unionR);
1467 	v->spl_ldatum_exists = v->spl_rdatum_exists = false;
1468 }
1469 
1470 static void
gserialized_gist_picksplit_constructsplit(GIST_SPLITVEC * v,OffsetNumber * list1,int nlist1,GIDX ** union1,OffsetNumber * list2,int nlist2,GIDX ** union2)1471 gserialized_gist_picksplit_constructsplit(GIST_SPLITVEC *v,
1472 					  OffsetNumber *list1,
1473 					  int nlist1,
1474 					  GIDX **union1,
1475 					  OffsetNumber *list2,
1476 					  int nlist2,
1477 					  GIDX **union2)
1478 {
1479 	bool firstToLeft = true;
1480 
1481 	POSTGIS_DEBUG(4, "[GIST] picksplit in constructsplit function");
1482 
1483 	if (v->spl_ldatum_exists || v->spl_rdatum_exists)
1484 	{
1485 		if (v->spl_ldatum_exists && v->spl_rdatum_exists)
1486 		{
1487 			GIDX *LRl = gidx_copy(*union1);
1488 			GIDX *LRr = gidx_copy(*union2);
1489 			GIDX *RLl = gidx_copy(*union2);
1490 			GIDX *RLr = gidx_copy(*union1);
1491 			double sizeLR, sizeRL;
1492 
1493 			gidx_merge(&LRl, (GIDX *)DatumGetPointer(v->spl_ldatum));
1494 			gidx_merge(&LRr, (GIDX *)DatumGetPointer(v->spl_rdatum));
1495 			gidx_merge(&RLl, (GIDX *)DatumGetPointer(v->spl_ldatum));
1496 			gidx_merge(&RLr, (GIDX *)DatumGetPointer(v->spl_rdatum));
1497 
1498 			sizeLR = gidx_inter_volume(LRl, LRr);
1499 			sizeRL = gidx_inter_volume(RLl, RLr);
1500 
1501 			POSTGIS_DEBUGF(4, "[GIST] sizeLR / sizeRL == %.12g / %.12g", sizeLR, sizeRL);
1502 
1503 			if (sizeLR > sizeRL)
1504 				firstToLeft = false;
1505 		}
1506 		else
1507 		{
1508 			float p1, p2;
1509 			GISTENTRY oldUnion, addon;
1510 
1511 			gistentryinit(oldUnion,
1512 				      (v->spl_ldatum_exists) ? v->spl_ldatum : v->spl_rdatum,
1513 				      NULL,
1514 				      NULL,
1515 				      InvalidOffsetNumber,
1516 				      false);
1517 
1518 			gistentryinit(addon, PointerGetDatum(*union1), NULL, NULL, InvalidOffsetNumber, false);
1519 			DirectFunctionCall3(gserialized_gist_penalty,
1520 					    PointerGetDatum(&oldUnion),
1521 					    PointerGetDatum(&addon),
1522 					    PointerGetDatum(&p1));
1523 			gistentryinit(addon, PointerGetDatum(*union2), NULL, NULL, InvalidOffsetNumber, false);
1524 			DirectFunctionCall3(gserialized_gist_penalty,
1525 					    PointerGetDatum(&oldUnion),
1526 					    PointerGetDatum(&addon),
1527 					    PointerGetDatum(&p2));
1528 
1529 			POSTGIS_DEBUGF(4, "[GIST] p1 / p2 == %.12g / %.12g", p1, p2);
1530 
1531 			if ((v->spl_ldatum_exists && p1 > p2) || (v->spl_rdatum_exists && p1 < p2))
1532 				firstToLeft = false;
1533 		}
1534 	}
1535 
1536 	POSTGIS_DEBUGF(4, "[GIST] firstToLeft == %d", firstToLeft);
1537 
1538 	if (firstToLeft)
1539 	{
1540 		v->spl_left = list1;
1541 		v->spl_right = list2;
1542 		v->spl_nleft = nlist1;
1543 		v->spl_nright = nlist2;
1544 		if (v->spl_ldatum_exists)
1545 			gidx_merge(union1, (GIDX *)DatumGetPointer(v->spl_ldatum));
1546 		v->spl_ldatum = PointerGetDatum(*union1);
1547 		if (v->spl_rdatum_exists)
1548 			gidx_merge(union2, (GIDX *)DatumGetPointer(v->spl_rdatum));
1549 		v->spl_rdatum = PointerGetDatum(*union2);
1550 	}
1551 	else
1552 	{
1553 		v->spl_left = list2;
1554 		v->spl_right = list1;
1555 		v->spl_nleft = nlist2;
1556 		v->spl_nright = nlist1;
1557 		if (v->spl_ldatum_exists)
1558 			gidx_merge(union2, (GIDX *)DatumGetPointer(v->spl_ldatum));
1559 		v->spl_ldatum = PointerGetDatum(*union2);
1560 		if (v->spl_rdatum_exists)
1561 			gidx_merge(union1, (GIDX *)DatumGetPointer(v->spl_rdatum));
1562 		v->spl_rdatum = PointerGetDatum(*union1);
1563 	}
1564 
1565 	v->spl_ldatum_exists = v->spl_rdatum_exists = false;
1566 }
1567 
1568 #define BELOW(d) (2 * (d))
1569 #define ABOVE(d) ((2 * (d)) + 1)
1570 
1571 /*
1572 ** GiST support function. Split an overflowing node into two new nodes.
1573 ** Uses linear algorithm from Ang & Tan [2], dividing node extent into
1574 ** four quadrants, and splitting along the axis that most evenly distributes
1575 ** entries between the new nodes.
1576 ** TODO: Re-evaluate this in light of R*Tree picksplit approaches.
1577 */
1578 PG_FUNCTION_INFO_V1(gserialized_gist_picksplit);
gserialized_gist_picksplit(PG_FUNCTION_ARGS)1579 Datum gserialized_gist_picksplit(PG_FUNCTION_ARGS)
1580 {
1581 
1582 	GistEntryVector *entryvec = (GistEntryVector *)PG_GETARG_POINTER(0);
1583 
1584 	GIST_SPLITVEC *v = (GIST_SPLITVEC *)PG_GETARG_POINTER(1);
1585 	OffsetNumber i;
1586 	/* One union box for each half of the space. */
1587 	GIDX **box_union;
1588 	/* One offset number list for each half of the space. */
1589 	OffsetNumber **list;
1590 	/* One position index for each half of the space. */
1591 	int *pos;
1592 	GIDX *box_pageunion;
1593 	GIDX *box_current;
1594 	int direction = -1;
1595 	bool all_entries_equal = true;
1596 	OffsetNumber max_offset;
1597 	int nbytes, ndims_pageunion, d;
1598 	int posmin = entryvec->n;
1599 
1600 	POSTGIS_DEBUG(4, "[GIST] 'picksplit' function called");
1601 
1602 	/*
1603 	** First calculate the bounding box and maximum number of dimensions in this page.
1604 	*/
1605 
1606 	max_offset = entryvec->n - 1;
1607 	box_current = (GIDX *)DatumGetPointer(entryvec->vector[FirstOffsetNumber].key);
1608 	box_pageunion = gidx_copy(box_current);
1609 
1610 	/* Calculate the containing box (box_pageunion) for the whole page we are going to split. */
1611 	for (i = OffsetNumberNext(FirstOffsetNumber); i <= max_offset; i = OffsetNumberNext(i))
1612 	{
1613 		box_current = (GIDX *)DatumGetPointer(entryvec->vector[i].key);
1614 
1615 		if (all_entries_equal && !gidx_equals(box_pageunion, box_current))
1616 			all_entries_equal = false;
1617 
1618 		gidx_merge(&box_pageunion, box_current);
1619 	}
1620 
1621 	POSTGIS_DEBUGF(3, "[GIST] box_pageunion: %s", gidx_to_string(box_pageunion));
1622 
1623 	/* Every box in the page is the same! So, we split and just put half the boxes in each child. */
1624 	if (all_entries_equal)
1625 	{
1626 		POSTGIS_DEBUG(4, "[GIST] picksplit finds all entries equal!");
1627 		gserialized_gist_picksplit_fallback(entryvec, v);
1628 		PG_RETURN_POINTER(v);
1629 	}
1630 
1631 	/* Initialize memory structures. */
1632 	nbytes = (max_offset + 2) * sizeof(OffsetNumber);
1633 	ndims_pageunion = GIDX_NDIMS(box_pageunion);
1634 	POSTGIS_DEBUGF(4, "[GIST] ndims_pageunion == %d", ndims_pageunion);
1635 	pos = palloc(2 * ndims_pageunion * sizeof(int));
1636 	list = palloc(2 * ndims_pageunion * sizeof(OffsetNumber *));
1637 	box_union = palloc(2 * ndims_pageunion * sizeof(GIDX *));
1638 	for (d = 0; d < ndims_pageunion; d++)
1639 	{
1640 		list[BELOW(d)] = (OffsetNumber *)palloc(nbytes);
1641 		list[ABOVE(d)] = (OffsetNumber *)palloc(nbytes);
1642 		box_union[BELOW(d)] = gidx_new(ndims_pageunion);
1643 		box_union[ABOVE(d)] = gidx_new(ndims_pageunion);
1644 		pos[BELOW(d)] = 0;
1645 		pos[ABOVE(d)] = 0;
1646 	}
1647 
1648 	/*
1649 	** Assign each entry in the node to the volume partitions it belongs to,
1650 	** such as "above the x/y plane, left of the y/z plane, below the x/z plane".
1651 	** Each entry thereby ends up in three of the six partitions.
1652 	*/
1653 	POSTGIS_DEBUG(4, "[GIST] 'picksplit' calculating best split axis");
1654 	for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
1655 	{
1656 		box_current = (GIDX *)DatumGetPointer(entryvec->vector[i].key);
1657 
1658 		for (d = 0; d < ndims_pageunion; d++)
1659 		{
1660 			if (GIDX_GET_MIN(box_current, d) - GIDX_GET_MIN(box_pageunion, d) <
1661 			    GIDX_GET_MAX(box_pageunion, d) - GIDX_GET_MAX(box_current, d))
1662 				gserialized_gist_picksplit_addlist(
1663 				    list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1664 			else
1665 				gserialized_gist_picksplit_addlist(
1666 				    list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1667 		}
1668 	}
1669 
1670 	/*
1671 	** "Bad disposition", too many entries fell into one octant of the space, so no matter which
1672 	** plane we choose to split on, we're going to end up with a mostly full node. Where the
1673 	** data is pretty homogeneous (lots of duplicates) entries that are equidistant from the
1674 	** sides of the page union box can occasionally all end up in one place, leading
1675 	** to this condition.
1676 	*/
1677 	if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion))
1678 	{
1679 		/*
1680 		** Instead we split on center points and see if we do better.
1681 		** First calculate the average center point for each axis.
1682 		*/
1683 		double *avgCenter = palloc(ndims_pageunion * sizeof(double));
1684 
1685 		for (d = 0; d < ndims_pageunion; d++)
1686 			avgCenter[d] = 0.0;
1687 
1688 		POSTGIS_DEBUG(4, "[GIST] picksplit can't find good split axis, trying center point method");
1689 
1690 		for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
1691 		{
1692 			box_current = (GIDX *)DatumGetPointer(entryvec->vector[i].key);
1693 			for (d = 0; d < ndims_pageunion; d++)
1694 				avgCenter[d] += (GIDX_GET_MAX(box_current, d) + GIDX_GET_MIN(box_current, d)) / 2.0;
1695 		}
1696 		for (d = 0; d < ndims_pageunion; d++)
1697 		{
1698 			avgCenter[d] /= max_offset;
1699 			pos[BELOW(d)] = pos[ABOVE(d)] = 0; /* Re-initialize our counters. */
1700 			POSTGIS_DEBUGF(4, "[GIST] picksplit average center point[%d] = %.12g", d, avgCenter[d]);
1701 		}
1702 
1703 		/* For each of our entries... */
1704 		for (i = FirstOffsetNumber; i <= max_offset; i = OffsetNumberNext(i))
1705 		{
1706 			double center;
1707 			box_current = (GIDX *)DatumGetPointer(entryvec->vector[i].key);
1708 
1709 			for (d = 0; d < ndims_pageunion; d++)
1710 			{
1711 				center = (GIDX_GET_MIN(box_current, d) + GIDX_GET_MAX(box_current, d)) / 2.0;
1712 				if (center < avgCenter[d])
1713 					gserialized_gist_picksplit_addlist(
1714 					    list[BELOW(d)], &(box_union[BELOW(d)]), box_current, &(pos[BELOW(d)]), i);
1715 				else if (FPeq(center, avgCenter[d]))
1716 					if (pos[BELOW(d)] > pos[ABOVE(d)])
1717 						gserialized_gist_picksplit_addlist(list[ABOVE(d)],
1718 										   &(box_union[ABOVE(d)]),
1719 										   box_current,
1720 										   &(pos[ABOVE(d)]),
1721 										   i);
1722 					else
1723 						gserialized_gist_picksplit_addlist(list[BELOW(d)],
1724 										   &(box_union[BELOW(d)]),
1725 										   box_current,
1726 										   &(pos[BELOW(d)]),
1727 										   i);
1728 				else
1729 					gserialized_gist_picksplit_addlist(
1730 					    list[ABOVE(d)], &(box_union[ABOVE(d)]), box_current, &(pos[ABOVE(d)]), i);
1731 			}
1732 		}
1733 
1734 		/* Do we have a good disposition now? If not, screw it, just cut the node in half. */
1735 		if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion))
1736 		{
1737 			POSTGIS_DEBUG(4,
1738 				      "[GIST] picksplit still cannot find a good split! just cutting the node in half");
1739 			gserialized_gist_picksplit_fallback(entryvec, v);
1740 			PG_RETURN_POINTER(v);
1741 		}
1742 	}
1743 
1744 	/*
1745 	** Now, what splitting plane gives us the most even ratio of
1746 	** entries in our child pages? Since each split region has been apportioned entries
1747 	** against the same number of total entries, the axis that has the smallest maximum
1748 	** number of entries in its regions is the most evenly distributed.
1749 	** TODO: what if the distributions are equal in two or more axes?
1750 	*/
1751 	for (d = 0; d < ndims_pageunion; d++)
1752 	{
1753 		int posd = Max(pos[ABOVE(d)], pos[BELOW(d)]);
1754 		if (posd < posmin)
1755 		{
1756 			direction = d;
1757 			posmin = posd;
1758 		}
1759 	}
1760 	if (direction == -1 || posmin == entryvec->n)
1761 		elog(ERROR, "Error in building split, unable to determine split direction.");
1762 
1763 	POSTGIS_DEBUGF(3, "[GIST] 'picksplit' splitting on axis %d", direction);
1764 
1765 	gserialized_gist_picksplit_constructsplit(v,
1766 						  list[BELOW(direction)],
1767 						  pos[BELOW(direction)],
1768 						  &(box_union[BELOW(direction)]),
1769 						  list[ABOVE(direction)],
1770 						  pos[ABOVE(direction)],
1771 						  &(box_union[ABOVE(direction)]));
1772 
1773 	POSTGIS_DEBUGF(4, "[GIST] spl_ldatum: %s", gidx_to_string((GIDX *)v->spl_ldatum));
1774 	POSTGIS_DEBUGF(4, "[GIST] spl_rdatum: %s", gidx_to_string((GIDX *)v->spl_rdatum));
1775 
1776 	POSTGIS_DEBUGF(
1777 	    4,
1778 	    "[GIST] axis %d: parent range (%.12g, %.12g) left range (%.12g, %.12g), right range (%.12g, %.12g)",
1779 	    direction,
1780 	    GIDX_GET_MIN(box_pageunion, direction),
1781 	    GIDX_GET_MAX(box_pageunion, direction),
1782 	    GIDX_GET_MIN((GIDX *)v->spl_ldatum, direction),
1783 	    GIDX_GET_MAX((GIDX *)v->spl_ldatum, direction),
1784 	    GIDX_GET_MIN((GIDX *)v->spl_rdatum, direction),
1785 	    GIDX_GET_MAX((GIDX *)v->spl_rdatum, direction));
1786 
1787 	PG_RETURN_POINTER(v);
1788 }
1789 
1790 /*
1791 ** The GIDX key must be defined as a PostgreSQL type, even though it is only
1792 ** ever used internally. These no-op stubs are used to bind the type.
1793 */
1794 PG_FUNCTION_INFO_V1(gidx_in);
gidx_in(PG_FUNCTION_ARGS)1795 Datum gidx_in(PG_FUNCTION_ARGS)
1796 {
1797 	ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("function gidx_in not implemented")));
1798 	PG_RETURN_POINTER(NULL);
1799 }
1800 
1801 PG_FUNCTION_INFO_V1(gidx_out);
gidx_out(PG_FUNCTION_ARGS)1802 Datum gidx_out(PG_FUNCTION_ARGS)
1803 {
1804 	GIDX *box = (GIDX *)PG_GETARG_POINTER(0);
1805 	char *result = gidx_to_string(box);
1806 	PG_RETURN_CSTRING(result);
1807 }
1808