1 /*-------------------------------------------------------------------------
2 *
3 * varlena.c
4 * Functions for the variable-length built-in types.
5 *
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/utils/adt/varlena.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include <ctype.h>
18 #include <limits.h>
19
20 #include "access/hash.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_collation.h"
23 #include "catalog/pg_type.h"
24 #include "common/md5.h"
25 #include "lib/hyperloglog.h"
26 #include "libpq/pqformat.h"
27 #include "miscadmin.h"
28 #include "parser/scansup.h"
29 #include "port/pg_bswap.h"
30 #include "regex/regex.h"
31 #include "utils/builtins.h"
32 #include "utils/bytea.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/pg_locale.h"
36 #include "utils/sortsupport.h"
37 #include "utils/varlena.h"
38
39
40 /* GUC variable */
41 int bytea_output = BYTEA_OUTPUT_HEX;
42
43 typedef struct varlena unknown;
44 typedef struct varlena VarString;
45
46 typedef struct
47 {
48 bool use_wchar; /* T if multibyte encoding */
49 char *str1; /* use these if not use_wchar */
50 char *str2; /* note: these point to original texts */
51 pg_wchar *wstr1; /* use these if use_wchar */
52 pg_wchar *wstr2; /* note: these are palloc'd */
53 int len1; /* string lengths in logical characters */
54 int len2;
55 /* Skip table for Boyer-Moore-Horspool search algorithm: */
56 int skiptablemask; /* mask for ANDing with skiptable subscripts */
57 int skiptable[256]; /* skip distance for given mismatched char */
58 } TextPositionState;
59
60 typedef struct
61 {
62 char *buf1; /* 1st string, or abbreviation original string
63 * buf */
64 char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
65 int buflen1;
66 int buflen2;
67 int last_len1; /* Length of last buf1 string/strxfrm() input */
68 int last_len2; /* Length of last buf2 string/strxfrm() blob */
69 int last_returned; /* Last comparison result (cache) */
70 bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
71 bool collate_c;
72 bool bpchar; /* Sorting bpchar, not varchar/text/bytea? */
73 hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
74 hyperLogLogState full_card; /* Full key cardinality state */
75 double prop_card; /* Required cardinality proportion */
76 pg_locale_t locale;
77 } VarStringSortSupport;
78
/*
 * Size of an on-stack string buffer: big enough to hold most strings, yet
 * small enough to be comfortable as a local variable.
 */
#define TEXTBUFLEN		1024

/* Datum access macros for the "unknown" pseudo-type */
#define DatumGetUnknownP(datum)		((unknown *) PG_DETOAST_DATUM(datum))
#define DatumGetUnknownPCopy(datum) ((unknown *) PG_DETOAST_DATUM_COPY(datum))
#define PG_GETARG_UNKNOWN_P(n)		DatumGetUnknownP(PG_GETARG_DATUM(n))
#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_UNKNOWN_P(x)		PG_RETURN_POINTER(x)

/* Datum access macros for generic variable-length strings */
#define DatumGetVarStringP(datum)	((VarString *) PG_DETOAST_DATUM(datum))
#define DatumGetVarStringPP(datum)	((VarString *) PG_DETOAST_DATUM_PACKED(datum))
93
94 static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
95 static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
96 static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup);
97 static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
98 static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
99 static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
100 static int32 text_length(Datum str);
101 static text *text_catenate(text *t1, text *t2);
102 static text *text_substring(Datum str,
103 int32 start,
104 int32 length,
105 bool length_not_specified);
106 static text *text_overlay(text *t1, text *t2, int sp, int sl);
107 static int text_position(text *t1, text *t2);
108 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
109 static int text_position_next(int start_pos, TextPositionState *state);
110 static void text_position_cleanup(TextPositionState *state);
111 static int text_cmp(text *arg1, text *arg2, Oid collid);
112 static bytea *bytea_catenate(bytea *t1, bytea *t2);
113 static bytea *bytea_substring(Datum str,
114 int S,
115 int L,
116 bool length_not_specified);
117 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
118 static void appendStringInfoText(StringInfo str, const text *t);
119 static Datum text_to_array_internal(PG_FUNCTION_ARGS);
120 static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
121 const char *fldsep, const char *null_string);
122 static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
123 static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
124 int *value);
125 static const char *text_format_parse_format(const char *start_ptr,
126 const char *end_ptr,
127 int *argpos, int *widthpos,
128 int *flags, int *width);
129 static void text_format_string_conversion(StringInfo buf, char conversion,
130 FmgrInfo *typOutputInfo,
131 Datum value, bool isNull,
132 int flags, int width);
133 static void text_format_append_string(StringInfo buf, const char *str,
134 int flags, int width);
135
136
137 /*****************************************************************************
138 * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
139 *****************************************************************************/
140
141 /*
142 * cstring_to_text
143 *
144 * Create a text value from a null-terminated C string.
145 *
146 * The new text value is freshly palloc'd with a full-size VARHDR.
147 */
148 text *
cstring_to_text(const char * s)149 cstring_to_text(const char *s)
150 {
151 return cstring_to_text_with_len(s, strlen(s));
152 }
153
154 /*
155 * cstring_to_text_with_len
156 *
157 * Same as cstring_to_text except the caller specifies the string length;
158 * the string need not be null_terminated.
159 */
160 text *
cstring_to_text_with_len(const char * s,int len)161 cstring_to_text_with_len(const char *s, int len)
162 {
163 text *result = (text *) palloc(len + VARHDRSZ);
164
165 SET_VARSIZE(result, len + VARHDRSZ);
166 memcpy(VARDATA(result), s, len);
167
168 return result;
169 }
170
171 /*
172 * text_to_cstring
173 *
174 * Create a palloc'd, null-terminated C string from a text value.
175 *
176 * We support being passed a compressed or toasted text value.
177 * This is a bit bogus since such values shouldn't really be referred to as
178 * "text *", but it seems useful for robustness. If we didn't handle that
179 * case here, we'd need another routine that did, anyway.
180 */
181 char *
text_to_cstring(const text * t)182 text_to_cstring(const text *t)
183 {
184 /* must cast away the const, unfortunately */
185 text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
186 int len = VARSIZE_ANY_EXHDR(tunpacked);
187 char *result;
188
189 result = (char *) palloc(len + 1);
190 memcpy(result, VARDATA_ANY(tunpacked), len);
191 result[len] = '\0';
192
193 if (tunpacked != t)
194 pfree(tunpacked);
195
196 return result;
197 }
198
199 /*
200 * text_to_cstring_buffer
201 *
202 * Copy a text value into a caller-supplied buffer of size dst_len.
203 *
204 * The text string is truncated if necessary to fit. The result is
205 * guaranteed null-terminated (unless dst_len == 0).
206 *
207 * We support being passed a compressed or toasted text value.
208 * This is a bit bogus since such values shouldn't really be referred to as
209 * "text *", but it seems useful for robustness. If we didn't handle that
210 * case here, we'd need another routine that did, anyway.
211 */
212 void
text_to_cstring_buffer(const text * src,char * dst,size_t dst_len)213 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
214 {
215 /* must cast away the const, unfortunately */
216 text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
217 size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
218
219 if (dst_len > 0)
220 {
221 dst_len--;
222 if (dst_len >= src_len)
223 dst_len = src_len;
224 else /* ensure truncation is encoding-safe */
225 dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
226 memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
227 dst[dst_len] = '\0';
228 }
229
230 if (srcunpacked != src)
231 pfree(srcunpacked);
232 }
233
234
235 /*****************************************************************************
236 * USER I/O ROUTINES *
237 *****************************************************************************/
238
239
/* Convert an ASCII digit character to its numeric value, and back again */
#define VAL(CH)			((CH) - '0')
#define DIG(N)			((N) + '0')
242
243 /*
244 * byteain - converts from printable representation of byte array
245 *
246 * Non-printable characters must be passed as '\nnn' (octal) and are
247 * converted to internal form. '\' must be passed as '\\'.
248 * ereport(ERROR, ...) if bad form.
249 *
250 * BUGS:
251 * The input is scanned twice.
252 * The error checking of input is minimal.
253 */
254 Datum
byteain(PG_FUNCTION_ARGS)255 byteain(PG_FUNCTION_ARGS)
256 {
257 char *inputText = PG_GETARG_CSTRING(0);
258 char *tp;
259 char *rp;
260 int bc;
261 bytea *result;
262
263 /* Recognize hex input */
264 if (inputText[0] == '\\' && inputText[1] == 'x')
265 {
266 size_t len = strlen(inputText);
267
268 bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
269 result = palloc(bc);
270 bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
271 SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
272
273 PG_RETURN_BYTEA_P(result);
274 }
275
276 /* Else, it's the traditional escaped style */
277 for (bc = 0, tp = inputText; *tp != '\0'; bc++)
278 {
279 if (tp[0] != '\\')
280 tp++;
281 else if ((tp[0] == '\\') &&
282 (tp[1] >= '0' && tp[1] <= '3') &&
283 (tp[2] >= '0' && tp[2] <= '7') &&
284 (tp[3] >= '0' && tp[3] <= '7'))
285 tp += 4;
286 else if ((tp[0] == '\\') &&
287 (tp[1] == '\\'))
288 tp += 2;
289 else
290 {
291 /*
292 * one backslash, not followed by another or ### valid octal
293 */
294 ereport(ERROR,
295 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
296 errmsg("invalid input syntax for type %s", "bytea")));
297 }
298 }
299
300 bc += VARHDRSZ;
301
302 result = (bytea *) palloc(bc);
303 SET_VARSIZE(result, bc);
304
305 tp = inputText;
306 rp = VARDATA(result);
307 while (*tp != '\0')
308 {
309 if (tp[0] != '\\')
310 *rp++ = *tp++;
311 else if ((tp[0] == '\\') &&
312 (tp[1] >= '0' && tp[1] <= '3') &&
313 (tp[2] >= '0' && tp[2] <= '7') &&
314 (tp[3] >= '0' && tp[3] <= '7'))
315 {
316 bc = VAL(tp[1]);
317 bc <<= 3;
318 bc += VAL(tp[2]);
319 bc <<= 3;
320 *rp++ = bc + VAL(tp[3]);
321
322 tp += 4;
323 }
324 else if ((tp[0] == '\\') &&
325 (tp[1] == '\\'))
326 {
327 *rp++ = '\\';
328 tp += 2;
329 }
330 else
331 {
332 /*
333 * We should never get here. The first pass should not allow it.
334 */
335 ereport(ERROR,
336 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
337 errmsg("invalid input syntax for type %s", "bytea")));
338 }
339 }
340
341 PG_RETURN_BYTEA_P(result);
342 }
343
344 /*
345 * byteaout - converts to printable representation of byte array
346 *
347 * In the traditional escaped format, non-printable characters are
348 * printed as '\nnn' (octal) and '\' as '\\'.
349 */
350 Datum
byteaout(PG_FUNCTION_ARGS)351 byteaout(PG_FUNCTION_ARGS)
352 {
353 bytea *vlena = PG_GETARG_BYTEA_PP(0);
354 char *result;
355 char *rp;
356
357 if (bytea_output == BYTEA_OUTPUT_HEX)
358 {
359 /* Print hex format */
360 rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
361 *rp++ = '\\';
362 *rp++ = 'x';
363 rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
364 }
365 else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
366 {
367 /* Print traditional escaped format */
368 char *vp;
369 int len;
370 int i;
371
372 len = 1; /* empty string has 1 char */
373 vp = VARDATA_ANY(vlena);
374 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
375 {
376 if (*vp == '\\')
377 len += 2;
378 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
379 len += 4;
380 else
381 len++;
382 }
383 rp = result = (char *) palloc(len);
384 vp = VARDATA_ANY(vlena);
385 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
386 {
387 if (*vp == '\\')
388 {
389 *rp++ = '\\';
390 *rp++ = '\\';
391 }
392 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
393 {
394 int val; /* holds unprintable chars */
395
396 val = *vp;
397 rp[0] = '\\';
398 rp[3] = DIG(val & 07);
399 val >>= 3;
400 rp[2] = DIG(val & 07);
401 val >>= 3;
402 rp[1] = DIG(val & 03);
403 rp += 4;
404 }
405 else
406 *rp++ = *vp;
407 }
408 }
409 else
410 {
411 elog(ERROR, "unrecognized bytea_output setting: %d",
412 bytea_output);
413 rp = result = NULL; /* keep compiler quiet */
414 }
415 *rp = '\0';
416 PG_RETURN_CSTRING(result);
417 }
418
419 /*
420 * bytearecv - converts external binary format to bytea
421 */
422 Datum
bytearecv(PG_FUNCTION_ARGS)423 bytearecv(PG_FUNCTION_ARGS)
424 {
425 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
426 bytea *result;
427 int nbytes;
428
429 nbytes = buf->len - buf->cursor;
430 result = (bytea *) palloc(nbytes + VARHDRSZ);
431 SET_VARSIZE(result, nbytes + VARHDRSZ);
432 pq_copymsgbytes(buf, VARDATA(result), nbytes);
433 PG_RETURN_BYTEA_P(result);
434 }
435
436 /*
437 * byteasend - converts bytea to binary format
438 *
439 * This is a special case: just copy the input...
440 */
441 Datum
byteasend(PG_FUNCTION_ARGS)442 byteasend(PG_FUNCTION_ARGS)
443 {
444 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
445
446 PG_RETURN_BYTEA_P(vlena);
447 }
448
449 Datum
bytea_string_agg_transfn(PG_FUNCTION_ARGS)450 bytea_string_agg_transfn(PG_FUNCTION_ARGS)
451 {
452 StringInfo state;
453
454 state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
455
456 /* Append the value unless null. */
457 if (!PG_ARGISNULL(1))
458 {
459 bytea *value = PG_GETARG_BYTEA_PP(1);
460
461 /* On the first time through, we ignore the delimiter. */
462 if (state == NULL)
463 state = makeStringAggState(fcinfo);
464 else if (!PG_ARGISNULL(2))
465 {
466 bytea *delim = PG_GETARG_BYTEA_PP(2);
467
468 appendBinaryStringInfo(state, VARDATA_ANY(delim), VARSIZE_ANY_EXHDR(delim));
469 }
470
471 appendBinaryStringInfo(state, VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value));
472 }
473
474 /*
475 * The transition type for string_agg() is declared to be "internal",
476 * which is a pass-by-value type the same size as a pointer.
477 */
478 PG_RETURN_POINTER(state);
479 }
480
481 Datum
bytea_string_agg_finalfn(PG_FUNCTION_ARGS)482 bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
483 {
484 StringInfo state;
485
486 /* cannot be called directly because of internal-type argument */
487 Assert(AggCheckCallContext(fcinfo, NULL));
488
489 state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
490
491 if (state != NULL)
492 {
493 bytea *result;
494
495 result = (bytea *) palloc(state->len + VARHDRSZ);
496 SET_VARSIZE(result, state->len + VARHDRSZ);
497 memcpy(VARDATA(result), state->data, state->len);
498 PG_RETURN_BYTEA_P(result);
499 }
500 else
501 PG_RETURN_NULL();
502 }
503
504 /*
505 * textin - converts "..." to internal representation
506 */
507 Datum
textin(PG_FUNCTION_ARGS)508 textin(PG_FUNCTION_ARGS)
509 {
510 char *inputText = PG_GETARG_CSTRING(0);
511
512 PG_RETURN_TEXT_P(cstring_to_text(inputText));
513 }
514
515 /*
516 * textout - converts internal representation to "..."
517 */
518 Datum
textout(PG_FUNCTION_ARGS)519 textout(PG_FUNCTION_ARGS)
520 {
521 Datum txt = PG_GETARG_DATUM(0);
522
523 PG_RETURN_CSTRING(TextDatumGetCString(txt));
524 }
525
526 /*
527 * textrecv - converts external binary format to text
528 */
529 Datum
textrecv(PG_FUNCTION_ARGS)530 textrecv(PG_FUNCTION_ARGS)
531 {
532 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
533 text *result;
534 char *str;
535 int nbytes;
536
537 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
538
539 result = cstring_to_text_with_len(str, nbytes);
540 pfree(str);
541 PG_RETURN_TEXT_P(result);
542 }
543
544 /*
545 * textsend - converts text to binary format
546 */
547 Datum
textsend(PG_FUNCTION_ARGS)548 textsend(PG_FUNCTION_ARGS)
549 {
550 text *t = PG_GETARG_TEXT_PP(0);
551 StringInfoData buf;
552
553 pq_begintypsend(&buf);
554 pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
555 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
556 }
557
558
559 /*
560 * unknownin - converts "..." to internal representation
561 */
562 Datum
unknownin(PG_FUNCTION_ARGS)563 unknownin(PG_FUNCTION_ARGS)
564 {
565 char *str = PG_GETARG_CSTRING(0);
566
567 /* representation is same as cstring */
568 PG_RETURN_CSTRING(pstrdup(str));
569 }
570
571 /*
572 * unknownout - converts internal representation to "..."
573 */
574 Datum
unknownout(PG_FUNCTION_ARGS)575 unknownout(PG_FUNCTION_ARGS)
576 {
577 /* representation is same as cstring */
578 char *str = PG_GETARG_CSTRING(0);
579
580 PG_RETURN_CSTRING(pstrdup(str));
581 }
582
583 /*
584 * unknownrecv - converts external binary format to unknown
585 */
586 Datum
unknownrecv(PG_FUNCTION_ARGS)587 unknownrecv(PG_FUNCTION_ARGS)
588 {
589 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
590 char *str;
591 int nbytes;
592
593 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
594 /* representation is same as cstring */
595 PG_RETURN_CSTRING(str);
596 }
597
598 /*
599 * unknownsend - converts unknown to binary format
600 */
601 Datum
unknownsend(PG_FUNCTION_ARGS)602 unknownsend(PG_FUNCTION_ARGS)
603 {
604 /* representation is same as cstring */
605 char *str = PG_GETARG_CSTRING(0);
606 StringInfoData buf;
607
608 pq_begintypsend(&buf);
609 pq_sendtext(&buf, str, strlen(str));
610 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
611 }
612
613
614 /* ========== PUBLIC ROUTINES ========== */
615
616 /*
617 * textlen -
618 * returns the logical length of a text*
619 * (which is less than the VARSIZE of the text*)
620 */
621 Datum
textlen(PG_FUNCTION_ARGS)622 textlen(PG_FUNCTION_ARGS)
623 {
624 Datum str = PG_GETARG_DATUM(0);
625
626 /* try to avoid decompressing argument */
627 PG_RETURN_INT32(text_length(str));
628 }
629
630 /*
631 * text_length -
632 * Does the real work for textlen()
633 *
634 * This is broken out so it can be called directly by other string processing
635 * functions. Note that the argument is passed as a Datum, to indicate that
636 * it may still be in compressed form. We can avoid decompressing it at all
637 * in some cases.
638 */
639 static int32
text_length(Datum str)640 text_length(Datum str)
641 {
642 /* fastpath when max encoding length is one */
643 if (pg_database_encoding_max_length() == 1)
644 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
645 else
646 {
647 text *t = DatumGetTextPP(str);
648
649 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
650 VARSIZE_ANY_EXHDR(t)));
651 }
652 }
653
654 /*
655 * textoctetlen -
656 * returns the physical length of a text*
657 * (which is less than the VARSIZE of the text*)
658 */
659 Datum
textoctetlen(PG_FUNCTION_ARGS)660 textoctetlen(PG_FUNCTION_ARGS)
661 {
662 Datum str = PG_GETARG_DATUM(0);
663
664 /* We need not detoast the input at all */
665 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
666 }
667
668 /*
669 * textcat -
670 * takes two text* and returns a text* that is the concatenation of
671 * the two.
672 *
673 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
674 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
675 * Allocate space for output in all cases.
676 * XXX - thomas 1997-07-10
677 */
678 Datum
textcat(PG_FUNCTION_ARGS)679 textcat(PG_FUNCTION_ARGS)
680 {
681 text *t1 = PG_GETARG_TEXT_PP(0);
682 text *t2 = PG_GETARG_TEXT_PP(1);
683
684 PG_RETURN_TEXT_P(text_catenate(t1, t2));
685 }
686
687 /*
688 * text_catenate
689 * Guts of textcat(), broken out so it can be used by other functions
690 *
691 * Arguments can be in short-header form, but not compressed or out-of-line
692 */
693 static text *
text_catenate(text * t1,text * t2)694 text_catenate(text *t1, text *t2)
695 {
696 text *result;
697 int len1,
698 len2,
699 len;
700 char *ptr;
701
702 len1 = VARSIZE_ANY_EXHDR(t1);
703 len2 = VARSIZE_ANY_EXHDR(t2);
704
705 /* paranoia ... probably should throw error instead? */
706 if (len1 < 0)
707 len1 = 0;
708 if (len2 < 0)
709 len2 = 0;
710
711 len = len1 + len2 + VARHDRSZ;
712 result = (text *) palloc(len);
713
714 /* Set size of result string... */
715 SET_VARSIZE(result, len);
716
717 /* Fill data field of result string... */
718 ptr = VARDATA(result);
719 if (len1 > 0)
720 memcpy(ptr, VARDATA_ANY(t1), len1);
721 if (len2 > 0)
722 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
723
724 return result;
725 }
726
/*
 * charlen_to_bytelen()
 *	Compute the number of bytes occupied by n characters starting at *p
 *
 * The caller must make sure there really are n characters available; the
 * string need not be null-terminated.
 */
static int
charlen_to_bytelen(const char *p, int n)
{
	const char *cur = p;

	/* in a single-byte encoding, characters and bytes are the same thing */
	if (pg_database_encoding_max_length() == 1)
		return n;

	/* otherwise step over the characters one at a time */
	while (n > 0)
	{
		cur += pg_mblen(cur);
		n--;
	}

	return cur - p;
}
752
753 /*
754 * text_substr()
755 * Return a substring starting at the specified position.
756 * - thomas 1997-12-31
757 *
758 * Input:
759 * - string
760 * - starting position (is one-based)
761 * - string length
762 *
763 * If the starting position is zero or less, then return from the start of the string
764 * adjusting the length to be consistent with the "negative start" per SQL.
765 * If the length is less than zero, return the remaining string.
766 *
767 * Added multibyte support.
768 * - Tatsuo Ishii 1998-4-21
769 * Changed behavior if starting position is less than one to conform to SQL behavior.
770 * Formerly returned the entire string; now returns a portion.
771 * - Thomas Lockhart 1998-12-10
772 * Now uses faster TOAST-slicing interface
773 * - John Gray 2002-02-22
774 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
775 * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
776 * error; if E < 1, return '', not entire string). Fixed MB related bug when
777 * S > LC and < LC + 4 sometimes garbage characters are returned.
778 * - Joe Conway 2002-08-10
779 */
780 Datum
text_substr(PG_FUNCTION_ARGS)781 text_substr(PG_FUNCTION_ARGS)
782 {
783 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
784 PG_GETARG_INT32(1),
785 PG_GETARG_INT32(2),
786 false));
787 }
788
789 /*
790 * text_substr_no_len -
791 * Wrapper to avoid opr_sanity failure due to
792 * one function accepting a different number of args.
793 */
794 Datum
text_substr_no_len(PG_FUNCTION_ARGS)795 text_substr_no_len(PG_FUNCTION_ARGS)
796 {
797 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
798 PG_GETARG_INT32(1),
799 -1, true));
800 }
801
802 /*
803 * text_substring -
804 * Does the real work for text_substr() and text_substr_no_len()
805 *
806 * This is broken out so it can be called directly by other string processing
807 * functions. Note that the argument is passed as a Datum, to indicate that
808 * it may still be in compressed/toasted form. We can avoid detoasting all
809 * of it in some cases.
810 *
811 * The result is always a freshly palloc'd datum.
812 */
813 static text *
text_substring(Datum str,int32 start,int32 length,bool length_not_specified)814 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
815 {
816 int32 eml = pg_database_encoding_max_length();
817 int32 S = start; /* start position */
818 int32 S1; /* adjusted start position */
819 int32 L1; /* adjusted substring length */
820
821 /* life is easy if the encoding max length is 1 */
822 if (eml == 1)
823 {
824 S1 = Max(S, 1);
825
826 if (length_not_specified) /* special case - get length to end of
827 * string */
828 L1 = -1;
829 else
830 {
831 /* end position */
832 int E = S + length;
833
834 /*
835 * A negative value for L is the only way for the end position to
836 * be before the start. SQL99 says to throw an error.
837 */
838 if (E < S)
839 ereport(ERROR,
840 (errcode(ERRCODE_SUBSTRING_ERROR),
841 errmsg("negative substring length not allowed")));
842
843 /*
844 * A zero or negative value for the end position can happen if the
845 * start was negative or one. SQL99 says to return a zero-length
846 * string.
847 */
848 if (E < 1)
849 return cstring_to_text("");
850
851 L1 = E - S1;
852 }
853
854 /*
855 * If the start position is past the end of the string, SQL99 says to
856 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
857 * that for us. Convert to zero-based starting position
858 */
859 return DatumGetTextPSlice(str, S1 - 1, L1);
860 }
861 else if (eml > 1)
862 {
863 /*
864 * When encoding max length is > 1, we can't get LC without
865 * detoasting, so we'll grab a conservatively large slice now and go
866 * back later to do the right thing
867 */
868 int32 slice_start;
869 int32 slice_size;
870 int32 slice_strlen;
871 text *slice;
872 int32 E1;
873 int32 i;
874 char *p;
875 char *s;
876 text *ret;
877
878 /*
879 * if S is past the end of the string, the tuple toaster will return a
880 * zero-length string to us
881 */
882 S1 = Max(S, 1);
883
884 /*
885 * We need to start at position zero because there is no way to know
886 * in advance which byte offset corresponds to the supplied start
887 * position.
888 */
889 slice_start = 0;
890
891 if (length_not_specified) /* special case - get length to end of
892 * string */
893 slice_size = L1 = -1;
894 else
895 {
896 int E = S + length;
897
898 /*
899 * A negative value for L is the only way for the end position to
900 * be before the start. SQL99 says to throw an error.
901 */
902 if (E < S)
903 ereport(ERROR,
904 (errcode(ERRCODE_SUBSTRING_ERROR),
905 errmsg("negative substring length not allowed")));
906
907 /*
908 * A zero or negative value for the end position can happen if the
909 * start was negative or one. SQL99 says to return a zero-length
910 * string.
911 */
912 if (E < 1)
913 return cstring_to_text("");
914
915 /*
916 * if E is past the end of the string, the tuple toaster will
917 * truncate the length for us
918 */
919 L1 = E - S1;
920
921 /*
922 * Total slice size in bytes can't be any longer than the start
923 * position plus substring length times the encoding max length.
924 */
925 slice_size = (S1 + L1) * eml;
926 }
927
928 /*
929 * If we're working with an untoasted source, no need to do an extra
930 * copying step.
931 */
932 if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
933 VARATT_IS_EXTERNAL(DatumGetPointer(str)))
934 slice = DatumGetTextPSlice(str, slice_start, slice_size);
935 else
936 slice = (text *) DatumGetPointer(str);
937
938 /* see if we got back an empty string */
939 if (VARSIZE_ANY_EXHDR(slice) == 0)
940 {
941 if (slice != (text *) DatumGetPointer(str))
942 pfree(slice);
943 return cstring_to_text("");
944 }
945
946 /* Now we can get the actual length of the slice in MB characters */
947 slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
948 VARSIZE_ANY_EXHDR(slice));
949
950 /*
951 * Check that the start position wasn't > slice_strlen. If so, SQL99
952 * says to return a zero-length string.
953 */
954 if (S1 > slice_strlen)
955 {
956 if (slice != (text *) DatumGetPointer(str))
957 pfree(slice);
958 return cstring_to_text("");
959 }
960
961 /*
962 * Adjust L1 and E1 now that we know the slice string length. Again
963 * remember that S1 is one based, and slice_start is zero based.
964 */
965 if (L1 > -1)
966 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
967 else
968 E1 = slice_start + 1 + slice_strlen;
969
970 /*
971 * Find the start position in the slice; remember S1 is not zero based
972 */
973 p = VARDATA_ANY(slice);
974 for (i = 0; i < S1 - 1; i++)
975 p += pg_mblen(p);
976
977 /* hang onto a pointer to our start position */
978 s = p;
979
980 /*
981 * Count the actual bytes used by the substring of the requested
982 * length.
983 */
984 for (i = S1; i < E1; i++)
985 p += pg_mblen(p);
986
987 ret = (text *) palloc(VARHDRSZ + (p - s));
988 SET_VARSIZE(ret, VARHDRSZ + (p - s));
989 memcpy(VARDATA(ret), s, (p - s));
990
991 if (slice != (text *) DatumGetPointer(str))
992 pfree(slice);
993
994 return ret;
995 }
996 else
997 elog(ERROR, "invalid backend encoding: encoding max length < 1");
998
999 /* not reached: suppress compiler warning */
1000 return NULL;
1001 }
1002
1003 /*
1004 * textoverlay
1005 * Replace specified substring of first string with second
1006 *
1007 * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1008 * This code is a direct implementation of what the standard says.
1009 */
1010 Datum
textoverlay(PG_FUNCTION_ARGS)1011 textoverlay(PG_FUNCTION_ARGS)
1012 {
1013 text *t1 = PG_GETARG_TEXT_PP(0);
1014 text *t2 = PG_GETARG_TEXT_PP(1);
1015 int sp = PG_GETARG_INT32(2); /* substring start position */
1016 int sl = PG_GETARG_INT32(3); /* substring length */
1017
1018 PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1019 }
1020
1021 Datum
textoverlay_no_len(PG_FUNCTION_ARGS)1022 textoverlay_no_len(PG_FUNCTION_ARGS)
1023 {
1024 text *t1 = PG_GETARG_TEXT_PP(0);
1025 text *t2 = PG_GETARG_TEXT_PP(1);
1026 int sp = PG_GETARG_INT32(2); /* substring start position */
1027 int sl;
1028
1029 sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1030 PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1031 }
1032
1033 static text *
text_overlay(text * t1,text * t2,int sp,int sl)1034 text_overlay(text *t1, text *t2, int sp, int sl)
1035 {
1036 text *result;
1037 text *s1;
1038 text *s2;
1039 int sp_pl_sl;
1040
1041 /*
1042 * Check for possible integer-overflow cases. For negative sp, throw a
1043 * "substring length" error because that's what should be expected
1044 * according to the spec's definition of OVERLAY().
1045 */
1046 if (sp <= 0)
1047 ereport(ERROR,
1048 (errcode(ERRCODE_SUBSTRING_ERROR),
1049 errmsg("negative substring length not allowed")));
1050 sp_pl_sl = sp + sl;
1051 if (sp_pl_sl <= sl)
1052 ereport(ERROR,
1053 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1054 errmsg("integer out of range")));
1055
1056 s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1057 s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1058 result = text_catenate(s1, t2);
1059 result = text_catenate(result, s2);
1060
1061 return result;
1062 }
1063
1064 /*
1065 * textpos -
1066 * Return the position of the specified substring.
1067 * Implements the SQL POSITION() function.
1068 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1069 * - thomas 1997-07-27
1070 */
1071 Datum
textpos(PG_FUNCTION_ARGS)1072 textpos(PG_FUNCTION_ARGS)
1073 {
1074 text *str = PG_GETARG_TEXT_PP(0);
1075 text *search_str = PG_GETARG_TEXT_PP(1);
1076
1077 PG_RETURN_INT32((int32) text_position(str, search_str));
1078 }
1079
1080 /*
1081 * text_position -
1082 * Does the real work for textpos()
1083 *
1084 * Inputs:
1085 * t1 - string to be searched
1086 * t2 - pattern to match within t1
1087 * Result:
1088 * Character index of the first matched char, starting from 1,
1089 * or 0 if no match.
1090 *
1091 * This is broken out so it can be called directly by other string processing
1092 * functions.
1093 */
1094 static int
text_position(text * t1,text * t2)1095 text_position(text *t1, text *t2)
1096 {
1097 TextPositionState state;
1098 int result;
1099
1100 text_position_setup(t1, t2, &state);
1101 result = text_position_next(1, &state);
1102 text_position_cleanup(&state);
1103 return result;
1104 }
1105
1106
1107 /*
1108 * text_position_setup, text_position_next, text_position_cleanup -
1109 * Component steps of text_position()
1110 *
1111 * These are broken out so that a string can be efficiently searched for
1112 * multiple occurrences of the same pattern. text_position_next may be
1113 * called multiple times with increasing values of start_pos, which is
1114 * the 1-based character position to start the search from. The "state"
1115 * variable is normally just a local variable in the caller.
1116 */
1117
1118 static void
text_position_setup(text * t1,text * t2,TextPositionState * state)1119 text_position_setup(text *t1, text *t2, TextPositionState *state)
1120 {
1121 int len1 = VARSIZE_ANY_EXHDR(t1);
1122 int len2 = VARSIZE_ANY_EXHDR(t2);
1123
1124 if (pg_database_encoding_max_length() == 1)
1125 {
1126 /* simple case - single byte encoding */
1127 state->use_wchar = false;
1128 state->str1 = VARDATA_ANY(t1);
1129 state->str2 = VARDATA_ANY(t2);
1130 state->len1 = len1;
1131 state->len2 = len2;
1132 }
1133 else
1134 {
1135 /* not as simple - multibyte encoding */
1136 pg_wchar *p1,
1137 *p2;
1138
1139 p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
1140 len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
1141 p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
1142 len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
1143
1144 state->use_wchar = true;
1145 state->wstr1 = p1;
1146 state->wstr2 = p2;
1147 state->len1 = len1;
1148 state->len2 = len2;
1149 }
1150
1151 /*
1152 * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1153 * notes we use the terminology that the "haystack" is the string to be
1154 * searched (t1) and the "needle" is the pattern being sought (t2).
1155 *
1156 * If the needle is empty or bigger than the haystack then there is no
1157 * point in wasting cycles initializing the table. We also choose not to
1158 * use B-M-H for needles of length 1, since the skip table can't possibly
1159 * save anything in that case.
1160 */
1161 if (len1 >= len2 && len2 > 1)
1162 {
1163 int searchlength = len1 - len2;
1164 int skiptablemask;
1165 int last;
1166 int i;
1167
1168 /*
1169 * First we must determine how much of the skip table to use. The
1170 * declaration of TextPositionState allows up to 256 elements, but for
1171 * short search problems we don't really want to have to initialize so
1172 * many elements --- it would take too long in comparison to the
1173 * actual search time. So we choose a useful skip table size based on
1174 * the haystack length minus the needle length. The closer the needle
1175 * length is to the haystack length the less useful skipping becomes.
1176 *
1177 * Note: since we use bit-masking to select table elements, the skip
1178 * table size MUST be a power of 2, and so the mask must be 2^N-1.
1179 */
1180 if (searchlength < 16)
1181 skiptablemask = 3;
1182 else if (searchlength < 64)
1183 skiptablemask = 7;
1184 else if (searchlength < 128)
1185 skiptablemask = 15;
1186 else if (searchlength < 512)
1187 skiptablemask = 31;
1188 else if (searchlength < 2048)
1189 skiptablemask = 63;
1190 else if (searchlength < 4096)
1191 skiptablemask = 127;
1192 else
1193 skiptablemask = 255;
1194 state->skiptablemask = skiptablemask;
1195
1196 /*
1197 * Initialize the skip table. We set all elements to the needle
1198 * length, since this is the correct skip distance for any character
1199 * not found in the needle.
1200 */
1201 for (i = 0; i <= skiptablemask; i++)
1202 state->skiptable[i] = len2;
1203
1204 /*
1205 * Now examine the needle. For each character except the last one,
1206 * set the corresponding table element to the appropriate skip
1207 * distance. Note that when two characters share the same skip table
1208 * entry, the one later in the needle must determine the skip
1209 * distance.
1210 */
1211 last = len2 - 1;
1212
1213 if (!state->use_wchar)
1214 {
1215 const char *str2 = state->str2;
1216
1217 for (i = 0; i < last; i++)
1218 state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1219 }
1220 else
1221 {
1222 const pg_wchar *wstr2 = state->wstr2;
1223
1224 for (i = 0; i < last; i++)
1225 state->skiptable[wstr2[i] & skiptablemask] = last - i;
1226 }
1227 }
1228 }
1229
1230 static int
text_position_next(int start_pos,TextPositionState * state)1231 text_position_next(int start_pos, TextPositionState *state)
1232 {
1233 int haystack_len = state->len1;
1234 int needle_len = state->len2;
1235 int skiptablemask = state->skiptablemask;
1236
1237 Assert(start_pos > 0); /* else caller error */
1238
1239 if (needle_len <= 0)
1240 return start_pos; /* result for empty pattern */
1241
1242 start_pos--; /* adjust for zero based arrays */
1243
1244 /* Done if the needle can't possibly fit */
1245 if (haystack_len < start_pos + needle_len)
1246 return 0;
1247
1248 if (!state->use_wchar)
1249 {
1250 /* simple case - single byte encoding */
1251 const char *haystack = state->str1;
1252 const char *needle = state->str2;
1253 const char *haystack_end = &haystack[haystack_len];
1254 const char *hptr;
1255
1256 if (needle_len == 1)
1257 {
1258 /* No point in using B-M-H for a one-character needle */
1259 char nchar = *needle;
1260
1261 hptr = &haystack[start_pos];
1262 while (hptr < haystack_end)
1263 {
1264 if (*hptr == nchar)
1265 return hptr - haystack + 1;
1266 hptr++;
1267 }
1268 }
1269 else
1270 {
1271 const char *needle_last = &needle[needle_len - 1];
1272
1273 /* Start at startpos plus the length of the needle */
1274 hptr = &haystack[start_pos + needle_len - 1];
1275 while (hptr < haystack_end)
1276 {
1277 /* Match the needle scanning *backward* */
1278 const char *nptr;
1279 const char *p;
1280
1281 nptr = needle_last;
1282 p = hptr;
1283 while (*nptr == *p)
1284 {
1285 /* Matched it all? If so, return 1-based position */
1286 if (nptr == needle)
1287 return p - haystack + 1;
1288 nptr--, p--;
1289 }
1290
1291 /*
1292 * No match, so use the haystack char at hptr to decide how
1293 * far to advance. If the needle had any occurrence of that
1294 * character (or more precisely, one sharing the same
1295 * skiptable entry) before its last character, then we advance
1296 * far enough to align the last such needle character with
1297 * that haystack position. Otherwise we can advance by the
1298 * whole needle length.
1299 */
1300 hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1301 }
1302 }
1303 }
1304 else
1305 {
1306 /* The multibyte char version. This works exactly the same way. */
1307 const pg_wchar *haystack = state->wstr1;
1308 const pg_wchar *needle = state->wstr2;
1309 const pg_wchar *haystack_end = &haystack[haystack_len];
1310 const pg_wchar *hptr;
1311
1312 if (needle_len == 1)
1313 {
1314 /* No point in using B-M-H for a one-character needle */
1315 pg_wchar nchar = *needle;
1316
1317 hptr = &haystack[start_pos];
1318 while (hptr < haystack_end)
1319 {
1320 if (*hptr == nchar)
1321 return hptr - haystack + 1;
1322 hptr++;
1323 }
1324 }
1325 else
1326 {
1327 const pg_wchar *needle_last = &needle[needle_len - 1];
1328
1329 /* Start at startpos plus the length of the needle */
1330 hptr = &haystack[start_pos + needle_len - 1];
1331 while (hptr < haystack_end)
1332 {
1333 /* Match the needle scanning *backward* */
1334 const pg_wchar *nptr;
1335 const pg_wchar *p;
1336
1337 nptr = needle_last;
1338 p = hptr;
1339 while (*nptr == *p)
1340 {
1341 /* Matched it all? If so, return 1-based position */
1342 if (nptr == needle)
1343 return p - haystack + 1;
1344 nptr--, p--;
1345 }
1346
1347 /*
1348 * No match, so use the haystack char at hptr to decide how
1349 * far to advance. If the needle had any occurrence of that
1350 * character (or more precisely, one sharing the same
1351 * skiptable entry) before its last character, then we advance
1352 * far enough to align the last such needle character with
1353 * that haystack position. Otherwise we can advance by the
1354 * whole needle length.
1355 */
1356 hptr += state->skiptable[*hptr & skiptablemask];
1357 }
1358 }
1359 }
1360
1361 return 0; /* not found */
1362 }
1363
1364 static void
text_position_cleanup(TextPositionState * state)1365 text_position_cleanup(TextPositionState *state)
1366 {
1367 if (state->use_wchar)
1368 {
1369 pfree(state->wstr1);
1370 pfree(state->wstr2);
1371 }
1372 }
1373
/* varstr_cmp()
 * Comparison function for text strings with given lengths.
 * Includes locale support, but must copy strings to temporary memory
 * to allow null-termination for inputs to strcoll().
 * Returns an integer less than, equal to, or greater than zero, indicating
 * whether arg1 is less than, equal to, or greater than arg2.
 *
 * arg1/len1 and arg2/len2 give the two strings as pointer plus byte length;
 * the inputs need not be null-terminated, since terminated copies are made
 * here where they are needed.  collid selects the collation to compare
 * under.
 *
 * An ERROR is raised if collid is not a valid OID, if a Windows UTF-16
 * conversion or comparison fails, or if ICU reports a collation failure.
 */
int
varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
{
	int			result;

	/*
	 * Unfortunately, there is no strncoll(), so in the non-C locale case we
	 * have to do some memory copying.  This turns out to be significantly
	 * slower, so we optimize the case where LC_COLLATE is C.  We also try to
	 * optimize relatively-short strings by avoiding palloc/pfree overhead.
	 */
	if (lc_collate_is_c(collid))
	{
		/* bytewise comparison; on a common prefix the shorter string is less */
		result = memcmp(arg1, arg2, Min(len1, len2));
		if ((result == 0) && (len1 != len2))
			result = (len1 < len2) ? -1 : 1;
	}
	else
	{
		/* stack buffers used when the strings are short enough */
		char		a1buf[TEXTBUFLEN];
		char		a2buf[TEXTBUFLEN];
		char	   *a1p,
				   *a2p;
		pg_locale_t mylocale = 0;	/* stays 0 for the default collation */

		if (collid != DEFAULT_COLLATION_OID)
		{
			if (!OidIsValid(collid))
			{
				/*
				 * This typically means that the parser could not resolve a
				 * conflict of implicit collations, so report it that way.
				 */
				ereport(ERROR,
						(errcode(ERRCODE_INDETERMINATE_COLLATION),
						 errmsg("could not determine which collation to use for string comparison"),
						 errhint("Use the COLLATE clause to set the collation explicitly.")));
			}
			mylocale = pg_newlocale_from_collation(collid);
		}

		/*
		 * memcmp() can't tell us which of two unequal strings sorts first,
		 * but it's a cheap way to tell if they're equal.  Testing shows that
		 * memcmp() followed by strcoll() is only trivially slower than
		 * strcoll() by itself, so we don't lose much if this doesn't work out
		 * very often, and if it does - for example, because there are many
		 * equal strings in the input - then we win big by avoiding expensive
		 * collation-aware comparisons.
		 */
		if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
			return 0;

#ifdef WIN32
		/* Win32 does not have UTF-8, so we need to map to UTF-16 */
		if (GetDatabaseEncoding() == PG_UTF8
			&& (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
		{
			int			a1len;
			int			a2len;
			int			r;

			/*
			 * Size each buffer in bytes: two bytes per input byte plus two
			 * for the terminator, falling back to the stack buffer when
			 * that fits.
			 */
			if (len1 >= TEXTBUFLEN / 2)
			{
				a1len = len1 * 2 + 2;
				a1p = palloc(a1len);
			}
			else
			{
				a1len = TEXTBUFLEN;
				a1p = a1buf;
			}
			if (len2 >= TEXTBUFLEN / 2)
			{
				a2len = len2 * 2 + 2;
				a2p = palloc(a2len);
			}
			else
			{
				a2len = TEXTBUFLEN;
				a2p = a2buf;
			}

			/* MultiByteToWideChar() does not work for zero-length input */
			if (len1 == 0)
				r = 0;
			else
			{
				r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
										(LPWSTR) a1p, a1len / 2);
				if (!r)
					ereport(ERROR,
							(errmsg("could not convert string to UTF-16: error code %lu",
									GetLastError())));
			}
			/* r is the number of wide characters written; terminate there */
			((LPWSTR) a1p)[r] = 0;

			if (len2 == 0)
				r = 0;
			else
			{
				r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
										(LPWSTR) a2p, a2len / 2);
				if (!r)
					ereport(ERROR,
							(errmsg("could not convert string to UTF-16: error code %lu",
									GetLastError())));
			}
			((LPWSTR) a2p)[r] = 0;

			/* clear errno so that %m below reflects only the comparison */
			errno = 0;
#ifdef HAVE_LOCALE_T
			if (mylocale)
				result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
			else
#endif
				result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
			if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw
										 * headers */
				ereport(ERROR,
						(errmsg("could not compare Unicode strings: %m")));

			/*
			 * In some locales wcscoll() can claim that nonidentical strings
			 * are equal.  Believing that would be bad news for a number of
			 * reasons, so we follow Perl's lead and sort "equal" strings
			 * according to strcmp (on the UTF-8 representation).
			 */
			if (result == 0)
			{
				result = memcmp(arg1, arg2, Min(len1, len2));
				if ((result == 0) && (len1 != len2))
					result = (len1 < len2) ? -1 : 1;
			}

			/* free only what was palloc'd, not the stack buffers */
			if (a1p != a1buf)
				pfree(a1p);
			if (a2p != a2buf)
				pfree(a2p);

			return result;
		}
#endif							/* WIN32 */

		/* use the stack buffers unless a string (plus terminator) won't fit */
		if (len1 >= TEXTBUFLEN)
			a1p = (char *) palloc(len1 + 1);
		else
			a1p = a1buf;
		if (len2 >= TEXTBUFLEN)
			a2p = (char *) palloc(len2 + 1);
		else
			a2p = a2buf;

		/* make null-terminated copies of both inputs */
		memcpy(a1p, arg1, len1);
		a1p[len1] = '\0';
		memcpy(a2p, arg2, len2);
		a2p[len2] = '\0';

		if (mylocale)
		{
			if (mylocale->provider == COLLPROVIDER_ICU)
			{
#ifdef USE_ICU
#ifdef HAVE_UCOL_STRCOLLUTF8
				if (GetDatabaseEncoding() == PG_UTF8)
				{
					UErrorCode	status;

					/* ICU can compare the UTF-8 input directly, with lengths */
					status = U_ZERO_ERROR;
					result = ucol_strcollUTF8(mylocale->info.icu.ucol,
											  arg1, len1,
											  arg2, len2,
											  &status);
					if (U_FAILURE(status))
						ereport(ERROR,
								(errmsg("collation failed: %s", u_errorName(status))));
				}
				else
#endif
				{
					int32_t		ulen1,
								ulen2;
					UChar	   *uchar1,
							   *uchar2;

					/* otherwise convert both inputs to UChar strings first */
					ulen1 = icu_to_uchar(&uchar1, arg1, len1);
					ulen2 = icu_to_uchar(&uchar2, arg2, len2);

					result = ucol_strcoll(mylocale->info.icu.ucol,
										  uchar1, ulen1,
										  uchar2, ulen2);

					pfree(uchar1);
					pfree(uchar2);
				}
#else							/* not USE_ICU */
				/* shouldn't happen */
				elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
#endif							/* not USE_ICU */
			}
			else
			{
#ifdef HAVE_LOCALE_T
				result = strcoll_l(a1p, a2p, mylocale->info.lt);
#else
				/* shouldn't happen */
				elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
#endif
			}
		}
		else
			result = strcoll(a1p, a2p);

		/*
		 * In some locales strcoll() can claim that nonidentical strings are
		 * equal.  Believing that would be bad news for a number of reasons,
		 * so we follow Perl's lead and sort "equal" strings according to
		 * strcmp().
		 */
		if (result == 0)
			result = strcmp(a1p, a2p);

		/* free only what was palloc'd, not the stack buffers */
		if (a1p != a1buf)
			pfree(a1p);
		if (a2p != a2buf)
			pfree(a2p);
	}

	return result;
}
1611
1612 /* text_cmp()
1613 * Internal comparison function for text strings.
1614 * Returns -1, 0 or 1
1615 */
1616 static int
text_cmp(text * arg1,text * arg2,Oid collid)1617 text_cmp(text *arg1, text *arg2, Oid collid)
1618 {
1619 char *a1p,
1620 *a2p;
1621 int len1,
1622 len2;
1623
1624 a1p = VARDATA_ANY(arg1);
1625 a2p = VARDATA_ANY(arg2);
1626
1627 len1 = VARSIZE_ANY_EXHDR(arg1);
1628 len2 = VARSIZE_ANY_EXHDR(arg2);
1629
1630 return varstr_cmp(a1p, len1, a2p, len2, collid);
1631 }
1632
1633 /*
1634 * Comparison functions for text strings.
1635 *
1636 * Note: btree indexes need these routines not to leak memory; therefore,
1637 * be careful to free working copies of toasted datums. Most places don't
1638 * need to be so careful.
1639 */
1640
1641 Datum
texteq(PG_FUNCTION_ARGS)1642 texteq(PG_FUNCTION_ARGS)
1643 {
1644 Datum arg1 = PG_GETARG_DATUM(0);
1645 Datum arg2 = PG_GETARG_DATUM(1);
1646 bool result;
1647 Size len1,
1648 len2;
1649
1650 /*
1651 * Since we only care about equality or not-equality, we can avoid all the
1652 * expense of strcoll() here, and just do bitwise comparison. In fact, we
1653 * don't even have to do a bitwise comparison if we can show the lengths
1654 * of the strings are unequal; which might save us from having to detoast
1655 * one or both values.
1656 */
1657 len1 = toast_raw_datum_size(arg1);
1658 len2 = toast_raw_datum_size(arg2);
1659 if (len1 != len2)
1660 result = false;
1661 else
1662 {
1663 text *targ1 = DatumGetTextPP(arg1);
1664 text *targ2 = DatumGetTextPP(arg2);
1665
1666 result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1667 len1 - VARHDRSZ) == 0);
1668
1669 PG_FREE_IF_COPY(targ1, 0);
1670 PG_FREE_IF_COPY(targ2, 1);
1671 }
1672
1673 PG_RETURN_BOOL(result);
1674 }
1675
1676 Datum
textne(PG_FUNCTION_ARGS)1677 textne(PG_FUNCTION_ARGS)
1678 {
1679 Datum arg1 = PG_GETARG_DATUM(0);
1680 Datum arg2 = PG_GETARG_DATUM(1);
1681 bool result;
1682 Size len1,
1683 len2;
1684
1685 /* See comment in texteq() */
1686 len1 = toast_raw_datum_size(arg1);
1687 len2 = toast_raw_datum_size(arg2);
1688 if (len1 != len2)
1689 result = true;
1690 else
1691 {
1692 text *targ1 = DatumGetTextPP(arg1);
1693 text *targ2 = DatumGetTextPP(arg2);
1694
1695 result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1696 len1 - VARHDRSZ) != 0);
1697
1698 PG_FREE_IF_COPY(targ1, 0);
1699 PG_FREE_IF_COPY(targ2, 1);
1700 }
1701
1702 PG_RETURN_BOOL(result);
1703 }
1704
1705 Datum
text_lt(PG_FUNCTION_ARGS)1706 text_lt(PG_FUNCTION_ARGS)
1707 {
1708 text *arg1 = PG_GETARG_TEXT_PP(0);
1709 text *arg2 = PG_GETARG_TEXT_PP(1);
1710 bool result;
1711
1712 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1713
1714 PG_FREE_IF_COPY(arg1, 0);
1715 PG_FREE_IF_COPY(arg2, 1);
1716
1717 PG_RETURN_BOOL(result);
1718 }
1719
1720 Datum
text_le(PG_FUNCTION_ARGS)1721 text_le(PG_FUNCTION_ARGS)
1722 {
1723 text *arg1 = PG_GETARG_TEXT_PP(0);
1724 text *arg2 = PG_GETARG_TEXT_PP(1);
1725 bool result;
1726
1727 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1728
1729 PG_FREE_IF_COPY(arg1, 0);
1730 PG_FREE_IF_COPY(arg2, 1);
1731
1732 PG_RETURN_BOOL(result);
1733 }
1734
1735 Datum
text_gt(PG_FUNCTION_ARGS)1736 text_gt(PG_FUNCTION_ARGS)
1737 {
1738 text *arg1 = PG_GETARG_TEXT_PP(0);
1739 text *arg2 = PG_GETARG_TEXT_PP(1);
1740 bool result;
1741
1742 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1743
1744 PG_FREE_IF_COPY(arg1, 0);
1745 PG_FREE_IF_COPY(arg2, 1);
1746
1747 PG_RETURN_BOOL(result);
1748 }
1749
1750 Datum
text_ge(PG_FUNCTION_ARGS)1751 text_ge(PG_FUNCTION_ARGS)
1752 {
1753 text *arg1 = PG_GETARG_TEXT_PP(0);
1754 text *arg2 = PG_GETARG_TEXT_PP(1);
1755 bool result;
1756
1757 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1758
1759 PG_FREE_IF_COPY(arg1, 0);
1760 PG_FREE_IF_COPY(arg2, 1);
1761
1762 PG_RETURN_BOOL(result);
1763 }
1764
1765 Datum
bttextcmp(PG_FUNCTION_ARGS)1766 bttextcmp(PG_FUNCTION_ARGS)
1767 {
1768 text *arg1 = PG_GETARG_TEXT_PP(0);
1769 text *arg2 = PG_GETARG_TEXT_PP(1);
1770 int32 result;
1771
1772 result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1773
1774 PG_FREE_IF_COPY(arg1, 0);
1775 PG_FREE_IF_COPY(arg2, 1);
1776
1777 PG_RETURN_INT32(result);
1778 }
1779
1780 Datum
bttextsortsupport(PG_FUNCTION_ARGS)1781 bttextsortsupport(PG_FUNCTION_ARGS)
1782 {
1783 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1784 Oid collid = ssup->ssup_collation;
1785 MemoryContext oldcontext;
1786
1787 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1788
1789 /* Use generic string SortSupport */
1790 varstr_sortsupport(ssup, collid, false);
1791
1792 MemoryContextSwitchTo(oldcontext);
1793
1794 PG_RETURN_VOID();
1795 }
1796
1797 /*
1798 * Generic sortsupport interface for character type's operator classes.
1799 * Includes locale support, and support for BpChar semantics (i.e. removing
1800 * trailing spaces before comparison).
1801 *
1802 * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
1803 * same representation. Callers that always use the C collation (e.g.
1804 * non-collatable type callers like bytea) may have NUL bytes in their strings;
1805 * this will not work with any other collation, though.
1806 */
1807 void
varstr_sortsupport(SortSupport ssup,Oid collid,bool bpchar)1808 varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
1809 {
1810 bool abbreviate = ssup->abbreviate;
1811 bool collate_c = false;
1812 VarStringSortSupport *sss;
1813 pg_locale_t locale = 0;
1814
1815 /*
1816 * If possible, set ssup->comparator to a function which can be used to
1817 * directly compare two datums. If we can do this, we'll avoid the
1818 * overhead of a trip through the fmgr layer for every comparison, which
1819 * can be substantial.
1820 *
1821 * Most typically, we'll set the comparator to varstrfastcmp_locale, which
1822 * uses strcoll() to perform comparisons and knows about the special
1823 * requirements of BpChar callers. However, if LC_COLLATE = C, we can
1824 * make things quite a bit faster with varstrfastcmp_c or bpcharfastcmp_c,
1825 * both of which use memcmp() rather than strcoll().
1826 */
1827 if (lc_collate_is_c(collid))
1828 {
1829 if (!bpchar)
1830 ssup->comparator = varstrfastcmp_c;
1831 else
1832 ssup->comparator = bpcharfastcmp_c;
1833
1834 collate_c = true;
1835 }
1836 else
1837 {
1838 /*
1839 * We need a collation-sensitive comparison. To make things faster,
1840 * we'll figure out the collation based on the locale id and cache the
1841 * result.
1842 */
1843 if (collid != DEFAULT_COLLATION_OID)
1844 {
1845 if (!OidIsValid(collid))
1846 {
1847 /*
1848 * This typically means that the parser could not resolve a
1849 * conflict of implicit collations, so report it that way.
1850 */
1851 ereport(ERROR,
1852 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1853 errmsg("could not determine which collation to use for string comparison"),
1854 errhint("Use the COLLATE clause to set the collation explicitly.")));
1855 }
1856 locale = pg_newlocale_from_collation(collid);
1857 }
1858
1859 /*
1860 * There is a further exception on Windows. When the database
1861 * encoding is UTF-8 and we are not using the C collation, complex
1862 * hacks are required. We don't currently have a comparator that
1863 * handles that case, so we fall back on the slow method of having the
1864 * sort code invoke bttextcmp() (in the case of text) via the fmgr
1865 * trampoline. ICU locales work just the same on Windows, however.
1866 */
1867 #ifdef WIN32
1868 if (GetDatabaseEncoding() == PG_UTF8 &&
1869 !(locale && locale->provider == COLLPROVIDER_ICU))
1870 return;
1871 #endif
1872
1873 ssup->comparator = varstrfastcmp_locale;
1874 }
1875
1876 /*
1877 * Unfortunately, it seems that abbreviation for non-C collations is
1878 * broken on many common platforms; testing of multiple versions of glibc
1879 * reveals that, for many locales, strcoll() and strxfrm() do not return
1880 * consistent results, which is fatal to this optimization. While no
1881 * other libc other than Cygwin has so far been shown to have a problem,
1882 * we take the conservative course of action for right now and disable
1883 * this categorically. (Users who are certain this isn't a problem on
1884 * their system can define TRUST_STRXFRM.)
1885 *
1886 * Even apart from the risk of broken locales, it's possible that there
1887 * are platforms where the use of abbreviated keys should be disabled at
1888 * compile time. Having only 4 byte datums could make worst-case
1889 * performance drastically more likely, for example. Moreover, macOS's
1890 * strxfrm() implementation is known to not effectively concentrate a
1891 * significant amount of entropy from the original string in earlier
1892 * transformed blobs. It's possible that other supported platforms are
1893 * similarly encumbered. So, if we ever get past disabling this
1894 * categorically, we may still want or need to disable it for particular
1895 * platforms.
1896 */
1897 #ifndef TRUST_STRXFRM
1898 if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
1899 abbreviate = false;
1900 #endif
1901
1902 /*
1903 * If we're using abbreviated keys, or if we're using a locale-aware
1904 * comparison, we need to initialize a StringSortSupport object. Both
1905 * cases will make use of the temporary buffers we initialize here for
1906 * scratch space (and to detect requirement for BpChar semantics from
1907 * caller), and the abbreviation case requires additional state.
1908 */
1909 if (abbreviate || !collate_c)
1910 {
1911 sss = palloc(sizeof(VarStringSortSupport));
1912 sss->buf1 = palloc(TEXTBUFLEN);
1913 sss->buflen1 = TEXTBUFLEN;
1914 sss->buf2 = palloc(TEXTBUFLEN);
1915 sss->buflen2 = TEXTBUFLEN;
1916 /* Start with invalid values */
1917 sss->last_len1 = -1;
1918 sss->last_len2 = -1;
1919 /* Initialize */
1920 sss->last_returned = 0;
1921 sss->locale = locale;
1922
1923 /*
1924 * To avoid somehow confusing a strxfrm() blob and an original string,
1925 * constantly keep track of the variety of data that buf1 and buf2
1926 * currently contain.
1927 *
1928 * Comparisons may be interleaved with conversion calls. Frequently,
1929 * conversions and comparisons are batched into two distinct phases,
1930 * but the correctness of caching cannot hinge upon this. For
1931 * comparison caching, buffer state is only trusted if cache_blob is
1932 * found set to false, whereas strxfrm() caching only trusts the state
1933 * when cache_blob is found set to true.
1934 *
1935 * Arbitrarily initialize cache_blob to true.
1936 */
1937 sss->cache_blob = true;
1938 sss->collate_c = collate_c;
1939 sss->bpchar = bpchar;
1940 ssup->ssup_extra = sss;
1941
1942 /*
1943 * If possible, plan to use the abbreviated keys optimization. The
1944 * core code may switch back to authoritative comparator should
1945 * abbreviation be aborted.
1946 */
1947 if (abbreviate)
1948 {
1949 sss->prop_card = 0.20;
1950 initHyperLogLog(&sss->abbr_card, 10);
1951 initHyperLogLog(&sss->full_card, 10);
1952 ssup->abbrev_full_comparator = ssup->comparator;
1953 ssup->comparator = varstrcmp_abbrev;
1954 ssup->abbrev_converter = varstr_abbrev_convert;
1955 ssup->abbrev_abort = varstr_abbrev_abort;
1956 }
1957 }
1958 }
1959
1960 /*
1961 * sortsupport comparison func (for C locale case)
1962 */
1963 static int
varstrfastcmp_c(Datum x,Datum y,SortSupport ssup)1964 varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
1965 {
1966 VarString *arg1 = DatumGetVarStringPP(x);
1967 VarString *arg2 = DatumGetVarStringPP(y);
1968 char *a1p,
1969 *a2p;
1970 int len1,
1971 len2,
1972 result;
1973
1974 a1p = VARDATA_ANY(arg1);
1975 a2p = VARDATA_ANY(arg2);
1976
1977 len1 = VARSIZE_ANY_EXHDR(arg1);
1978 len2 = VARSIZE_ANY_EXHDR(arg2);
1979
1980 result = memcmp(a1p, a2p, Min(len1, len2));
1981 if ((result == 0) && (len1 != len2))
1982 result = (len1 < len2) ? -1 : 1;
1983
1984 /* We can't afford to leak memory here. */
1985 if (PointerGetDatum(arg1) != x)
1986 pfree(arg1);
1987 if (PointerGetDatum(arg2) != y)
1988 pfree(arg2);
1989
1990 return result;
1991 }
1992
1993 /*
1994 * sortsupport comparison func (for BpChar C locale case)
1995 *
1996 * BpChar outsources its sortsupport to this module. Specialization for the
1997 * varstr_sortsupport BpChar case, modeled on
1998 * internal_bpchar_pattern_compare().
1999 */
2000 static int
bpcharfastcmp_c(Datum x,Datum y,SortSupport ssup)2001 bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
2002 {
2003 BpChar *arg1 = DatumGetBpCharPP(x);
2004 BpChar *arg2 = DatumGetBpCharPP(y);
2005 char *a1p,
2006 *a2p;
2007 int len1,
2008 len2,
2009 result;
2010
2011 a1p = VARDATA_ANY(arg1);
2012 a2p = VARDATA_ANY(arg2);
2013
2014 len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
2015 len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
2016
2017 result = memcmp(a1p, a2p, Min(len1, len2));
2018 if ((result == 0) && (len1 != len2))
2019 result = (len1 < len2) ? -1 : 1;
2020
2021 /* We can't afford to leak memory here. */
2022 if (PointerGetDatum(arg1) != x)
2023 pfree(arg1);
2024 if (PointerGetDatum(arg2) != y)
2025 pfree(arg2);
2026
2027 return result;
2028 }
2029
/*
 * sortsupport comparison func (for locale case)
 *
 * Compares datums x and y under the locale stored in the
 * VarStringSortSupport state hanging off ssup->ssup_extra (the process
 * default via strcoll() when that locale is unset).  Returns a negative,
 * zero, or positive value.
 *
 * Null-terminated copies of the most recently compared strings are kept in
 * the state's buf1/buf2, together with the last result, so that repeating
 * the same comparison can skip the collation call.
 */
static int
varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
{
	VarString  *arg1 = DatumGetVarStringPP(x);
	VarString  *arg2 = DatumGetVarStringPP(y);
	bool		arg1_match;
	VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;

	/* working state */
	char	   *a1p,
			   *a2p;
	int			len1,
				len2,
				result;

	a1p = VARDATA_ANY(arg1);
	a2p = VARDATA_ANY(arg2);

	len1 = VARSIZE_ANY_EXHDR(arg1);
	len2 = VARSIZE_ANY_EXHDR(arg2);

	/* Fast pre-check for equality, as discussed in varstr_cmp() */
	if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
	{
		/*
		 * No change in buf1 or buf2 contents, so avoid changing last_len1 or
		 * last_len2.  Existing contents of buffers might still be used by
		 * next call.
		 *
		 * It's fine to allow the comparison of BpChar padding bytes here,
		 * even though that implies that the memcmp() will usually be
		 * performed for BpChar callers (though multibyte characters could
		 * still prevent that from occurring).  The memcmp() is still very
		 * cheap, and BpChar's funny semantics have us remove trailing spaces
		 * (not limited to padding), so we need make no distinction between
		 * padding space characters and "real" space characters.
		 */
		result = 0;
		goto done;
	}

	if (sss->bpchar)
	{
		/* Get true number of bytes, ignoring trailing spaces */
		len1 = bpchartruelen(a1p, len1);
		len2 = bpchartruelen(a2p, len2);
	}

	/*
	 * Enlarge a buffer when the string plus its terminator would not fit.
	 * The new size is at least len + 1 and otherwise double the old size,
	 * with the doubling capped at MaxAllocSize.  The old contents are
	 * discarded rather than copied.
	 */
	if (len1 >= sss->buflen1)
	{
		pfree(sss->buf1);
		sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
		sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
	}
	if (len2 >= sss->buflen2)
	{
		pfree(sss->buf2);
		sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
		sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
	}

	/*
	 * We're likely to be asked to compare the same strings repeatedly, and
	 * memcmp() is so much cheaper than strcoll() that it pays to try to cache
	 * comparisons, even though in general there is no reason to think that
	 * that will work out (every string datum may be unique).  Caching does
	 * not slow things down measurably when it doesn't work out, and can speed
	 * things up by rather a lot when it does.  In part, this is because the
	 * memcmp() compares data from cachelines that are needed in L1 cache even
	 * when the last comparison's result cannot be reused.
	 */
	arg1_match = true;
	if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
	{
		/* buf1 holds something else: refresh it with a terminated copy */
		arg1_match = false;
		memcpy(sss->buf1, a1p, len1);
		sss->buf1[len1] = '\0';
		sss->last_len1 = len1;
	}

	/*
	 * If we're comparing the same two strings as last time, we can return the
	 * same answer without calling strcoll() again.  This is more likely than
	 * it seems (at least with moderate to low cardinality sets), because
	 * quicksort compares the same pivot against many values.
	 */
	if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
	{
		/* buf2 holds something else: refresh it with a terminated copy */
		memcpy(sss->buf2, a2p, len2);
		sss->buf2[len2] = '\0';
		sss->last_len2 = len2;
	}
	else if (arg1_match && !sss->cache_blob)
	{
		/* Use result cached following last actual strcoll() call */
		result = sss->last_returned;
		goto done;
	}

	if (sss->locale)
	{
		if (sss->locale->provider == COLLPROVIDER_ICU)
		{
#ifdef USE_ICU
#ifdef HAVE_UCOL_STRCOLLUTF8
			if (GetDatabaseEncoding() == PG_UTF8)
			{
				UErrorCode	status;

				/* ICU can compare the UTF-8 data directly, with lengths */
				status = U_ZERO_ERROR;
				result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
										  a1p, len1,
										  a2p, len2,
										  &status);
				if (U_FAILURE(status))
					ereport(ERROR,
							(errmsg("collation failed: %s", u_errorName(status))));
			}
			else
#endif
			{
				int32_t		ulen1,
							ulen2;
				UChar	   *uchar1,
						   *uchar2;

				/* otherwise convert both strings to UChar first */
				ulen1 = icu_to_uchar(&uchar1, a1p, len1);
				ulen2 = icu_to_uchar(&uchar2, a2p, len2);

				result = ucol_strcoll(sss->locale->info.icu.ucol,
									  uchar1, ulen1,
									  uchar2, ulen2);

				pfree(uchar1);
				pfree(uchar2);
			}
#else							/* not USE_ICU */
			/* shouldn't happen */
			elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
#endif							/* not USE_ICU */
		}
		else
		{
#ifdef HAVE_LOCALE_T
			result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
#else
			/* shouldn't happen */
			elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
#endif
		}
	}
	else
		result = strcoll(sss->buf1, sss->buf2);

	/*
	 * In some locales strcoll() can claim that nonidentical strings are
	 * equal.  Believing that would be bad news for a number of reasons, so we
	 * follow Perl's lead and sort "equal" strings according to strcmp().
	 */
	if (result == 0)
		result = strcmp(sss->buf1, sss->buf2);

	/* Cache result, perhaps saving an expensive strcoll() call next time */
	sss->cache_blob = false;
	sss->last_returned = result;
done:
	/* We can't afford to leak memory here. */
	if (PointerGetDatum(arg1) != x)
		pfree(arg1);
	if (PointerGetDatum(arg2) != y)
		pfree(arg2);

	return result;
}
2207
2208 /*
2209 * Abbreviated key comparison func
2210 */
2211 static int
varstrcmp_abbrev(Datum x,Datum y,SortSupport ssup)2212 varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
2213 {
2214 /*
2215 * When 0 is returned, the core system will call varstrfastcmp_c()
2216 * (bpcharfastcmp_c() in BpChar case) or varstrfastcmp_locale(). Even a
2217 * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
2218 * authoritatively, for the same reason that there is a strcoll()
2219 * tie-breaker call to strcmp() in varstr_cmp().
2220 */
2221 if (x > y)
2222 return 1;
2223 else if (x == y)
2224 return 0;
2225 else
2226 return -1;
2227 }
2228
2229 /*
2230 * Conversion routine for sortsupport. Converts original to abbreviated key
2231 * representation. Our encoding strategy is simple -- pack the first 8 bytes
2232 * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2233 * stored in reverse order), and treat it as an unsigned integer. When the "C"
2234 * locale is used, or in case of bytea, just memcpy() from original instead.
2235 */
2236 static Datum
varstr_abbrev_convert(Datum original,SortSupport ssup)2237 varstr_abbrev_convert(Datum original, SortSupport ssup)
2238 {
2239 VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
2240 VarString *authoritative = DatumGetVarStringPP(original);
2241 char *authoritative_data = VARDATA_ANY(authoritative);
2242
2243 /* working state */
2244 Datum res;
2245 char *pres;
2246 int len;
2247 uint32 hash;
2248
2249 pres = (char *) &res;
2250 /* memset(), so any non-overwritten bytes are NUL */
2251 memset(pres, 0, sizeof(Datum));
2252 len = VARSIZE_ANY_EXHDR(authoritative);
2253
2254 /* Get number of bytes, ignoring trailing spaces */
2255 if (sss->bpchar)
2256 len = bpchartruelen(authoritative_data, len);
2257
2258 /*
2259 * If we're using the C collation, use memcpy(), rather than strxfrm(), to
2260 * abbreviate keys. The full comparator for the C locale is always
2261 * memcmp(). It would be incorrect to allow bytea callers (callers that
2262 * always force the C collation -- bytea isn't a collatable type, but this
2263 * approach is convenient) to use strxfrm(). This is because bytea
2264 * strings may contain NUL bytes. Besides, this should be faster, too.
2265 *
2266 * More generally, it's okay that bytea callers can have NUL bytes in
2267 * strings because varstrcmp_abbrev() need not make a distinction between
2268 * terminating NUL bytes, and NUL bytes representing actual NULs in the
2269 * authoritative representation. Hopefully a comparison at or past one
2270 * abbreviated key's terminating NUL byte will resolve the comparison
2271 * without consulting the authoritative representation; specifically, some
2272 * later non-NUL byte in the longer string can resolve the comparison
2273 * against a subsequent terminating NUL in the shorter string. There will
2274 * usually be what is effectively a "length-wise" resolution there and
2275 * then.
2276 *
2277 * If that doesn't work out -- if all bytes in the longer string
2278 * positioned at or past the offset of the smaller string's (first)
2279 * terminating NUL are actually representative of NUL bytes in the
2280 * authoritative binary string (perhaps with some *terminating* NUL bytes
2281 * towards the end of the longer string iff it happens to still be small)
2282 * -- then an authoritative tie-breaker will happen, and do the right
2283 * thing: explicitly consider string length.
2284 */
2285 if (sss->collate_c)
2286 memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
2287 else
2288 {
2289 Size bsize;
2290 #ifdef USE_ICU
2291 int32_t ulen = -1;
2292 UChar *uchar = NULL;
2293 #endif
2294
2295 /*
2296 * We're not using the C collation, so fall back on strxfrm or ICU
2297 * analogs.
2298 */
2299
2300 /* By convention, we use buffer 1 to store and NUL-terminate */
2301 if (len >= sss->buflen1)
2302 {
2303 pfree(sss->buf1);
2304 sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
2305 sss->buf1 = palloc(sss->buflen1);
2306 }
2307
2308 /* Might be able to reuse strxfrm() blob from last call */
2309 if (sss->last_len1 == len && sss->cache_blob &&
2310 memcmp(sss->buf1, authoritative_data, len) == 0)
2311 {
2312 memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
2313 /* No change affecting cardinality, so no hashing required */
2314 goto done;
2315 }
2316
2317 memcpy(sss->buf1, authoritative_data, len);
2318
2319 /*
2320 * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
2321 * necessary for ICU, but doesn't hurt.
2322 */
2323 sss->buf1[len] = '\0';
2324 sss->last_len1 = len;
2325
2326 #ifdef USE_ICU
2327 /* When using ICU and not UTF8, convert string to UChar. */
2328 if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
2329 GetDatabaseEncoding() != PG_UTF8)
2330 ulen = icu_to_uchar(&uchar, sss->buf1, len);
2331 #endif
2332
2333 /*
2334 * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
2335 * and try again. Both of these functions have the result buffer
2336 * content undefined if the result did not fit, so we need to retry
2337 * until everything fits, even though we only need the first few bytes
2338 * in the end. When using ucol_nextSortKeyPart(), however, we only
2339 * ask for as many bytes as we actually need.
2340 */
2341 for (;;)
2342 {
2343 #ifdef USE_ICU
2344 if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
2345 {
2346 /*
2347 * When using UTF8, use the iteration interface so we only
2348 * need to produce as many bytes as we actually need.
2349 */
2350 if (GetDatabaseEncoding() == PG_UTF8)
2351 {
2352 UCharIterator iter;
2353 uint32_t state[2];
2354 UErrorCode status;
2355
2356 uiter_setUTF8(&iter, sss->buf1, len);
2357 state[0] = state[1] = 0; /* won't need that again */
2358 status = U_ZERO_ERROR;
2359 bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
2360 &iter,
2361 state,
2362 (uint8_t *) sss->buf2,
2363 Min(sizeof(Datum), sss->buflen2),
2364 &status);
2365 if (U_FAILURE(status))
2366 ereport(ERROR,
2367 (errmsg("sort key generation failed: %s",
2368 u_errorName(status))));
2369 }
2370 else
2371 bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
2372 uchar, ulen,
2373 (uint8_t *) sss->buf2, sss->buflen2);
2374 }
2375 else
2376 #endif
2377 #ifdef HAVE_LOCALE_T
2378 if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
2379 bsize = strxfrm_l(sss->buf2, sss->buf1,
2380 sss->buflen2, sss->locale->info.lt);
2381 else
2382 #endif
2383 bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
2384
2385 sss->last_len2 = bsize;
2386 if (bsize < sss->buflen2)
2387 break;
2388
2389 /*
2390 * Grow buffer and retry.
2391 */
2392 pfree(sss->buf2);
2393 sss->buflen2 = Max(bsize + 1,
2394 Min(sss->buflen2 * 2, MaxAllocSize));
2395 sss->buf2 = palloc(sss->buflen2);
2396 }
2397
2398 /*
2399 * Every Datum byte is always compared. This is safe because the
2400 * strxfrm() blob is itself NUL terminated, leaving no danger of
2401 * misinterpreting any NUL bytes not intended to be interpreted as
2402 * logically representing termination.
2403 *
2404 * (Actually, even if there were NUL bytes in the blob it would be
2405 * okay. See remarks on bytea case above.)
2406 */
2407 memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2408
2409 #ifdef USE_ICU
2410 if (uchar)
2411 pfree(uchar);
2412 #endif
2413 }
2414
2415 /*
2416 * Maintain approximate cardinality of both abbreviated keys and original,
2417 * authoritative keys using HyperLogLog. Used as cheap insurance against
2418 * the worst case, where we do many string transformations for no saving
2419 * in full strcoll()-based comparisons. These statistics are used by
2420 * varstr_abbrev_abort().
2421 *
2422 * First, Hash key proper, or a significant fraction of it. Mix in length
2423 * in order to compensate for cases where differences are past
2424 * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2425 */
2426 hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2427 Min(len, PG_CACHE_LINE_SIZE)));
2428
2429 if (len > PG_CACHE_LINE_SIZE)
2430 hash ^= DatumGetUInt32(hash_uint32((uint32) len));
2431
2432 addHyperLogLog(&sss->full_card, hash);
2433
2434 /* Hash abbreviated key */
2435 #if SIZEOF_DATUM == 8
2436 {
2437 uint32 lohalf,
2438 hihalf;
2439
2440 lohalf = (uint32) res;
2441 hihalf = (uint32) (res >> 32);
2442 hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2443 }
2444 #else /* SIZEOF_DATUM != 8 */
2445 hash = DatumGetUInt32(hash_uint32((uint32) res));
2446 #endif
2447
2448 addHyperLogLog(&sss->abbr_card, hash);
2449
2450 /* Cache result, perhaps saving an expensive strxfrm() call next time */
2451 sss->cache_blob = true;
2452 done:
2453
2454 /*
2455 * Byteswap on little-endian machines.
2456 *
2457 * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
2458 * comparator) works correctly on all platforms. If we didn't do this,
2459 * the comparator would have to call memcmp() with a pair of pointers to
2460 * the first byte of each abbreviated key, which is slower.
2461 */
2462 res = DatumBigEndianToNative(res);
2463
2464 /* Don't leak memory here */
2465 if (PointerGetDatum(authoritative) != original)
2466 pfree(authoritative);
2467
2468 return res;
2469 }
2470
2471 /*
2472 * Callback for estimating effectiveness of abbreviated key optimization, using
2473 * heuristic rules. Returns value indicating if the abbreviation optimization
2474 * should be aborted, based on its projected effectiveness.
2475 */
2476 static bool
varstr_abbrev_abort(int memtupcount,SortSupport ssup)2477 varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2478 {
2479 VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
2480 double abbrev_distinct,
2481 key_distinct;
2482
2483 Assert(ssup->abbreviate);
2484
2485 /* Have a little patience */
2486 if (memtupcount < 100)
2487 return false;
2488
2489 abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2490 key_distinct = estimateHyperLogLog(&sss->full_card);
2491
2492 /*
2493 * Clamp cardinality estimates to at least one distinct value. While
2494 * NULLs are generally disregarded, if only NULL values were seen so far,
2495 * that might misrepresent costs if we failed to clamp.
2496 */
2497 if (abbrev_distinct <= 1.0)
2498 abbrev_distinct = 1.0;
2499
2500 if (key_distinct <= 1.0)
2501 key_distinct = 1.0;
2502
2503 /*
2504 * In the worst case all abbreviated keys are identical, while at the same
2505 * time there are differences within full key strings not captured in
2506 * abbreviations.
2507 */
2508 #ifdef TRACE_SORT
2509 if (trace_sort)
2510 {
2511 double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2512
2513 elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2514 "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2515 memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2516 sss->prop_card);
2517 }
2518 #endif
2519
2520 /*
2521 * If the number of distinct abbreviated keys approximately matches the
2522 * number of distinct authoritative original keys, that's reason enough to
2523 * proceed. We can win even with a very low cardinality set if most
2524 * tie-breakers only memcmp(). This is by far the most important
2525 * consideration.
2526 *
2527 * While comparisons that are resolved at the abbreviated key level are
2528 * considerably cheaper than tie-breakers resolved with memcmp(), both of
2529 * those two outcomes are so much cheaper than a full strcoll() once
2530 * sorting is underway that it doesn't seem worth it to weigh abbreviated
2531 * cardinality against the overall size of the set in order to more
2532 * accurately model costs. Assume that an abbreviated comparison, and an
2533 * abbreviated comparison with a cheap memcmp()-based authoritative
2534 * resolution are equivalent.
2535 */
2536 if (abbrev_distinct > key_distinct * sss->prop_card)
2537 {
2538 /*
2539 * When we have exceeded 10,000 tuples, decay required cardinality
2540 * aggressively for next call.
2541 *
2542 * This is useful because the number of comparisons required on
2543 * average increases at a linearithmic rate, and at roughly 10,000
2544 * tuples that factor will start to dominate over the linear costs of
2545 * string transformation (this is a conservative estimate). The decay
2546 * rate is chosen to be a little less aggressive than halving -- which
2547 * (since we're called at points at which memtupcount has doubled)
2548 * would never see the cost model actually abort past the first call
2549 * following a decay. This decay rate is mostly a precaution against
2550 * a sudden, violent swing in how well abbreviated cardinality tracks
2551 * full key cardinality. The decay also serves to prevent a marginal
2552 * case from being aborted too late, when too much has already been
2553 * invested in string transformation.
2554 *
2555 * It's possible for sets of several million distinct strings with
2556 * mere tens of thousands of distinct abbreviated keys to still
2557 * benefit very significantly. This will generally occur provided
2558 * each abbreviated key is a proxy for a roughly uniform number of the
2559 * set's full keys. If it isn't so, we hope to catch that early and
2560 * abort. If it isn't caught early, by the time the problem is
2561 * apparent it's probably not worth aborting.
2562 */
2563 if (memtupcount > 10000)
2564 sss->prop_card *= 0.65;
2565
2566 return false;
2567 }
2568
2569 /*
2570 * Abort abbreviation strategy.
2571 *
2572 * The worst case, where all abbreviated keys are identical while all
2573 * original strings differ will typically only see a regression of about
2574 * 10% in execution time for small to medium sized lists of strings.
2575 * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2576 * often expect very large improvements, particularly with sets of strings
2577 * of moderately high to high abbreviated cardinality. There is little to
2578 * lose but much to gain, which our strategy reflects.
2579 */
2580 #ifdef TRACE_SORT
2581 if (trace_sort)
2582 elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2583 "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2584 memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2585 #endif
2586
2587 return true;
2588 }
2589
2590 Datum
text_larger(PG_FUNCTION_ARGS)2591 text_larger(PG_FUNCTION_ARGS)
2592 {
2593 text *arg1 = PG_GETARG_TEXT_PP(0);
2594 text *arg2 = PG_GETARG_TEXT_PP(1);
2595 text *result;
2596
2597 result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2598
2599 PG_RETURN_TEXT_P(result);
2600 }
2601
2602 Datum
text_smaller(PG_FUNCTION_ARGS)2603 text_smaller(PG_FUNCTION_ARGS)
2604 {
2605 text *arg1 = PG_GETARG_TEXT_PP(0);
2606 text *arg2 = PG_GETARG_TEXT_PP(1);
2607 text *result;
2608
2609 result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2610
2611 PG_RETURN_TEXT_P(result);
2612 }
2613
2614
2615 /*
2616 * The following operators support character-by-character comparison
2617 * of text datums, to allow building indexes suitable for LIKE clauses.
2618 * Note that the regular texteq/textne comparison operators, and regular
2619 * support functions 1 and 2 with "C" collation are assumed to be
2620 * compatible with these!
2621 */
2622
2623 static int
internal_text_pattern_compare(text * arg1,text * arg2)2624 internal_text_pattern_compare(text *arg1, text *arg2)
2625 {
2626 int result;
2627 int len1,
2628 len2;
2629
2630 len1 = VARSIZE_ANY_EXHDR(arg1);
2631 len2 = VARSIZE_ANY_EXHDR(arg2);
2632
2633 result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2634 if (result != 0)
2635 return result;
2636 else if (len1 < len2)
2637 return -1;
2638 else if (len1 > len2)
2639 return 1;
2640 else
2641 return 0;
2642 }
2643
2644
2645 Datum
text_pattern_lt(PG_FUNCTION_ARGS)2646 text_pattern_lt(PG_FUNCTION_ARGS)
2647 {
2648 text *arg1 = PG_GETARG_TEXT_PP(0);
2649 text *arg2 = PG_GETARG_TEXT_PP(1);
2650 int result;
2651
2652 result = internal_text_pattern_compare(arg1, arg2);
2653
2654 PG_FREE_IF_COPY(arg1, 0);
2655 PG_FREE_IF_COPY(arg2, 1);
2656
2657 PG_RETURN_BOOL(result < 0);
2658 }
2659
2660
2661 Datum
text_pattern_le(PG_FUNCTION_ARGS)2662 text_pattern_le(PG_FUNCTION_ARGS)
2663 {
2664 text *arg1 = PG_GETARG_TEXT_PP(0);
2665 text *arg2 = PG_GETARG_TEXT_PP(1);
2666 int result;
2667
2668 result = internal_text_pattern_compare(arg1, arg2);
2669
2670 PG_FREE_IF_COPY(arg1, 0);
2671 PG_FREE_IF_COPY(arg2, 1);
2672
2673 PG_RETURN_BOOL(result <= 0);
2674 }
2675
2676
2677 Datum
text_pattern_ge(PG_FUNCTION_ARGS)2678 text_pattern_ge(PG_FUNCTION_ARGS)
2679 {
2680 text *arg1 = PG_GETARG_TEXT_PP(0);
2681 text *arg2 = PG_GETARG_TEXT_PP(1);
2682 int result;
2683
2684 result = internal_text_pattern_compare(arg1, arg2);
2685
2686 PG_FREE_IF_COPY(arg1, 0);
2687 PG_FREE_IF_COPY(arg2, 1);
2688
2689 PG_RETURN_BOOL(result >= 0);
2690 }
2691
2692
2693 Datum
text_pattern_gt(PG_FUNCTION_ARGS)2694 text_pattern_gt(PG_FUNCTION_ARGS)
2695 {
2696 text *arg1 = PG_GETARG_TEXT_PP(0);
2697 text *arg2 = PG_GETARG_TEXT_PP(1);
2698 int result;
2699
2700 result = internal_text_pattern_compare(arg1, arg2);
2701
2702 PG_FREE_IF_COPY(arg1, 0);
2703 PG_FREE_IF_COPY(arg2, 1);
2704
2705 PG_RETURN_BOOL(result > 0);
2706 }
2707
2708
2709 Datum
bttext_pattern_cmp(PG_FUNCTION_ARGS)2710 bttext_pattern_cmp(PG_FUNCTION_ARGS)
2711 {
2712 text *arg1 = PG_GETARG_TEXT_PP(0);
2713 text *arg2 = PG_GETARG_TEXT_PP(1);
2714 int result;
2715
2716 result = internal_text_pattern_compare(arg1, arg2);
2717
2718 PG_FREE_IF_COPY(arg1, 0);
2719 PG_FREE_IF_COPY(arg2, 1);
2720
2721 PG_RETURN_INT32(result);
2722 }
2723
2724
2725 Datum
bttext_pattern_sortsupport(PG_FUNCTION_ARGS)2726 bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
2727 {
2728 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
2729 MemoryContext oldcontext;
2730
2731 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
2732
2733 /* Use generic string SortSupport, forcing "C" collation */
2734 varstr_sortsupport(ssup, C_COLLATION_OID, false);
2735
2736 MemoryContextSwitchTo(oldcontext);
2737
2738 PG_RETURN_VOID();
2739 }
2740
2741
2742 /*-------------------------------------------------------------
2743 * byteaoctetlen
2744 *
2745 * get the number of bytes contained in an instance of type 'bytea'
2746 *-------------------------------------------------------------
2747 */
2748 Datum
byteaoctetlen(PG_FUNCTION_ARGS)2749 byteaoctetlen(PG_FUNCTION_ARGS)
2750 {
2751 Datum str = PG_GETARG_DATUM(0);
2752
2753 /* We need not detoast the input at all */
2754 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
2755 }
2756
2757 /*
2758 * byteacat -
2759 * takes two bytea* and returns a bytea* that is the concatenation of
2760 * the two.
2761 *
2762 * Cloned from textcat and modified as required.
2763 */
2764 Datum
byteacat(PG_FUNCTION_ARGS)2765 byteacat(PG_FUNCTION_ARGS)
2766 {
2767 bytea *t1 = PG_GETARG_BYTEA_PP(0);
2768 bytea *t2 = PG_GETARG_BYTEA_PP(1);
2769
2770 PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
2771 }
2772
2773 /*
2774 * bytea_catenate
2775 * Guts of byteacat(), broken out so it can be used by other functions
2776 *
2777 * Arguments can be in short-header form, but not compressed or out-of-line
2778 */
2779 static bytea *
bytea_catenate(bytea * t1,bytea * t2)2780 bytea_catenate(bytea *t1, bytea *t2)
2781 {
2782 bytea *result;
2783 int len1,
2784 len2,
2785 len;
2786 char *ptr;
2787
2788 len1 = VARSIZE_ANY_EXHDR(t1);
2789 len2 = VARSIZE_ANY_EXHDR(t2);
2790
2791 /* paranoia ... probably should throw error instead? */
2792 if (len1 < 0)
2793 len1 = 0;
2794 if (len2 < 0)
2795 len2 = 0;
2796
2797 len = len1 + len2 + VARHDRSZ;
2798 result = (bytea *) palloc(len);
2799
2800 /* Set size of result string... */
2801 SET_VARSIZE(result, len);
2802
2803 /* Fill data field of result string... */
2804 ptr = VARDATA(result);
2805 if (len1 > 0)
2806 memcpy(ptr, VARDATA_ANY(t1), len1);
2807 if (len2 > 0)
2808 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
2809
2810 return result;
2811 }
2812
/* Convert a C string to a bytea pointer by passing it through byteain() */
#define PG_STR_GET_BYTEA(str_) \
	DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
2815
2816 /*
2817 * bytea_substr()
2818 * Return a substring starting at the specified position.
2819 * Cloned from text_substr and modified as required.
2820 *
2821 * Input:
2822 * - string
2823 * - starting position (is one-based)
2824 * - string length (optional)
2825 *
2826 * If the starting position is zero or less, then return from the start of the string
2827 * adjusting the length to be consistent with the "negative start" per SQL.
2828 * If the length is less than zero, an ERROR is thrown. If no third argument
2829 * (length) is provided, the length to the end of the string is assumed.
2830 */
2831 Datum
bytea_substr(PG_FUNCTION_ARGS)2832 bytea_substr(PG_FUNCTION_ARGS)
2833 {
2834 PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
2835 PG_GETARG_INT32(1),
2836 PG_GETARG_INT32(2),
2837 false));
2838 }
2839
2840 /*
2841 * bytea_substr_no_len -
2842 * Wrapper to avoid opr_sanity failure due to
2843 * one function accepting a different number of args.
2844 */
2845 Datum
bytea_substr_no_len(PG_FUNCTION_ARGS)2846 bytea_substr_no_len(PG_FUNCTION_ARGS)
2847 {
2848 PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
2849 PG_GETARG_INT32(1),
2850 -1,
2851 true));
2852 }
2853
2854 static bytea *
bytea_substring(Datum str,int S,int L,bool length_not_specified)2855 bytea_substring(Datum str,
2856 int S,
2857 int L,
2858 bool length_not_specified)
2859 {
2860 int S1; /* adjusted start position */
2861 int L1; /* adjusted substring length */
2862
2863 S1 = Max(S, 1);
2864
2865 if (length_not_specified)
2866 {
2867 /*
2868 * Not passed a length - DatumGetByteaPSlice() grabs everything to the
2869 * end of the string if we pass it a negative value for length.
2870 */
2871 L1 = -1;
2872 }
2873 else
2874 {
2875 /* end position */
2876 int E = S + L;
2877
2878 /*
2879 * A negative value for L is the only way for the end position to be
2880 * before the start. SQL99 says to throw an error.
2881 */
2882 if (E < S)
2883 ereport(ERROR,
2884 (errcode(ERRCODE_SUBSTRING_ERROR),
2885 errmsg("negative substring length not allowed")));
2886
2887 /*
2888 * A zero or negative value for the end position can happen if the
2889 * start was negative or one. SQL99 says to return a zero-length
2890 * string.
2891 */
2892 if (E < 1)
2893 return PG_STR_GET_BYTEA("");
2894
2895 L1 = E - S1;
2896 }
2897
2898 /*
2899 * If the start position is past the end of the string, SQL99 says to
2900 * return a zero-length string -- DatumGetByteaPSlice() will do that for
2901 * us. Convert to zero-based starting position
2902 */
2903 return DatumGetByteaPSlice(str, S1 - 1, L1);
2904 }
2905
2906 /*
2907 * byteaoverlay
2908 * Replace specified substring of first string with second
2909 *
2910 * The SQL standard defines OVERLAY() in terms of substring and concatenation.
2911 * This code is a direct implementation of what the standard says.
2912 */
2913 Datum
byteaoverlay(PG_FUNCTION_ARGS)2914 byteaoverlay(PG_FUNCTION_ARGS)
2915 {
2916 bytea *t1 = PG_GETARG_BYTEA_PP(0);
2917 bytea *t2 = PG_GETARG_BYTEA_PP(1);
2918 int sp = PG_GETARG_INT32(2); /* substring start position */
2919 int sl = PG_GETARG_INT32(3); /* substring length */
2920
2921 PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2922 }
2923
2924 Datum
byteaoverlay_no_len(PG_FUNCTION_ARGS)2925 byteaoverlay_no_len(PG_FUNCTION_ARGS)
2926 {
2927 bytea *t1 = PG_GETARG_BYTEA_PP(0);
2928 bytea *t2 = PG_GETARG_BYTEA_PP(1);
2929 int sp = PG_GETARG_INT32(2); /* substring start position */
2930 int sl;
2931
2932 sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
2933 PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
2934 }
2935
2936 static bytea *
bytea_overlay(bytea * t1,bytea * t2,int sp,int sl)2937 bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
2938 {
2939 bytea *result;
2940 bytea *s1;
2941 bytea *s2;
2942 int sp_pl_sl;
2943
2944 /*
2945 * Check for possible integer-overflow cases. For negative sp, throw a
2946 * "substring length" error because that's what should be expected
2947 * according to the spec's definition of OVERLAY().
2948 */
2949 if (sp <= 0)
2950 ereport(ERROR,
2951 (errcode(ERRCODE_SUBSTRING_ERROR),
2952 errmsg("negative substring length not allowed")));
2953 sp_pl_sl = sp + sl;
2954 if (sp_pl_sl <= sl)
2955 ereport(ERROR,
2956 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
2957 errmsg("integer out of range")));
2958
2959 s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
2960 s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
2961 result = bytea_catenate(s1, t2);
2962 result = bytea_catenate(result, s2);
2963
2964 return result;
2965 }
2966
2967 /*
2968 * byteapos -
2969 * Return the position of the specified substring.
2970 * Implements the SQL POSITION() function.
2971 * Cloned from textpos and modified as required.
2972 */
2973 Datum
byteapos(PG_FUNCTION_ARGS)2974 byteapos(PG_FUNCTION_ARGS)
2975 {
2976 bytea *t1 = PG_GETARG_BYTEA_PP(0);
2977 bytea *t2 = PG_GETARG_BYTEA_PP(1);
2978 int pos;
2979 int px,
2980 p;
2981 int len1,
2982 len2;
2983 char *p1,
2984 *p2;
2985
2986 len1 = VARSIZE_ANY_EXHDR(t1);
2987 len2 = VARSIZE_ANY_EXHDR(t2);
2988
2989 if (len2 <= 0)
2990 PG_RETURN_INT32(1); /* result for empty pattern */
2991
2992 p1 = VARDATA_ANY(t1);
2993 p2 = VARDATA_ANY(t2);
2994
2995 pos = 0;
2996 px = (len1 - len2);
2997 for (p = 0; p <= px; p++)
2998 {
2999 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
3000 {
3001 pos = p + 1;
3002 break;
3003 };
3004 p1++;
3005 };
3006
3007 PG_RETURN_INT32(pos);
3008 }
3009
3010 /*-------------------------------------------------------------
3011 * byteaGetByte
3012 *
3013 * this routine treats "bytea" as an array of bytes.
3014 * It returns the Nth byte (a number between 0 and 255).
3015 *-------------------------------------------------------------
3016 */
3017 Datum
byteaGetByte(PG_FUNCTION_ARGS)3018 byteaGetByte(PG_FUNCTION_ARGS)
3019 {
3020 bytea *v = PG_GETARG_BYTEA_PP(0);
3021 int32 n = PG_GETARG_INT32(1);
3022 int len;
3023 int byte;
3024
3025 len = VARSIZE_ANY_EXHDR(v);
3026
3027 if (n < 0 || n >= len)
3028 ereport(ERROR,
3029 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3030 errmsg("index %d out of valid range, 0..%d",
3031 n, len - 1)));
3032
3033 byte = ((unsigned char *) VARDATA_ANY(v))[n];
3034
3035 PG_RETURN_INT32(byte);
3036 }
3037
3038 /*-------------------------------------------------------------
3039 * byteaGetBit
3040 *
3041 * This routine treats a "bytea" type like an array of bits.
3042 * It returns the value of the Nth bit (0 or 1).
3043 *
3044 *-------------------------------------------------------------
3045 */
3046 Datum
byteaGetBit(PG_FUNCTION_ARGS)3047 byteaGetBit(PG_FUNCTION_ARGS)
3048 {
3049 bytea *v = PG_GETARG_BYTEA_PP(0);
3050 int32 n = PG_GETARG_INT32(1);
3051 int byteNo,
3052 bitNo;
3053 int len;
3054 int byte;
3055
3056 len = VARSIZE_ANY_EXHDR(v);
3057
3058 /* Do comparison arithmetic in int64 in case len exceeds INT_MAX/8 */
3059 if (n < 0 || n >= (int64) len * 8)
3060 ereport(ERROR,
3061 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3062 errmsg("index %d out of valid range, 0..%d",
3063 n, (int) Min((int64) len * 8 - 1, INT_MAX))));
3064
3065 byteNo = n / 8;
3066 bitNo = n % 8;
3067
3068 byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
3069
3070 if (byte & (1 << bitNo))
3071 PG_RETURN_INT32(1);
3072 else
3073 PG_RETURN_INT32(0);
3074 }
3075
3076 /*-------------------------------------------------------------
3077 * byteaSetByte
3078 *
3079 * Given an instance of type 'bytea' creates a new one with
3080 * the Nth byte set to the given value.
3081 *
3082 *-------------------------------------------------------------
3083 */
3084 Datum
byteaSetByte(PG_FUNCTION_ARGS)3085 byteaSetByte(PG_FUNCTION_ARGS)
3086 {
3087 bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3088 int32 n = PG_GETARG_INT32(1);
3089 int32 newByte = PG_GETARG_INT32(2);
3090 int len;
3091
3092 len = VARSIZE(res) - VARHDRSZ;
3093
3094 if (n < 0 || n >= len)
3095 ereport(ERROR,
3096 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3097 errmsg("index %d out of valid range, 0..%d",
3098 n, len - 1)));
3099
3100 /*
3101 * Now set the byte.
3102 */
3103 ((unsigned char *) VARDATA(res))[n] = newByte;
3104
3105 PG_RETURN_BYTEA_P(res);
3106 }
3107
3108 /*-------------------------------------------------------------
3109 * byteaSetBit
3110 *
3111 * Given an instance of type 'bytea' creates a new one with
3112 * the Nth bit set to the given value.
3113 *
3114 *-------------------------------------------------------------
3115 */
3116 Datum
byteaSetBit(PG_FUNCTION_ARGS)3117 byteaSetBit(PG_FUNCTION_ARGS)
3118 {
3119 bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3120 int32 n = PG_GETARG_INT32(1);
3121 int32 newBit = PG_GETARG_INT32(2);
3122 int len;
3123 int oldByte,
3124 newByte;
3125 int byteNo,
3126 bitNo;
3127
3128 len = VARSIZE(res) - VARHDRSZ;
3129
3130 /* Do comparison arithmetic in int64 in case len exceeds INT_MAX/8 */
3131 if (n < 0 || n >= (int64) len * 8)
3132 ereport(ERROR,
3133 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3134 errmsg("index %d out of valid range, 0..%d",
3135 n, (int) Min((int64) len * 8 - 1, INT_MAX))));
3136
3137 byteNo = n / 8;
3138 bitNo = n % 8;
3139
3140 /*
3141 * sanity check!
3142 */
3143 if (newBit != 0 && newBit != 1)
3144 ereport(ERROR,
3145 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3146 errmsg("new bit must be 0 or 1")));
3147
3148 /*
3149 * Update the byte.
3150 */
3151 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3152
3153 if (newBit == 0)
3154 newByte = oldByte & (~(1 << bitNo));
3155 else
3156 newByte = oldByte | (1 << bitNo);
3157
3158 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3159
3160 PG_RETURN_BYTEA_P(res);
3161 }
3162
3163
3164 /* text_name()
3165 * Converts a text type to a Name type.
3166 */
3167 Datum
text_name(PG_FUNCTION_ARGS)3168 text_name(PG_FUNCTION_ARGS)
3169 {
3170 text *s = PG_GETARG_TEXT_PP(0);
3171 Name result;
3172 int len;
3173
3174 len = VARSIZE_ANY_EXHDR(s);
3175
3176 /* Truncate oversize input */
3177 if (len >= NAMEDATALEN)
3178 len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
3179
3180 /* We use palloc0 here to ensure result is zero-padded */
3181 result = (Name) palloc0(NAMEDATALEN);
3182 memcpy(NameStr(*result), VARDATA_ANY(s), len);
3183
3184 PG_RETURN_NAME(result);
3185 }
3186
3187 /* name_text()
3188 * Converts a Name type to a text type.
3189 */
3190 Datum
name_text(PG_FUNCTION_ARGS)3191 name_text(PG_FUNCTION_ARGS)
3192 {
3193 Name s = PG_GETARG_NAME(0);
3194
3195 PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
3196 }
3197
3198
3199 /*
3200 * textToQualifiedNameList - convert a text object to list of names
3201 *
3202 * This implements the input parsing needed by nextval() and other
3203 * functions that take a text parameter representing a qualified name.
3204 * We split the name at dots, downcase if not double-quoted, and
3205 * truncate names if they're too long.
3206 */
3207 List *
textToQualifiedNameList(text * textval)3208 textToQualifiedNameList(text *textval)
3209 {
3210 char *rawname;
3211 List *result = NIL;
3212 List *namelist;
3213 ListCell *l;
3214
3215 /* Convert to C string (handles possible detoasting). */
3216 /* Note we rely on being able to modify rawname below. */
3217 rawname = text_to_cstring(textval);
3218
3219 if (!SplitIdentifierString(rawname, '.', &namelist))
3220 ereport(ERROR,
3221 (errcode(ERRCODE_INVALID_NAME),
3222 errmsg("invalid name syntax")));
3223
3224 if (namelist == NIL)
3225 ereport(ERROR,
3226 (errcode(ERRCODE_INVALID_NAME),
3227 errmsg("invalid name syntax")));
3228
3229 foreach(l, namelist)
3230 {
3231 char *curname = (char *) lfirst(l);
3232
3233 result = lappend(result, makeString(pstrdup(curname)));
3234 }
3235
3236 pfree(rawname);
3237 list_free(namelist);
3238
3239 return result;
3240 }
3241
3242 /*
3243 * SplitIdentifierString --- parse a string containing identifiers
3244 *
3245 * This is the guts of textToQualifiedNameList, and is exported for use in
3246 * other situations such as parsing GUC variables. In the GUC case, it's
3247 * important to avoid memory leaks, so the API is designed to minimize the
3248 * amount of stuff that needs to be allocated and freed.
3249 *
3250 * Inputs:
3251 * rawstring: the input string; must be overwritable! On return, it's
3252 * been modified to contain the separated identifiers.
3253 * separator: the separator punctuation expected between identifiers
3254 * (typically '.' or ','). Whitespace may also appear around
3255 * identifiers.
3256 * Outputs:
3257 * namelist: filled with a palloc'd list of pointers to identifiers within
3258 * rawstring. Caller should list_free() this even on error return.
3259 *
3260 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
3261 *
3262 * Note that an empty string is considered okay here, though not in
3263 * textToQualifiedNameList.
3264 */
3265 bool
SplitIdentifierString(char * rawstring,char separator,List ** namelist)3266 SplitIdentifierString(char *rawstring, char separator,
3267 List **namelist)
3268 {
3269 char *nextp = rawstring;
3270 bool done = false;
3271
3272 *namelist = NIL;
3273
3274 while (scanner_isspace(*nextp))
3275 nextp++; /* skip leading whitespace */
3276
3277 if (*nextp == '\0')
3278 return true; /* allow empty string */
3279
3280 /* At the top of the loop, we are at start of a new identifier. */
3281 do
3282 {
3283 char *curname;
3284 char *endp;
3285
3286 if (*nextp == '"')
3287 {
3288 /* Quoted name --- collapse quote-quote pairs, no downcasing */
3289 curname = nextp + 1;
3290 for (;;)
3291 {
3292 endp = strchr(nextp + 1, '"');
3293 if (endp == NULL)
3294 return false; /* mismatched quotes */
3295 if (endp[1] != '"')
3296 break; /* found end of quoted name */
3297 /* Collapse adjacent quotes into one quote, and look again */
3298 memmove(endp, endp + 1, strlen(endp));
3299 nextp = endp;
3300 }
3301 /* endp now points at the terminating quote */
3302 nextp = endp + 1;
3303 }
3304 else
3305 {
3306 /* Unquoted name --- extends to separator or whitespace */
3307 char *downname;
3308 int len;
3309
3310 curname = nextp;
3311 while (*nextp && *nextp != separator &&
3312 !scanner_isspace(*nextp))
3313 nextp++;
3314 endp = nextp;
3315 if (curname == nextp)
3316 return false; /* empty unquoted name not allowed */
3317
3318 /*
3319 * Downcase the identifier, using same code as main lexer does.
3320 *
3321 * XXX because we want to overwrite the input in-place, we cannot
3322 * support a downcasing transformation that increases the string
3323 * length. This is not a problem given the current implementation
3324 * of downcase_truncate_identifier, but we'll probably have to do
3325 * something about this someday.
3326 */
3327 len = endp - curname;
3328 downname = downcase_truncate_identifier(curname, len, false);
3329 Assert(strlen(downname) <= len);
3330 strncpy(curname, downname, len); /* strncpy is required here */
3331 pfree(downname);
3332 }
3333
3334 while (scanner_isspace(*nextp))
3335 nextp++; /* skip trailing whitespace */
3336
3337 if (*nextp == separator)
3338 {
3339 nextp++;
3340 while (scanner_isspace(*nextp))
3341 nextp++; /* skip leading whitespace for next */
3342 /* we expect another name, so done remains false */
3343 }
3344 else if (*nextp == '\0')
3345 done = true;
3346 else
3347 return false; /* invalid syntax */
3348
3349 /* Now safe to overwrite separator with a null */
3350 *endp = '\0';
3351
3352 /* Truncate name if it's overlength */
3353 truncate_identifier(curname, strlen(curname), false);
3354
3355 /*
3356 * Finished isolating current name --- add it to list
3357 */
3358 *namelist = lappend(*namelist, curname);
3359
3360 /* Loop back if we didn't reach end of string */
3361 } while (!done);
3362
3363 return true;
3364 }
3365
3366
3367 /*
3368 * SplitDirectoriesString --- parse a string containing file/directory names
3369 *
3370 * This works fine on file names too; the function name is historical.
3371 *
3372 * This is similar to SplitIdentifierString, except that the parsing
3373 * rules are meant to handle pathnames instead of identifiers: there is
3374 * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3375 * and we apply canonicalize_path() to each extracted string. Because of the
3376 * last, the returned strings are separately palloc'd rather than being
3377 * pointers into rawstring --- but we still scribble on rawstring.
3378 *
3379 * Inputs:
3380 * rawstring: the input string; must be modifiable!
3381 * separator: the separator punctuation expected between directories
3382 * (typically ',' or ';'). Whitespace may also appear around
3383 * directories.
3384 * Outputs:
3385 * namelist: filled with a palloc'd list of directory names.
3386 * Caller should list_free_deep() this even on error return.
3387 *
3388 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
3389 *
3390 * Note that an empty string is considered okay here.
3391 */
3392 bool
SplitDirectoriesString(char * rawstring,char separator,List ** namelist)3393 SplitDirectoriesString(char *rawstring, char separator,
3394 List **namelist)
3395 {
3396 char *nextp = rawstring;
3397 bool done = false;
3398
3399 *namelist = NIL;
3400
3401 while (scanner_isspace(*nextp))
3402 nextp++; /* skip leading whitespace */
3403
3404 if (*nextp == '\0')
3405 return true; /* allow empty string */
3406
3407 /* At the top of the loop, we are at start of a new directory. */
3408 do
3409 {
3410 char *curname;
3411 char *endp;
3412
3413 if (*nextp == '"')
3414 {
3415 /* Quoted name --- collapse quote-quote pairs */
3416 curname = nextp + 1;
3417 for (;;)
3418 {
3419 endp = strchr(nextp + 1, '"');
3420 if (endp == NULL)
3421 return false; /* mismatched quotes */
3422 if (endp[1] != '"')
3423 break; /* found end of quoted name */
3424 /* Collapse adjacent quotes into one quote, and look again */
3425 memmove(endp, endp + 1, strlen(endp));
3426 nextp = endp;
3427 }
3428 /* endp now points at the terminating quote */
3429 nextp = endp + 1;
3430 }
3431 else
3432 {
3433 /* Unquoted name --- extends to separator or end of string */
3434 curname = endp = nextp;
3435 while (*nextp && *nextp != separator)
3436 {
3437 /* trailing whitespace should not be included in name */
3438 if (!scanner_isspace(*nextp))
3439 endp = nextp + 1;
3440 nextp++;
3441 }
3442 if (curname == endp)
3443 return false; /* empty unquoted name not allowed */
3444 }
3445
3446 while (scanner_isspace(*nextp))
3447 nextp++; /* skip trailing whitespace */
3448
3449 if (*nextp == separator)
3450 {
3451 nextp++;
3452 while (scanner_isspace(*nextp))
3453 nextp++; /* skip leading whitespace for next */
3454 /* we expect another name, so done remains false */
3455 }
3456 else if (*nextp == '\0')
3457 done = true;
3458 else
3459 return false; /* invalid syntax */
3460
3461 /* Now safe to overwrite separator with a null */
3462 *endp = '\0';
3463
3464 /* Truncate path if it's overlength */
3465 if (strlen(curname) >= MAXPGPATH)
3466 curname[MAXPGPATH - 1] = '\0';
3467
3468 /*
3469 * Finished isolating current name --- add it to list
3470 */
3471 curname = pstrdup(curname);
3472 canonicalize_path(curname);
3473 *namelist = lappend(*namelist, curname);
3474
3475 /* Loop back if we didn't reach end of string */
3476 } while (!done);
3477
3478 return true;
3479 }
3480
3481
3482 /*
3483 * SplitGUCList --- parse a string containing identifiers or file names
3484 *
3485 * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
3486 * presuming whether the elements will be taken as identifiers or file names.
3487 * We assume the input has already been through flatten_set_variable_args(),
3488 * so that we need never downcase (if appropriate, that was done already).
3489 * Nor do we ever truncate, since we don't know the correct max length.
3490 * We disallow embedded whitespace for simplicity (it shouldn't matter,
3491 * because any embedded whitespace should have led to double-quoting).
3492 * Otherwise the API is identical to SplitIdentifierString.
3493 *
3494 * XXX it's annoying to have so many copies of this string-splitting logic.
3495 * However, it's not clear that having one function with a bunch of option
3496 * flags would be much better.
3497 *
3498 * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
3499 * Be sure to update that if you have to change this.
3500 *
3501 * Inputs:
3502 * rawstring: the input string; must be overwritable! On return, it's
3503 * been modified to contain the separated identifiers.
3504 * separator: the separator punctuation expected between identifiers
3505 * (typically '.' or ','). Whitespace may also appear around
3506 * identifiers.
3507 * Outputs:
3508 * namelist: filled with a palloc'd list of pointers to identifiers within
3509 * rawstring. Caller should list_free() this even on error return.
3510 *
3511 * Returns true if okay, false if there is a syntax error in the string.
3512 */
3513 bool
SplitGUCList(char * rawstring,char separator,List ** namelist)3514 SplitGUCList(char *rawstring, char separator,
3515 List **namelist)
3516 {
3517 char *nextp = rawstring;
3518 bool done = false;
3519
3520 *namelist = NIL;
3521
3522 while (scanner_isspace(*nextp))
3523 nextp++; /* skip leading whitespace */
3524
3525 if (*nextp == '\0')
3526 return true; /* allow empty string */
3527
3528 /* At the top of the loop, we are at start of a new identifier. */
3529 do
3530 {
3531 char *curname;
3532 char *endp;
3533
3534 if (*nextp == '"')
3535 {
3536 /* Quoted name --- collapse quote-quote pairs */
3537 curname = nextp + 1;
3538 for (;;)
3539 {
3540 endp = strchr(nextp + 1, '"');
3541 if (endp == NULL)
3542 return false; /* mismatched quotes */
3543 if (endp[1] != '"')
3544 break; /* found end of quoted name */
3545 /* Collapse adjacent quotes into one quote, and look again */
3546 memmove(endp, endp + 1, strlen(endp));
3547 nextp = endp;
3548 }
3549 /* endp now points at the terminating quote */
3550 nextp = endp + 1;
3551 }
3552 else
3553 {
3554 /* Unquoted name --- extends to separator or whitespace */
3555 curname = nextp;
3556 while (*nextp && *nextp != separator &&
3557 !scanner_isspace(*nextp))
3558 nextp++;
3559 endp = nextp;
3560 if (curname == nextp)
3561 return false; /* empty unquoted name not allowed */
3562 }
3563
3564 while (scanner_isspace(*nextp))
3565 nextp++; /* skip trailing whitespace */
3566
3567 if (*nextp == separator)
3568 {
3569 nextp++;
3570 while (scanner_isspace(*nextp))
3571 nextp++; /* skip leading whitespace for next */
3572 /* we expect another name, so done remains false */
3573 }
3574 else if (*nextp == '\0')
3575 done = true;
3576 else
3577 return false; /* invalid syntax */
3578
3579 /* Now safe to overwrite separator with a null */
3580 *endp = '\0';
3581
3582 /*
3583 * Finished isolating current name --- add it to list
3584 */
3585 *namelist = lappend(*namelist, curname);
3586
3587 /* Loop back if we didn't reach end of string */
3588 } while (!done);
3589
3590 return true;
3591 }
3592
3593
3594 /*****************************************************************************
3595 * Comparison Functions used for bytea
3596 *
3597 * Note: btree indexes need these routines not to leak memory; therefore,
3598 * be careful to free working copies of toasted datums. Most places don't
3599 * need to be so careful.
3600 *****************************************************************************/
3601
3602 Datum
byteaeq(PG_FUNCTION_ARGS)3603 byteaeq(PG_FUNCTION_ARGS)
3604 {
3605 Datum arg1 = PG_GETARG_DATUM(0);
3606 Datum arg2 = PG_GETARG_DATUM(1);
3607 bool result;
3608 Size len1,
3609 len2;
3610
3611 /*
3612 * We can use a fast path for unequal lengths, which might save us from
3613 * having to detoast one or both values.
3614 */
3615 len1 = toast_raw_datum_size(arg1);
3616 len2 = toast_raw_datum_size(arg2);
3617 if (len1 != len2)
3618 result = false;
3619 else
3620 {
3621 bytea *barg1 = DatumGetByteaPP(arg1);
3622 bytea *barg2 = DatumGetByteaPP(arg2);
3623
3624 result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3625 len1 - VARHDRSZ) == 0);
3626
3627 PG_FREE_IF_COPY(barg1, 0);
3628 PG_FREE_IF_COPY(barg2, 1);
3629 }
3630
3631 PG_RETURN_BOOL(result);
3632 }
3633
3634 Datum
byteane(PG_FUNCTION_ARGS)3635 byteane(PG_FUNCTION_ARGS)
3636 {
3637 Datum arg1 = PG_GETARG_DATUM(0);
3638 Datum arg2 = PG_GETARG_DATUM(1);
3639 bool result;
3640 Size len1,
3641 len2;
3642
3643 /*
3644 * We can use a fast path for unequal lengths, which might save us from
3645 * having to detoast one or both values.
3646 */
3647 len1 = toast_raw_datum_size(arg1);
3648 len2 = toast_raw_datum_size(arg2);
3649 if (len1 != len2)
3650 result = true;
3651 else
3652 {
3653 bytea *barg1 = DatumGetByteaPP(arg1);
3654 bytea *barg2 = DatumGetByteaPP(arg2);
3655
3656 result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
3657 len1 - VARHDRSZ) != 0);
3658
3659 PG_FREE_IF_COPY(barg1, 0);
3660 PG_FREE_IF_COPY(barg2, 1);
3661 }
3662
3663 PG_RETURN_BOOL(result);
3664 }
3665
3666 Datum
bytealt(PG_FUNCTION_ARGS)3667 bytealt(PG_FUNCTION_ARGS)
3668 {
3669 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3670 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3671 int len1,
3672 len2;
3673 int cmp;
3674
3675 len1 = VARSIZE_ANY_EXHDR(arg1);
3676 len2 = VARSIZE_ANY_EXHDR(arg2);
3677
3678 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3679
3680 PG_FREE_IF_COPY(arg1, 0);
3681 PG_FREE_IF_COPY(arg2, 1);
3682
3683 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
3684 }
3685
3686 Datum
byteale(PG_FUNCTION_ARGS)3687 byteale(PG_FUNCTION_ARGS)
3688 {
3689 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3690 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3691 int len1,
3692 len2;
3693 int cmp;
3694
3695 len1 = VARSIZE_ANY_EXHDR(arg1);
3696 len2 = VARSIZE_ANY_EXHDR(arg2);
3697
3698 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3699
3700 PG_FREE_IF_COPY(arg1, 0);
3701 PG_FREE_IF_COPY(arg2, 1);
3702
3703 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
3704 }
3705
3706 Datum
byteagt(PG_FUNCTION_ARGS)3707 byteagt(PG_FUNCTION_ARGS)
3708 {
3709 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3710 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3711 int len1,
3712 len2;
3713 int cmp;
3714
3715 len1 = VARSIZE_ANY_EXHDR(arg1);
3716 len2 = VARSIZE_ANY_EXHDR(arg2);
3717
3718 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3719
3720 PG_FREE_IF_COPY(arg1, 0);
3721 PG_FREE_IF_COPY(arg2, 1);
3722
3723 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
3724 }
3725
3726 Datum
byteage(PG_FUNCTION_ARGS)3727 byteage(PG_FUNCTION_ARGS)
3728 {
3729 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3730 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3731 int len1,
3732 len2;
3733 int cmp;
3734
3735 len1 = VARSIZE_ANY_EXHDR(arg1);
3736 len2 = VARSIZE_ANY_EXHDR(arg2);
3737
3738 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3739
3740 PG_FREE_IF_COPY(arg1, 0);
3741 PG_FREE_IF_COPY(arg2, 1);
3742
3743 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
3744 }
3745
3746 Datum
byteacmp(PG_FUNCTION_ARGS)3747 byteacmp(PG_FUNCTION_ARGS)
3748 {
3749 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
3750 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
3751 int len1,
3752 len2;
3753 int cmp;
3754
3755 len1 = VARSIZE_ANY_EXHDR(arg1);
3756 len2 = VARSIZE_ANY_EXHDR(arg2);
3757
3758 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3759 if ((cmp == 0) && (len1 != len2))
3760 cmp = (len1 < len2) ? -1 : 1;
3761
3762 PG_FREE_IF_COPY(arg1, 0);
3763 PG_FREE_IF_COPY(arg2, 1);
3764
3765 PG_RETURN_INT32(cmp);
3766 }
3767
3768 Datum
bytea_sortsupport(PG_FUNCTION_ARGS)3769 bytea_sortsupport(PG_FUNCTION_ARGS)
3770 {
3771 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
3772 MemoryContext oldcontext;
3773
3774 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3775
3776 /* Use generic string SortSupport, forcing "C" collation */
3777 varstr_sortsupport(ssup, C_COLLATION_OID, false);
3778
3779 MemoryContextSwitchTo(oldcontext);
3780
3781 PG_RETURN_VOID();
3782 }
3783
3784 /*
3785 * appendStringInfoText
3786 *
3787 * Append a text to str.
3788 * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
3789 */
3790 static void
appendStringInfoText(StringInfo str,const text * t)3791 appendStringInfoText(StringInfo str, const text *t)
3792 {
3793 appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
3794 }
3795
3796 /*
3797 * replace_text
3798 * replace all occurrences of 'old_sub_str' in 'orig_str'
3799 * with 'new_sub_str' to form 'new_str'
3800 *
3801 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
3802 * otherwise returns 'new_str'
3803 */
3804 Datum
replace_text(PG_FUNCTION_ARGS)3805 replace_text(PG_FUNCTION_ARGS)
3806 {
3807 text *src_text = PG_GETARG_TEXT_PP(0);
3808 text *from_sub_text = PG_GETARG_TEXT_PP(1);
3809 text *to_sub_text = PG_GETARG_TEXT_PP(2);
3810 int src_text_len;
3811 int from_sub_text_len;
3812 TextPositionState state;
3813 text *ret_text;
3814 int start_posn;
3815 int curr_posn;
3816 int chunk_len;
3817 char *start_ptr;
3818 StringInfoData str;
3819
3820 text_position_setup(src_text, from_sub_text, &state);
3821
3822 /*
3823 * Note: we check the converted string length, not the original, because
3824 * they could be different if the input contained invalid encoding.
3825 */
3826 src_text_len = state.len1;
3827 from_sub_text_len = state.len2;
3828
3829 /* Return unmodified source string if empty source or pattern */
3830 if (src_text_len < 1 || from_sub_text_len < 1)
3831 {
3832 text_position_cleanup(&state);
3833 PG_RETURN_TEXT_P(src_text);
3834 }
3835
3836 start_posn = 1;
3837 curr_posn = text_position_next(1, &state);
3838
3839 /* When the from_sub_text is not found, there is nothing to do. */
3840 if (curr_posn == 0)
3841 {
3842 text_position_cleanup(&state);
3843 PG_RETURN_TEXT_P(src_text);
3844 }
3845
3846 /* start_ptr points to the start_posn'th character of src_text */
3847 start_ptr = VARDATA_ANY(src_text);
3848
3849 initStringInfo(&str);
3850
3851 do
3852 {
3853 CHECK_FOR_INTERRUPTS();
3854
3855 /* copy the data skipped over by last text_position_next() */
3856 chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
3857 appendBinaryStringInfo(&str, start_ptr, chunk_len);
3858
3859 appendStringInfoText(&str, to_sub_text);
3860
3861 start_posn = curr_posn;
3862 start_ptr += chunk_len;
3863 start_posn += from_sub_text_len;
3864 start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
3865
3866 curr_posn = text_position_next(start_posn, &state);
3867 }
3868 while (curr_posn > 0);
3869
3870 /* copy trailing data */
3871 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
3872 appendBinaryStringInfo(&str, start_ptr, chunk_len);
3873
3874 text_position_cleanup(&state);
3875
3876 ret_text = cstring_to_text_with_len(str.data, str.len);
3877 pfree(str.data);
3878
3879 PG_RETURN_TEXT_P(ret_text);
3880 }
3881
3882 /*
3883 * check_replace_text_has_escape_char
3884 *
3885 * check whether replace_text contains escape char.
3886 */
3887 static bool
check_replace_text_has_escape_char(const text * replace_text)3888 check_replace_text_has_escape_char(const text *replace_text)
3889 {
3890 const char *p = VARDATA_ANY(replace_text);
3891 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3892
3893 if (pg_database_encoding_max_length() == 1)
3894 {
3895 for (; p < p_end; p++)
3896 {
3897 if (*p == '\\')
3898 return true;
3899 }
3900 }
3901 else
3902 {
3903 for (; p < p_end; p += pg_mblen(p))
3904 {
3905 if (*p == '\\')
3906 return true;
3907 }
3908 }
3909
3910 return false;
3911 }
3912
3913 /*
3914 * appendStringInfoRegexpSubstr
3915 *
3916 * Append replace_text to str, substituting regexp back references for
3917 * \n escapes. start_ptr is the start of the match in the source string,
3918 * at logical character position data_pos.
3919 */
3920 static void
appendStringInfoRegexpSubstr(StringInfo str,text * replace_text,regmatch_t * pmatch,char * start_ptr,int data_pos)3921 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
3922 regmatch_t *pmatch,
3923 char *start_ptr, int data_pos)
3924 {
3925 const char *p = VARDATA_ANY(replace_text);
3926 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
3927 int eml = pg_database_encoding_max_length();
3928
3929 for (;;)
3930 {
3931 const char *chunk_start = p;
3932 int so;
3933 int eo;
3934
3935 /* Find next escape char. */
3936 if (eml == 1)
3937 {
3938 for (; p < p_end && *p != '\\'; p++)
3939 /* nothing */ ;
3940 }
3941 else
3942 {
3943 for (; p < p_end && *p != '\\'; p += pg_mblen(p))
3944 /* nothing */ ;
3945 }
3946
3947 /* Copy the text we just scanned over, if any. */
3948 if (p > chunk_start)
3949 appendBinaryStringInfo(str, chunk_start, p - chunk_start);
3950
3951 /* Done if at end of string, else advance over escape char. */
3952 if (p >= p_end)
3953 break;
3954 p++;
3955
3956 if (p >= p_end)
3957 {
3958 /* Escape at very end of input. Treat same as unexpected char */
3959 appendStringInfoChar(str, '\\');
3960 break;
3961 }
3962
3963 if (*p >= '1' && *p <= '9')
3964 {
3965 /* Use the back reference of regexp. */
3966 int idx = *p - '0';
3967
3968 so = pmatch[idx].rm_so;
3969 eo = pmatch[idx].rm_eo;
3970 p++;
3971 }
3972 else if (*p == '&')
3973 {
3974 /* Use the entire matched string. */
3975 so = pmatch[0].rm_so;
3976 eo = pmatch[0].rm_eo;
3977 p++;
3978 }
3979 else if (*p == '\\')
3980 {
3981 /* \\ means transfer one \ to output. */
3982 appendStringInfoChar(str, '\\');
3983 p++;
3984 continue;
3985 }
3986 else
3987 {
3988 /*
3989 * If escape char is not followed by any expected char, just treat
3990 * it as ordinary data to copy. (XXX would it be better to throw
3991 * an error?)
3992 */
3993 appendStringInfoChar(str, '\\');
3994 continue;
3995 }
3996
3997 if (so != -1 && eo != -1)
3998 {
3999 /*
4000 * Copy the text that is back reference of regexp. Note so and eo
4001 * are counted in characters not bytes.
4002 */
4003 char *chunk_start;
4004 int chunk_len;
4005
4006 Assert(so >= data_pos);
4007 chunk_start = start_ptr;
4008 chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
4009 chunk_len = charlen_to_bytelen(chunk_start, eo - so);
4010 appendBinaryStringInfo(str, chunk_start, chunk_len);
4011 }
4012 }
4013 }
4014
4015 #define REGEXP_REPLACE_BACKREF_CNT 10
4016
4017 /*
4018 * replace_text_regexp
4019 *
4020 * replace text that matches to regexp in src_text to replace_text.
4021 *
4022 * Note: to avoid having to include regex.h in builtins.h, we declare
4023 * the regexp argument as void *, but really it's regex_t *.
4024 */
4025 text *
replace_text_regexp(text * src_text,void * regexp,text * replace_text,bool glob)4026 replace_text_regexp(text *src_text, void *regexp,
4027 text *replace_text, bool glob)
4028 {
4029 text *ret_text;
4030 regex_t *re = (regex_t *) regexp;
4031 int src_text_len = VARSIZE_ANY_EXHDR(src_text);
4032 StringInfoData buf;
4033 regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
4034 pg_wchar *data;
4035 size_t data_len;
4036 int search_start;
4037 int data_pos;
4038 char *start_ptr;
4039 bool have_escape;
4040
4041 initStringInfo(&buf);
4042
4043 /* Convert data string to wide characters. */
4044 data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
4045 data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
4046
4047 /* Check whether replace_text has escape char. */
4048 have_escape = check_replace_text_has_escape_char(replace_text);
4049
4050 /* start_ptr points to the data_pos'th character of src_text */
4051 start_ptr = (char *) VARDATA_ANY(src_text);
4052 data_pos = 0;
4053
4054 search_start = 0;
4055 while (search_start <= data_len)
4056 {
4057 int regexec_result;
4058
4059 CHECK_FOR_INTERRUPTS();
4060
4061 regexec_result = pg_regexec(re,
4062 data,
4063 data_len,
4064 search_start,
4065 NULL, /* no details */
4066 REGEXP_REPLACE_BACKREF_CNT,
4067 pmatch,
4068 0);
4069
4070 if (regexec_result == REG_NOMATCH)
4071 break;
4072
4073 if (regexec_result != REG_OKAY)
4074 {
4075 char errMsg[100];
4076
4077 CHECK_FOR_INTERRUPTS();
4078 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
4079 ereport(ERROR,
4080 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
4081 errmsg("regular expression failed: %s", errMsg)));
4082 }
4083
4084 /*
4085 * Copy the text to the left of the match position. Note we are given
4086 * character not byte indexes.
4087 */
4088 if (pmatch[0].rm_so - data_pos > 0)
4089 {
4090 int chunk_len;
4091
4092 chunk_len = charlen_to_bytelen(start_ptr,
4093 pmatch[0].rm_so - data_pos);
4094 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4095
4096 /*
4097 * Advance start_ptr over that text, to avoid multiple rescans of
4098 * it if the replace_text contains multiple back-references.
4099 */
4100 start_ptr += chunk_len;
4101 data_pos = pmatch[0].rm_so;
4102 }
4103
4104 /*
4105 * Copy the replace_text. Process back references when the
4106 * replace_text has escape characters.
4107 */
4108 if (have_escape)
4109 appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
4110 start_ptr, data_pos);
4111 else
4112 appendStringInfoText(&buf, replace_text);
4113
4114 /* Advance start_ptr and data_pos over the matched text. */
4115 start_ptr += charlen_to_bytelen(start_ptr,
4116 pmatch[0].rm_eo - data_pos);
4117 data_pos = pmatch[0].rm_eo;
4118
4119 /*
4120 * When global option is off, replace the first instance only.
4121 */
4122 if (!glob)
4123 break;
4124
4125 /*
4126 * Advance search position. Normally we start the next search at the
4127 * end of the previous match; but if the match was of zero length, we
4128 * have to advance by one character, or we'd just find the same match
4129 * again.
4130 */
4131 search_start = data_pos;
4132 if (pmatch[0].rm_so == pmatch[0].rm_eo)
4133 search_start++;
4134 }
4135
4136 /*
4137 * Copy the text to the right of the last match.
4138 */
4139 if (data_pos < data_len)
4140 {
4141 int chunk_len;
4142
4143 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4144 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4145 }
4146
4147 ret_text = cstring_to_text_with_len(buf.data, buf.len);
4148 pfree(buf.data);
4149 pfree(data);
4150
4151 return ret_text;
4152 }
4153
4154 /*
4155 * split_text
4156 * parse input string
4157 * return ord item (1 based)
4158 * based on provided field separator
4159 */
4160 Datum
split_text(PG_FUNCTION_ARGS)4161 split_text(PG_FUNCTION_ARGS)
4162 {
4163 text *inputstring = PG_GETARG_TEXT_PP(0);
4164 text *fldsep = PG_GETARG_TEXT_PP(1);
4165 int fldnum = PG_GETARG_INT32(2);
4166 int inputstring_len;
4167 int fldsep_len;
4168 TextPositionState state;
4169 int start_posn;
4170 int end_posn;
4171 text *result_text;
4172
4173 /* field number is 1 based */
4174 if (fldnum < 1)
4175 ereport(ERROR,
4176 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4177 errmsg("field position must be greater than zero")));
4178
4179 text_position_setup(inputstring, fldsep, &state);
4180
4181 /*
4182 * Note: we check the converted string length, not the original, because
4183 * they could be different if the input contained invalid encoding.
4184 */
4185 inputstring_len = state.len1;
4186 fldsep_len = state.len2;
4187
4188 /* return empty string for empty input string */
4189 if (inputstring_len < 1)
4190 {
4191 text_position_cleanup(&state);
4192 PG_RETURN_TEXT_P(cstring_to_text(""));
4193 }
4194
4195 /* empty field separator */
4196 if (fldsep_len < 1)
4197 {
4198 text_position_cleanup(&state);
4199 /* if first field, return input string, else empty string */
4200 if (fldnum == 1)
4201 PG_RETURN_TEXT_P(inputstring);
4202 else
4203 PG_RETURN_TEXT_P(cstring_to_text(""));
4204 }
4205
4206 /* identify bounds of first field */
4207 start_posn = 1;
4208 end_posn = text_position_next(1, &state);
4209
4210 /* special case if fldsep not found at all */
4211 if (end_posn == 0)
4212 {
4213 text_position_cleanup(&state);
4214 /* if field 1 requested, return input string, else empty string */
4215 if (fldnum == 1)
4216 PG_RETURN_TEXT_P(inputstring);
4217 else
4218 PG_RETURN_TEXT_P(cstring_to_text(""));
4219 }
4220
4221 while (end_posn > 0 && --fldnum > 0)
4222 {
4223 /* identify bounds of next field */
4224 start_posn = end_posn + fldsep_len;
4225 end_posn = text_position_next(start_posn, &state);
4226 }
4227
4228 text_position_cleanup(&state);
4229
4230 if (fldnum > 0)
4231 {
4232 /* N'th field separator not found */
4233 /* if last field requested, return it, else empty string */
4234 if (fldnum == 1)
4235 result_text = text_substring(PointerGetDatum(inputstring),
4236 start_posn,
4237 -1,
4238 true);
4239 else
4240 result_text = cstring_to_text("");
4241 }
4242 else
4243 {
4244 /* non-last field requested */
4245 result_text = text_substring(PointerGetDatum(inputstring),
4246 start_posn,
4247 end_posn - start_posn,
4248 false);
4249 }
4250
4251 PG_RETURN_TEXT_P(result_text);
4252 }
4253
4254 /*
4255 * Convenience function to return true when two text params are equal.
4256 */
4257 static bool
text_isequal(text * txt1,text * txt2)4258 text_isequal(text *txt1, text *txt2)
4259 {
4260 return DatumGetBool(DirectFunctionCall2(texteq,
4261 PointerGetDatum(txt1),
4262 PointerGetDatum(txt2)));
4263 }
4264
4265 /*
4266 * text_to_array
4267 * parse input string and return text array of elements,
4268 * based on provided field separator
4269 */
4270 Datum
text_to_array(PG_FUNCTION_ARGS)4271 text_to_array(PG_FUNCTION_ARGS)
4272 {
4273 return text_to_array_internal(fcinfo);
4274 }
4275
4276 /*
4277 * text_to_array_null
4278 * parse input string and return text array of elements,
4279 * based on provided field separator and null string
4280 *
4281 * This is a separate entry point only to prevent the regression tests from
4282 * complaining about different argument sets for the same internal function.
4283 */
4284 Datum
text_to_array_null(PG_FUNCTION_ARGS)4285 text_to_array_null(PG_FUNCTION_ARGS)
4286 {
4287 return text_to_array_internal(fcinfo);
4288 }
4289
4290 /*
4291 * common code for text_to_array and text_to_array_null functions
4292 *
4293 * These are not strict so we have to test for null inputs explicitly.
4294 */
4295 static Datum
text_to_array_internal(PG_FUNCTION_ARGS)4296 text_to_array_internal(PG_FUNCTION_ARGS)
4297 {
4298 text *inputstring;
4299 text *fldsep;
4300 text *null_string;
4301 int inputstring_len;
4302 int fldsep_len;
4303 char *start_ptr;
4304 text *result_text;
4305 bool is_null;
4306 ArrayBuildState *astate = NULL;
4307
4308 /* when input string is NULL, then result is NULL too */
4309 if (PG_ARGISNULL(0))
4310 PG_RETURN_NULL();
4311
4312 inputstring = PG_GETARG_TEXT_PP(0);
4313
4314 /* fldsep can be NULL */
4315 if (!PG_ARGISNULL(1))
4316 fldsep = PG_GETARG_TEXT_PP(1);
4317 else
4318 fldsep = NULL;
4319
4320 /* null_string can be NULL or omitted */
4321 if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4322 null_string = PG_GETARG_TEXT_PP(2);
4323 else
4324 null_string = NULL;
4325
4326 if (fldsep != NULL)
4327 {
4328 /*
4329 * Normal case with non-null fldsep. Use the text_position machinery
4330 * to search for occurrences of fldsep.
4331 */
4332 TextPositionState state;
4333 int fldnum;
4334 int start_posn;
4335 int end_posn;
4336 int chunk_len;
4337
4338 text_position_setup(inputstring, fldsep, &state);
4339
4340 /*
4341 * Note: we check the converted string length, not the original,
4342 * because they could be different if the input contained invalid
4343 * encoding.
4344 */
4345 inputstring_len = state.len1;
4346 fldsep_len = state.len2;
4347
4348 /* return empty array for empty input string */
4349 if (inputstring_len < 1)
4350 {
4351 text_position_cleanup(&state);
4352 PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
4353 }
4354
4355 /*
4356 * empty field separator: return the input string as a one-element
4357 * array
4358 */
4359 if (fldsep_len < 1)
4360 {
4361 Datum elems[1];
4362 bool nulls[1];
4363 int dims[1];
4364 int lbs[1];
4365
4366 text_position_cleanup(&state);
4367 /* single element can be a NULL too */
4368 is_null = null_string ? text_isequal(inputstring, null_string) : false;
4369
4370 elems[0] = PointerGetDatum(inputstring);
4371 nulls[0] = is_null;
4372 dims[0] = 1;
4373 lbs[0] = 1;
4374 /* XXX: this hardcodes assumptions about the text type */
4375 PG_RETURN_ARRAYTYPE_P(construct_md_array(elems, nulls,
4376 1, dims, lbs,
4377 TEXTOID, -1, false, 'i'));
4378 }
4379
4380 start_posn = 1;
4381 /* start_ptr points to the start_posn'th character of inputstring */
4382 start_ptr = VARDATA_ANY(inputstring);
4383
4384 for (fldnum = 1;; fldnum++) /* field number is 1 based */
4385 {
4386 CHECK_FOR_INTERRUPTS();
4387
4388 end_posn = text_position_next(start_posn, &state);
4389
4390 if (end_posn == 0)
4391 {
4392 /* fetch last field */
4393 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4394 }
4395 else
4396 {
4397 /* fetch non-last field */
4398 chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
4399 }
4400
4401 /* must build a temp text datum to pass to accumArrayResult */
4402 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4403 is_null = null_string ? text_isequal(result_text, null_string) : false;
4404
4405 /* stash away this field */
4406 astate = accumArrayResult(astate,
4407 PointerGetDatum(result_text),
4408 is_null,
4409 TEXTOID,
4410 CurrentMemoryContext);
4411
4412 pfree(result_text);
4413
4414 if (end_posn == 0)
4415 break;
4416
4417 start_posn = end_posn;
4418 start_ptr += chunk_len;
4419 start_posn += fldsep_len;
4420 start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
4421 }
4422
4423 text_position_cleanup(&state);
4424 }
4425 else
4426 {
4427 /*
4428 * When fldsep is NULL, each character in the inputstring becomes an
4429 * element in the result array. The separator is effectively the
4430 * space between characters.
4431 */
4432 inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4433
4434 /* return empty array for empty input string */
4435 if (inputstring_len < 1)
4436 PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
4437
4438 start_ptr = VARDATA_ANY(inputstring);
4439
4440 while (inputstring_len > 0)
4441 {
4442 int chunk_len = pg_mblen(start_ptr);
4443
4444 CHECK_FOR_INTERRUPTS();
4445
4446 /* must build a temp text datum to pass to accumArrayResult */
4447 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4448 is_null = null_string ? text_isequal(result_text, null_string) : false;
4449
4450 /* stash away this field */
4451 astate = accumArrayResult(astate,
4452 PointerGetDatum(result_text),
4453 is_null,
4454 TEXTOID,
4455 CurrentMemoryContext);
4456
4457 pfree(result_text);
4458
4459 start_ptr += chunk_len;
4460 inputstring_len -= chunk_len;
4461 }
4462 }
4463
4464 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
4465 CurrentMemoryContext));
4466 }
4467
4468 /*
4469 * array_to_text
4470 * concatenate Cstring representation of input array elements
4471 * using provided field separator
4472 */
4473 Datum
array_to_text(PG_FUNCTION_ARGS)4474 array_to_text(PG_FUNCTION_ARGS)
4475 {
4476 ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
4477 char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4478
4479 PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4480 }
4481
4482 /*
4483 * array_to_text_null
4484 * concatenate Cstring representation of input array elements
4485 * using provided field separator and null string
4486 *
4487 * This version is not strict so we have to test for null inputs explicitly.
4488 */
4489 Datum
array_to_text_null(PG_FUNCTION_ARGS)4490 array_to_text_null(PG_FUNCTION_ARGS)
4491 {
4492 ArrayType *v;
4493 char *fldsep;
4494 char *null_string;
4495
4496 /* returns NULL when first or second parameter is NULL */
4497 if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
4498 PG_RETURN_NULL();
4499
4500 v = PG_GETARG_ARRAYTYPE_P(0);
4501 fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4502
4503 /* NULL null string is passed through as a null pointer */
4504 if (!PG_ARGISNULL(2))
4505 null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
4506 else
4507 null_string = NULL;
4508
4509 PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4510 }
4511
4512 /*
4513 * common code for array_to_text and array_to_text_null functions
4514 */
4515 static text *
array_to_text_internal(FunctionCallInfo fcinfo,ArrayType * v,const char * fldsep,const char * null_string)4516 array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
4517 const char *fldsep, const char *null_string)
4518 {
4519 text *result;
4520 int nitems,
4521 *dims,
4522 ndims;
4523 Oid element_type;
4524 int typlen;
4525 bool typbyval;
4526 char typalign;
4527 StringInfoData buf;
4528 bool printed = false;
4529 char *p;
4530 bits8 *bitmap;
4531 int bitmask;
4532 int i;
4533 ArrayMetaState *my_extra;
4534
4535 ndims = ARR_NDIM(v);
4536 dims = ARR_DIMS(v);
4537 nitems = ArrayGetNItems(ndims, dims);
4538
4539 /* if there are no elements, return an empty string */
4540 if (nitems == 0)
4541 return cstring_to_text_with_len("", 0);
4542
4543 element_type = ARR_ELEMTYPE(v);
4544 initStringInfo(&buf);
4545
4546 /*
4547 * We arrange to look up info about element type, including its output
4548 * conversion proc, only once per series of calls, assuming the element
4549 * type doesn't change underneath us.
4550 */
4551 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4552 if (my_extra == NULL)
4553 {
4554 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4555 sizeof(ArrayMetaState));
4556 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4557 my_extra->element_type = ~element_type;
4558 }
4559
4560 if (my_extra->element_type != element_type)
4561 {
4562 /*
4563 * Get info about element type, including its output conversion proc
4564 */
4565 get_type_io_data(element_type, IOFunc_output,
4566 &my_extra->typlen, &my_extra->typbyval,
4567 &my_extra->typalign, &my_extra->typdelim,
4568 &my_extra->typioparam, &my_extra->typiofunc);
4569 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4570 fcinfo->flinfo->fn_mcxt);
4571 my_extra->element_type = element_type;
4572 }
4573 typlen = my_extra->typlen;
4574 typbyval = my_extra->typbyval;
4575 typalign = my_extra->typalign;
4576
4577 p = ARR_DATA_PTR(v);
4578 bitmap = ARR_NULLBITMAP(v);
4579 bitmask = 1;
4580
4581 for (i = 0; i < nitems; i++)
4582 {
4583 Datum itemvalue;
4584 char *value;
4585
4586 /* Get source element, checking for NULL */
4587 if (bitmap && (*bitmap & bitmask) == 0)
4588 {
4589 /* if null_string is NULL, we just ignore null elements */
4590 if (null_string != NULL)
4591 {
4592 if (printed)
4593 appendStringInfo(&buf, "%s%s", fldsep, null_string);
4594 else
4595 appendStringInfoString(&buf, null_string);
4596 printed = true;
4597 }
4598 }
4599 else
4600 {
4601 itemvalue = fetch_att(p, typbyval, typlen);
4602
4603 value = OutputFunctionCall(&my_extra->proc, itemvalue);
4604
4605 if (printed)
4606 appendStringInfo(&buf, "%s%s", fldsep, value);
4607 else
4608 appendStringInfoString(&buf, value);
4609 printed = true;
4610
4611 p = att_addlength_pointer(p, typlen, p);
4612 p = (char *) att_align_nominal(p, typalign);
4613 }
4614
4615 /* advance bitmap pointer if any */
4616 if (bitmap)
4617 {
4618 bitmask <<= 1;
4619 if (bitmask == 0x100)
4620 {
4621 bitmap++;
4622 bitmask = 1;
4623 }
4624 }
4625 }
4626
4627 result = cstring_to_text_with_len(buf.data, buf.len);
4628 pfree(buf.data);
4629
4630 return result;
4631 }
4632
4633 #define HEXBASE 16
4634 /*
4635 * Convert an int32 to a string containing a base 16 (hex) representation of
4636 * the number.
4637 */
4638 Datum
to_hex32(PG_FUNCTION_ARGS)4639 to_hex32(PG_FUNCTION_ARGS)
4640 {
4641 uint32 value = (uint32) PG_GETARG_INT32(0);
4642 char *ptr;
4643 const char *digits = "0123456789abcdef";
4644 char buf[32]; /* bigger than needed, but reasonable */
4645
4646 ptr = buf + sizeof(buf) - 1;
4647 *ptr = '\0';
4648
4649 do
4650 {
4651 *--ptr = digits[value % HEXBASE];
4652 value /= HEXBASE;
4653 } while (ptr > buf && value);
4654
4655 PG_RETURN_TEXT_P(cstring_to_text(ptr));
4656 }
4657
4658 /*
4659 * Convert an int64 to a string containing a base 16 (hex) representation of
4660 * the number.
4661 */
4662 Datum
to_hex64(PG_FUNCTION_ARGS)4663 to_hex64(PG_FUNCTION_ARGS)
4664 {
4665 uint64 value = (uint64) PG_GETARG_INT64(0);
4666 char *ptr;
4667 const char *digits = "0123456789abcdef";
4668 char buf[32]; /* bigger than needed, but reasonable */
4669
4670 ptr = buf + sizeof(buf) - 1;
4671 *ptr = '\0';
4672
4673 do
4674 {
4675 *--ptr = digits[value % HEXBASE];
4676 value /= HEXBASE;
4677 } while (ptr > buf && value);
4678
4679 PG_RETURN_TEXT_P(cstring_to_text(ptr));
4680 }
4681
4682 /*
4683 * Create an md5 hash of a text string and return it as hex
4684 *
4685 * md5 produces a 16 byte (128 bit) hash; double it for hex
4686 */
4687 #define MD5_HASH_LEN 32
4688
4689 Datum
md5_text(PG_FUNCTION_ARGS)4690 md5_text(PG_FUNCTION_ARGS)
4691 {
4692 text *in_text = PG_GETARG_TEXT_PP(0);
4693 size_t len;
4694 char hexsum[MD5_HASH_LEN + 1];
4695
4696 /* Calculate the length of the buffer using varlena metadata */
4697 len = VARSIZE_ANY_EXHDR(in_text);
4698
4699 /* get the hash result */
4700 if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
4701 ereport(ERROR,
4702 (errcode(ERRCODE_OUT_OF_MEMORY),
4703 errmsg("out of memory")));
4704
4705 /* convert to text and return it */
4706 PG_RETURN_TEXT_P(cstring_to_text(hexsum));
4707 }
4708
4709 /*
4710 * Create an md5 hash of a bytea field and return it as a hex string:
4711 * 16-byte md5 digest is represented in 32 hex characters.
4712 */
4713 Datum
md5_bytea(PG_FUNCTION_ARGS)4714 md5_bytea(PG_FUNCTION_ARGS)
4715 {
4716 bytea *in = PG_GETARG_BYTEA_PP(0);
4717 size_t len;
4718 char hexsum[MD5_HASH_LEN + 1];
4719
4720 len = VARSIZE_ANY_EXHDR(in);
4721 if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
4722 ereport(ERROR,
4723 (errcode(ERRCODE_OUT_OF_MEMORY),
4724 errmsg("out of memory")));
4725
4726 PG_RETURN_TEXT_P(cstring_to_text(hexsum));
4727 }
4728
4729 /*
4730 * Return the size of a datum, possibly compressed
4731 *
4732 * Works on any data type
4733 */
4734 Datum
pg_column_size(PG_FUNCTION_ARGS)4735 pg_column_size(PG_FUNCTION_ARGS)
4736 {
4737 Datum value = PG_GETARG_DATUM(0);
4738 int32 result;
4739 int typlen;
4740
4741 /* On first call, get the input type's typlen, and save at *fn_extra */
4742 if (fcinfo->flinfo->fn_extra == NULL)
4743 {
4744 /* Lookup the datatype of the supplied argument */
4745 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
4746
4747 typlen = get_typlen(argtypeid);
4748 if (typlen == 0) /* should not happen */
4749 elog(ERROR, "cache lookup failed for type %u", argtypeid);
4750
4751 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4752 sizeof(int));
4753 *((int *) fcinfo->flinfo->fn_extra) = typlen;
4754 }
4755 else
4756 typlen = *((int *) fcinfo->flinfo->fn_extra);
4757
4758 if (typlen == -1)
4759 {
4760 /* varlena type, possibly toasted */
4761 result = toast_datum_size(value);
4762 }
4763 else if (typlen == -2)
4764 {
4765 /* cstring */
4766 result = strlen(DatumGetCString(value)) + 1;
4767 }
4768 else
4769 {
4770 /* ordinary fixed-width type */
4771 result = typlen;
4772 }
4773
4774 PG_RETURN_INT32(result);
4775 }
4776
4777 /*
4778 * string_agg - Concatenates values and returns string.
4779 *
4780 * Syntax: string_agg(value text, delimiter text) RETURNS text
4781 *
4782 * Note: Any NULL values are ignored. The first-call delimiter isn't
4783 * actually used at all, and on subsequent calls the delimiter precedes
4784 * the associated value.
4785 */
4786
4787 /* subroutine to initialize state */
4788 static StringInfo
makeStringAggState(FunctionCallInfo fcinfo)4789 makeStringAggState(FunctionCallInfo fcinfo)
4790 {
4791 StringInfo state;
4792 MemoryContext aggcontext;
4793 MemoryContext oldcontext;
4794
4795 if (!AggCheckCallContext(fcinfo, &aggcontext))
4796 {
4797 /* cannot be called directly because of internal-type argument */
4798 elog(ERROR, "string_agg_transfn called in non-aggregate context");
4799 }
4800
4801 /*
4802 * Create state in aggregate context. It'll stay there across subsequent
4803 * calls.
4804 */
4805 oldcontext = MemoryContextSwitchTo(aggcontext);
4806 state = makeStringInfo();
4807 MemoryContextSwitchTo(oldcontext);
4808
4809 return state;
4810 }
4811
4812 Datum
string_agg_transfn(PG_FUNCTION_ARGS)4813 string_agg_transfn(PG_FUNCTION_ARGS)
4814 {
4815 StringInfo state;
4816
4817 state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4818
4819 /* Append the value unless null. */
4820 if (!PG_ARGISNULL(1))
4821 {
4822 /* On the first time through, we ignore the delimiter. */
4823 if (state == NULL)
4824 state = makeStringAggState(fcinfo);
4825 else if (!PG_ARGISNULL(2))
4826 appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
4827
4828 appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
4829 }
4830
4831 /*
4832 * The transition type for string_agg() is declared to be "internal",
4833 * which is a pass-by-value type the same size as a pointer.
4834 */
4835 PG_RETURN_POINTER(state);
4836 }
4837
4838 Datum
string_agg_finalfn(PG_FUNCTION_ARGS)4839 string_agg_finalfn(PG_FUNCTION_ARGS)
4840 {
4841 StringInfo state;
4842
4843 /* cannot be called directly because of internal-type argument */
4844 Assert(AggCheckCallContext(fcinfo, NULL));
4845
4846 state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
4847
4848 if (state != NULL)
4849 PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));
4850 else
4851 PG_RETURN_NULL();
4852 }
4853
4854 /*
4855 * Implementation of both concat() and concat_ws().
4856 *
4857 * sepstr is the separator string to place between values.
4858 * argidx identifies the first argument to concatenate (counting from zero).
4859 * Returns NULL if result should be NULL, else text value.
4860 */
4861 static text *
concat_internal(const char * sepstr,int argidx,FunctionCallInfo fcinfo)4862 concat_internal(const char *sepstr, int argidx,
4863 FunctionCallInfo fcinfo)
4864 {
4865 text *result;
4866 StringInfoData str;
4867 bool first_arg = true;
4868 int i;
4869
4870 /*
4871 * concat(VARIADIC some-array) is essentially equivalent to
4872 * array_to_text(), ie concat the array elements with the given separator.
4873 * So we just pass the case off to that code.
4874 */
4875 if (get_fn_expr_variadic(fcinfo->flinfo))
4876 {
4877 ArrayType *arr;
4878
4879 /* Should have just the one argument */
4880 Assert(argidx == PG_NARGS() - 1);
4881
4882 /* concat(VARIADIC NULL) is defined as NULL */
4883 if (PG_ARGISNULL(argidx))
4884 return NULL;
4885
4886 /*
4887 * Non-null argument had better be an array. We assume that any call
4888 * context that could let get_fn_expr_variadic return true will have
4889 * checked that a VARIADIC-labeled parameter actually is an array. So
4890 * it should be okay to just Assert that it's an array rather than
4891 * doing a full-fledged error check.
4892 */
4893 Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
4894
4895 /* OK, safe to fetch the array value */
4896 arr = PG_GETARG_ARRAYTYPE_P(argidx);
4897
4898 /*
4899 * And serialize the array. We tell array_to_text to ignore null
4900 * elements, which matches the behavior of the loop below.
4901 */
4902 return array_to_text_internal(fcinfo, arr, sepstr, NULL);
4903 }
4904
4905 /* Normal case without explicit VARIADIC marker */
4906 initStringInfo(&str);
4907
4908 for (i = argidx; i < PG_NARGS(); i++)
4909 {
4910 if (!PG_ARGISNULL(i))
4911 {
4912 Datum value = PG_GETARG_DATUM(i);
4913 Oid valtype;
4914 Oid typOutput;
4915 bool typIsVarlena;
4916
4917 /* add separator if appropriate */
4918 if (first_arg)
4919 first_arg = false;
4920 else
4921 appendStringInfoString(&str, sepstr);
4922
4923 /* call the appropriate type output function, append the result */
4924 valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
4925 if (!OidIsValid(valtype))
4926 elog(ERROR, "could not determine data type of concat() input");
4927 getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
4928 appendStringInfoString(&str,
4929 OidOutputFunctionCall(typOutput, value));
4930 }
4931 }
4932
4933 result = cstring_to_text_with_len(str.data, str.len);
4934 pfree(str.data);
4935
4936 return result;
4937 }
4938
4939 /*
4940 * Concatenate all arguments. NULL arguments are ignored.
4941 */
4942 Datum
text_concat(PG_FUNCTION_ARGS)4943 text_concat(PG_FUNCTION_ARGS)
4944 {
4945 text *result;
4946
4947 result = concat_internal("", 0, fcinfo);
4948 if (result == NULL)
4949 PG_RETURN_NULL();
4950 PG_RETURN_TEXT_P(result);
4951 }
4952
4953 /*
4954 * Concatenate all but first argument value with separators. The first
4955 * parameter is used as the separator. NULL arguments are ignored.
4956 */
4957 Datum
text_concat_ws(PG_FUNCTION_ARGS)4958 text_concat_ws(PG_FUNCTION_ARGS)
4959 {
4960 char *sep;
4961 text *result;
4962
4963 /* return NULL when separator is NULL */
4964 if (PG_ARGISNULL(0))
4965 PG_RETURN_NULL();
4966 sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
4967
4968 result = concat_internal(sep, 1, fcinfo);
4969 if (result == NULL)
4970 PG_RETURN_NULL();
4971 PG_RETURN_TEXT_P(result);
4972 }
4973
4974 /*
4975 * Return first n characters in the string. When n is negative,
4976 * return all but last |n| characters.
4977 */
4978 Datum
text_left(PG_FUNCTION_ARGS)4979 text_left(PG_FUNCTION_ARGS)
4980 {
4981 text *str = PG_GETARG_TEXT_PP(0);
4982 const char *p = VARDATA_ANY(str);
4983 int len = VARSIZE_ANY_EXHDR(str);
4984 int n = PG_GETARG_INT32(1);
4985 int rlen;
4986
4987 if (n < 0)
4988 n = pg_mbstrlen_with_len(p, len) + n;
4989 rlen = pg_mbcharcliplen(p, len, n);
4990
4991 PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
4992 }
4993
4994 /*
4995 * Return last n characters in the string. When n is negative,
4996 * return all but first |n| characters.
4997 */
4998 Datum
text_right(PG_FUNCTION_ARGS)4999 text_right(PG_FUNCTION_ARGS)
5000 {
5001 text *str = PG_GETARG_TEXT_PP(0);
5002 const char *p = VARDATA_ANY(str);
5003 int len = VARSIZE_ANY_EXHDR(str);
5004 int n = PG_GETARG_INT32(1);
5005 int off;
5006
5007 if (n < 0)
5008 n = -n;
5009 else
5010 n = pg_mbstrlen_with_len(p, len) - n;
5011 off = pg_mbcharcliplen(p, len, n);
5012
5013 PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
5014 }
5015
5016 /*
5017 * Return reversed string
5018 */
5019 Datum
text_reverse(PG_FUNCTION_ARGS)5020 text_reverse(PG_FUNCTION_ARGS)
5021 {
5022 text *str = PG_GETARG_TEXT_PP(0);
5023 const char *p = VARDATA_ANY(str);
5024 int len = VARSIZE_ANY_EXHDR(str);
5025 const char *endp = p + len;
5026 text *result;
5027 char *dst;
5028
5029 result = palloc(len + VARHDRSZ);
5030 dst = (char *) VARDATA(result) + len;
5031 SET_VARSIZE(result, len + VARHDRSZ);
5032
5033 if (pg_database_encoding_max_length() > 1)
5034 {
5035 /* multibyte version */
5036 while (p < endp)
5037 {
5038 int sz;
5039
5040 sz = pg_mblen(p);
5041 dst -= sz;
5042 memcpy(dst, p, sz);
5043 p += sz;
5044 }
5045 }
5046 else
5047 {
5048 /* single byte version */
5049 while (p < endp)
5050 *(--dst) = *p++;
5051 }
5052
5053 PG_RETURN_TEXT_P(result);
5054 }
5055
5056
/*
 * Support macros for text_format()
 */
#define TEXT_FORMAT_FLAG_MINUS	0x0001	/* is minus flag present? */

/*
 * Step ptr forward by one character, and raise an error if that leaves it
 * at or beyond end_ptr.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
	do { \
		if (!(++(ptr) < (end_ptr))) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("unterminated format() type specifier"), \
					 errhint("For a single \"%%\" use \"%%%%\"."))); \
	} while (0)
5070
5071 /*
5072 * Returns a formatted string
5073 */
5074 Datum
text_format(PG_FUNCTION_ARGS)5075 text_format(PG_FUNCTION_ARGS)
5076 {
5077 text *fmt;
5078 StringInfoData str;
5079 const char *cp;
5080 const char *start_ptr;
5081 const char *end_ptr;
5082 text *result;
5083 int arg;
5084 bool funcvariadic;
5085 int nargs;
5086 Datum *elements = NULL;
5087 bool *nulls = NULL;
5088 Oid element_type = InvalidOid;
5089 Oid prev_type = InvalidOid;
5090 Oid prev_width_type = InvalidOid;
5091 FmgrInfo typoutputfinfo;
5092 FmgrInfo typoutputinfo_width;
5093
5094 /* When format string is null, immediately return null */
5095 if (PG_ARGISNULL(0))
5096 PG_RETURN_NULL();
5097
5098 /* If argument is marked VARIADIC, expand array into elements */
5099 if (get_fn_expr_variadic(fcinfo->flinfo))
5100 {
5101 ArrayType *arr;
5102 int16 elmlen;
5103 bool elmbyval;
5104 char elmalign;
5105 int nitems;
5106
5107 /* Should have just the one argument */
5108 Assert(PG_NARGS() == 2);
5109
5110 /* If argument is NULL, we treat it as zero-length array */
5111 if (PG_ARGISNULL(1))
5112 nitems = 0;
5113 else
5114 {
5115 /*
5116 * Non-null argument had better be an array. We assume that any
5117 * call context that could let get_fn_expr_variadic return true
5118 * will have checked that a VARIADIC-labeled parameter actually is
5119 * an array. So it should be okay to just Assert that it's an
5120 * array rather than doing a full-fledged error check.
5121 */
5122 Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
5123
5124 /* OK, safe to fetch the array value */
5125 arr = PG_GETARG_ARRAYTYPE_P(1);
5126
5127 /* Get info about array element type */
5128 element_type = ARR_ELEMTYPE(arr);
5129 get_typlenbyvalalign(element_type,
5130 &elmlen, &elmbyval, &elmalign);
5131
5132 /* Extract all array elements */
5133 deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
5134 &elements, &nulls, &nitems);
5135 }
5136
5137 nargs = nitems + 1;
5138 funcvariadic = true;
5139 }
5140 else
5141 {
5142 /* Non-variadic case, we'll process the arguments individually */
5143 nargs = PG_NARGS();
5144 funcvariadic = false;
5145 }
5146
5147 /* Setup for main loop. */
5148 fmt = PG_GETARG_TEXT_PP(0);
5149 start_ptr = VARDATA_ANY(fmt);
5150 end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
5151 initStringInfo(&str);
5152 arg = 1; /* next argument position to print */
5153
5154 /* Scan format string, looking for conversion specifiers. */
5155 for (cp = start_ptr; cp < end_ptr; cp++)
5156 {
5157 int argpos;
5158 int widthpos;
5159 int flags;
5160 int width;
5161 Datum value;
5162 bool isNull;
5163 Oid typid;
5164
5165 /*
5166 * If it's not the start of a conversion specifier, just copy it to
5167 * the output buffer.
5168 */
5169 if (*cp != '%')
5170 {
5171 appendStringInfoCharMacro(&str, *cp);
5172 continue;
5173 }
5174
5175 ADVANCE_PARSE_POINTER(cp, end_ptr);
5176
5177 /* Easy case: %% outputs a single % */
5178 if (*cp == '%')
5179 {
5180 appendStringInfoCharMacro(&str, *cp);
5181 continue;
5182 }
5183
5184 /* Parse the optional portions of the format specifier */
5185 cp = text_format_parse_format(cp, end_ptr,
5186 &argpos, &widthpos,
5187 &flags, &width);
5188
5189 /*
5190 * Next we should see the main conversion specifier. Whether or not
5191 * an argument position was present, it's known that at least one
5192 * character remains in the string at this point. Experience suggests
5193 * that it's worth checking that that character is one of the expected
5194 * ones before we try to fetch arguments, so as to produce the least
5195 * confusing response to a mis-formatted specifier.
5196 */
5197 if (strchr("sIL", *cp) == NULL)
5198 ereport(ERROR,
5199 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5200 errmsg("unrecognized format() type specifier \"%c\"",
5201 *cp),
5202 errhint("For a single \"%%\" use \"%%%%\".")));
5203
5204 /* If indirect width was specified, get its value */
5205 if (widthpos >= 0)
5206 {
5207 /* Collect the specified or next argument position */
5208 if (widthpos > 0)
5209 arg = widthpos;
5210 if (arg >= nargs)
5211 ereport(ERROR,
5212 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5213 errmsg("too few arguments for format()")));
5214
5215 /* Get the value and type of the selected argument */
5216 if (!funcvariadic)
5217 {
5218 value = PG_GETARG_DATUM(arg);
5219 isNull = PG_ARGISNULL(arg);
5220 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5221 }
5222 else
5223 {
5224 value = elements[arg - 1];
5225 isNull = nulls[arg - 1];
5226 typid = element_type;
5227 }
5228 if (!OidIsValid(typid))
5229 elog(ERROR, "could not determine data type of format() input");
5230
5231 arg++;
5232
5233 /* We can treat NULL width the same as zero */
5234 if (isNull)
5235 width = 0;
5236 else if (typid == INT4OID)
5237 width = DatumGetInt32(value);
5238 else if (typid == INT2OID)
5239 width = DatumGetInt16(value);
5240 else
5241 {
5242 /* For less-usual datatypes, convert to text then to int */
5243 char *str;
5244
5245 if (typid != prev_width_type)
5246 {
5247 Oid typoutputfunc;
5248 bool typIsVarlena;
5249
5250 getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5251 fmgr_info(typoutputfunc, &typoutputinfo_width);
5252 prev_width_type = typid;
5253 }
5254
5255 str = OutputFunctionCall(&typoutputinfo_width, value);
5256
5257 /* pg_atoi will complain about bad data or overflow */
5258 width = pg_atoi(str, sizeof(int), '\0');
5259
5260 pfree(str);
5261 }
5262 }
5263
5264 /* Collect the specified or next argument position */
5265 if (argpos > 0)
5266 arg = argpos;
5267 if (arg >= nargs)
5268 ereport(ERROR,
5269 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5270 errmsg("too few arguments for format()")));
5271
5272 /* Get the value and type of the selected argument */
5273 if (!funcvariadic)
5274 {
5275 value = PG_GETARG_DATUM(arg);
5276 isNull = PG_ARGISNULL(arg);
5277 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5278 }
5279 else
5280 {
5281 value = elements[arg - 1];
5282 isNull = nulls[arg - 1];
5283 typid = element_type;
5284 }
5285 if (!OidIsValid(typid))
5286 elog(ERROR, "could not determine data type of format() input");
5287
5288 arg++;
5289
5290 /*
5291 * Get the appropriate typOutput function, reusing previous one if
5292 * same type as previous argument. That's particularly useful in the
5293 * variadic-array case, but often saves work even for ordinary calls.
5294 */
5295 if (typid != prev_type)
5296 {
5297 Oid typoutputfunc;
5298 bool typIsVarlena;
5299
5300 getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5301 fmgr_info(typoutputfunc, &typoutputfinfo);
5302 prev_type = typid;
5303 }
5304
5305 /*
5306 * And now we can format the value.
5307 */
5308 switch (*cp)
5309 {
5310 case 's':
5311 case 'I':
5312 case 'L':
5313 text_format_string_conversion(&str, *cp, &typoutputfinfo,
5314 value, isNull,
5315 flags, width);
5316 break;
5317 default:
5318 /* should not get here, because of previous check */
5319 ereport(ERROR,
5320 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5321 errmsg("unrecognized format() type specifier \"%c\"",
5322 *cp),
5323 errhint("For a single \"%%\" use \"%%%%\".")));
5324 break;
5325 }
5326 }
5327
5328 /* Don't need deconstruct_array results anymore. */
5329 if (elements != NULL)
5330 pfree(elements);
5331 if (nulls != NULL)
5332 pfree(nulls);
5333
5334 /* Generate results. */
5335 result = cstring_to_text_with_len(str.data, str.len);
5336 pfree(str.data);
5337
5338 PG_RETURN_TEXT_P(result);
5339 }
5340
5341 /*
5342 * Parse contiguous digits as a decimal number.
5343 *
5344 * Returns true if some digits could be parsed.
5345 * The value is returned into *value, and *ptr is advanced to the next
5346 * character to be parsed.
5347 *
5348 * Note parsing invariant: at least one character is known available before
5349 * string end (end_ptr) at entry, and this is still true at exit.
5350 */
5351 static bool
text_format_parse_digits(const char ** ptr,const char * end_ptr,int * value)5352 text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
5353 {
5354 bool found = false;
5355 const char *cp = *ptr;
5356 int val = 0;
5357
5358 while (*cp >= '0' && *cp <= '9')
5359 {
5360 int newval = val * 10 + (*cp - '0');
5361
5362 if (newval / 10 != val) /* overflow? */
5363 ereport(ERROR,
5364 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5365 errmsg("number is out of range")));
5366 val = newval;
5367 ADVANCE_PARSE_POINTER(cp, end_ptr);
5368 found = true;
5369 }
5370
5371 *ptr = cp;
5372 *value = val;
5373
5374 return found;
5375 }
5376
5377 /*
5378 * Parse a format specifier (generally following the SUS printf spec).
5379 *
5380 * We have already advanced over the initial '%', and we are looking for
5381 * [argpos][flags][width]type (but the type character is not consumed here).
5382 *
5383 * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5384 * Output parameters:
5385 * argpos: argument position for value to be printed. -1 means unspecified.
5386 * widthpos: argument position for width. Zero means the argument position
5387 * was unspecified (ie, take the next arg) and -1 means no width
5388 * argument (width was omitted or specified as a constant).
5389 * flags: bitmask of flags.
5390 * width: directly-specified width value. Zero means the width was omitted
5391 * (note it's not necessary to distinguish this case from an explicit
5392 * zero width value).
5393 *
5394 * The function result is the next character position to be parsed, ie, the
5395 * location where the type character is/should be.
5396 *
5397 * Note parsing invariant: at least one character is known available before
5398 * string end (end_ptr) at entry, and this is still true at exit.
5399 */
5400 static const char *
text_format_parse_format(const char * start_ptr,const char * end_ptr,int * argpos,int * widthpos,int * flags,int * width)5401 text_format_parse_format(const char *start_ptr, const char *end_ptr,
5402 int *argpos, int *widthpos,
5403 int *flags, int *width)
5404 {
5405 const char *cp = start_ptr;
5406 int n;
5407
5408 /* set defaults for output parameters */
5409 *argpos = -1;
5410 *widthpos = -1;
5411 *flags = 0;
5412 *width = 0;
5413
5414 /* try to identify first number */
5415 if (text_format_parse_digits(&cp, end_ptr, &n))
5416 {
5417 if (*cp != '$')
5418 {
5419 /* Must be just a width and a type, so we're done */
5420 *width = n;
5421 return cp;
5422 }
5423 /* The number was argument position */
5424 *argpos = n;
5425 /* Explicit 0 for argument index is immediately refused */
5426 if (n == 0)
5427 ereport(ERROR,
5428 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5429 errmsg("format specifies argument 0, but arguments are numbered from 1")));
5430 ADVANCE_PARSE_POINTER(cp, end_ptr);
5431 }
5432
5433 /* Handle flags (only minus is supported now) */
5434 while (*cp == '-')
5435 {
5436 *flags |= TEXT_FORMAT_FLAG_MINUS;
5437 ADVANCE_PARSE_POINTER(cp, end_ptr);
5438 }
5439
5440 if (*cp == '*')
5441 {
5442 /* Handle indirect width */
5443 ADVANCE_PARSE_POINTER(cp, end_ptr);
5444 if (text_format_parse_digits(&cp, end_ptr, &n))
5445 {
5446 /* number in this position must be closed by $ */
5447 if (*cp != '$')
5448 ereport(ERROR,
5449 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5450 errmsg("width argument position must be ended by \"$\"")));
5451 /* The number was width argument position */
5452 *widthpos = n;
5453 /* Explicit 0 for argument index is immediately refused */
5454 if (n == 0)
5455 ereport(ERROR,
5456 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5457 errmsg("format specifies argument 0, but arguments are numbered from 1")));
5458 ADVANCE_PARSE_POINTER(cp, end_ptr);
5459 }
5460 else
5461 *widthpos = 0; /* width's argument position is unspecified */
5462 }
5463 else
5464 {
5465 /* Check for direct width specification */
5466 if (text_format_parse_digits(&cp, end_ptr, &n))
5467 *width = n;
5468 }
5469
5470 /* cp should now be pointing at type character */
5471 return cp;
5472 }
5473
5474 /*
5475 * Format a %s, %I, or %L conversion
5476 */
5477 static void
text_format_string_conversion(StringInfo buf,char conversion,FmgrInfo * typOutputInfo,Datum value,bool isNull,int flags,int width)5478 text_format_string_conversion(StringInfo buf, char conversion,
5479 FmgrInfo *typOutputInfo,
5480 Datum value, bool isNull,
5481 int flags, int width)
5482 {
5483 char *str;
5484
5485 /* Handle NULL arguments before trying to stringify the value. */
5486 if (isNull)
5487 {
5488 if (conversion == 's')
5489 text_format_append_string(buf, "", flags, width);
5490 else if (conversion == 'L')
5491 text_format_append_string(buf, "NULL", flags, width);
5492 else if (conversion == 'I')
5493 ereport(ERROR,
5494 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
5495 errmsg("null values cannot be formatted as an SQL identifier")));
5496 return;
5497 }
5498
5499 /* Stringify. */
5500 str = OutputFunctionCall(typOutputInfo, value);
5501
5502 /* Escape. */
5503 if (conversion == 'I')
5504 {
5505 /* quote_identifier may or may not allocate a new string. */
5506 text_format_append_string(buf, quote_identifier(str), flags, width);
5507 }
5508 else if (conversion == 'L')
5509 {
5510 char *qstr = quote_literal_cstr(str);
5511
5512 text_format_append_string(buf, qstr, flags, width);
5513 /* quote_literal_cstr() always allocates a new string */
5514 pfree(qstr);
5515 }
5516 else
5517 text_format_append_string(buf, str, flags, width);
5518
5519 /* Cleanup. */
5520 pfree(str);
5521 }
5522
5523 /*
5524 * Append str to buf, padding as directed by flags/width
5525 */
5526 static void
text_format_append_string(StringInfo buf,const char * str,int flags,int width)5527 text_format_append_string(StringInfo buf, const char *str,
5528 int flags, int width)
5529 {
5530 bool align_to_left = false;
5531 int len;
5532
5533 /* fast path for typical easy case */
5534 if (width == 0)
5535 {
5536 appendStringInfoString(buf, str);
5537 return;
5538 }
5539
5540 if (width < 0)
5541 {
5542 /* Negative width: implicit '-' flag, then take absolute value */
5543 align_to_left = true;
5544 /* -INT_MIN is undefined */
5545 if (width <= INT_MIN)
5546 ereport(ERROR,
5547 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5548 errmsg("number is out of range")));
5549 width = -width;
5550 }
5551 else if (flags & TEXT_FORMAT_FLAG_MINUS)
5552 align_to_left = true;
5553
5554 len = pg_mbstrlen(str);
5555 if (align_to_left)
5556 {
5557 /* left justify */
5558 appendStringInfoString(buf, str);
5559 if (len < width)
5560 appendStringInfoSpaces(buf, width - len);
5561 }
5562 else
5563 {
5564 /* right justify */
5565 if (len < width)
5566 appendStringInfoSpaces(buf, width - len);
5567 appendStringInfoString(buf, str);
5568 }
5569 }
5570
5571 /*
5572 * text_format_nv - nonvariadic wrapper for text_format function.
5573 *
5574 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
5575 * which checks that all built-in functions that share the implementing C
5576 * function take the same number of arguments.
5577 */
5578 Datum
text_format_nv(PG_FUNCTION_ARGS)5579 text_format_nv(PG_FUNCTION_ARGS)
5580 {
5581 return text_format(fcinfo);
5582 }
5583
/*
 * Helper for the Levenshtein distance functions: report whether the first
 * len bytes of s1 and s2 are identical.  Faster than memcmp() for this use
 * case.
 *
 * Bytes are compared starting from the last one and working back to the
 * first.  A len of zero or less compares nothing and yields true.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	int			remaining;

	/* remaining counts the bytes not yet checked; index is remaining - 1 */
	for (remaining = len; remaining > 0; remaining--)
	{
		if (s1[remaining - 1] != s2[remaining - 1])
			return false;
	}

	return true;
}
5599
/*
 * Expand each Levenshtein distance variant: levenshtein.c is included twice,
 * the second time with LEVENSHTEIN_LESS_EQUAL defined.
 */
5601 #include "levenshtein.c"
5602 #define LEVENSHTEIN_LESS_EQUAL
5603 #include "levenshtein.c"
5604