#include <postgres.h>

#include <limits.h>

#include <access/hash.h>
#include <catalog/pg_type.h>
#include <fmgr.h>
#include <lib/stringinfo.h>
#include <utils/array.h>
#include <utils/builtins.h>
#include <utils/inet.h>

#include <uriparser/Uri.h>
11
12
/* Module marker macro; must appear once in a PostgreSQL loadable module. */
PG_MODULE_MAGIC;


/*
 * A uri value is handled as a plain varlena.  The functions in this file
 * build it with cstring_to_text() and read it back with
 * TextDatumGetCString(), i.e. its payload is the URI's text.
 */
typedef struct varlena uritype;


/* Datum <-> uritype conversions built on the generic detoast macros. */
#define DatumGetUriP(X) ((uritype *) PG_DETOAST_DATUM(X))
#define DatumGetUriPP(X) ((uritype *) PG_DETOAST_DATUM_PACKED(X))
#define UriPGetDatum(X) PointerGetDatum(X)

/* Argument-fetch and return helpers for uri values, in fmgr macro style. */
#define PG_GETARG_URI_P(n) DatumGetUriP(PG_GETARG_DATUM(n))
#define PG_GETARG_URI_PP(n) DatumGetUriPP(PG_GETARG_DATUM(n))
#define PG_RETURN_URI_P(x) PG_RETURN_POINTER(x)
26
27
28 static void
parse_uri(const char * s,UriUriA * urip)29 parse_uri(const char *s, UriUriA *urip)
30 {
31 UriParserStateA state;
32
33 state.uri = urip;
34 uriParseUriA(&state, s);
35
36 switch (state.errorCode)
37 {
38 case URI_SUCCESS:
39 return;
40 case URI_ERROR_SYNTAX:
41 ereport(ERROR,
42 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
43 errmsg("invalid input syntax for type uri at or near \"%s\"",
44 state.errorPos)));
45 default:
46 elog(ERROR, "liburiparser error code %d", state.errorCode);
47 }
48 }
49
50 PG_FUNCTION_INFO_V1(uri_in);
51 Datum
uri_in(PG_FUNCTION_ARGS)52 uri_in(PG_FUNCTION_ARGS)
53 {
54 char *s = PG_GETARG_CSTRING(0);
55 uritype *vardata;
56 UriUriA uri;
57
58 parse_uri(s, &uri);
59 uriFreeUriMembersA(&uri);
60
61 vardata = (uritype *) cstring_to_text(s);
62 PG_RETURN_URI_P(vardata);
63 }
64
65 PG_FUNCTION_INFO_V1(uri_out);
66 Datum
uri_out(PG_FUNCTION_ARGS)67 uri_out(PG_FUNCTION_ARGS)
68 {
69 Datum arg = PG_GETARG_DATUM(0);
70
71 PG_RETURN_CSTRING(TextDatumGetCString(arg));
72 }
73
74 static text *
uri_text_range_to_text(UriTextRangeA r)75 uri_text_range_to_text(UriTextRangeA r)
76 {
77 if (!r.first || !r.afterLast)
78 return NULL;
79
80 return cstring_to_text_with_len(r.first, r.afterLast - r.first);
81 }
82
83 PG_FUNCTION_INFO_V1(uri_scheme);
84 Datum
uri_scheme(PG_FUNCTION_ARGS)85 uri_scheme(PG_FUNCTION_ARGS)
86 {
87 Datum arg = PG_GETARG_DATUM(0);
88 char *s = TextDatumGetCString(arg);
89 UriUriA uri;
90 text *result;
91
92 parse_uri(s, &uri);
93 result = uri_text_range_to_text(uri.scheme);
94 uriFreeUriMembersA(&uri);
95 if (result)
96 PG_RETURN_TEXT_P(result);
97 else
98 PG_RETURN_NULL();
99 }
100
101 PG_FUNCTION_INFO_V1(uri_userinfo);
102 Datum
uri_userinfo(PG_FUNCTION_ARGS)103 uri_userinfo(PG_FUNCTION_ARGS)
104 {
105 Datum arg = PG_GETARG_DATUM(0);
106 char *s = TextDatumGetCString(arg);
107 UriUriA uri;
108 text *result;
109
110 parse_uri(s, &uri);
111 result = uri_text_range_to_text(uri.userInfo);
112 uriFreeUriMembersA(&uri);
113 if (result)
114 PG_RETURN_TEXT_P(result);
115 else
116 PG_RETURN_NULL();
117 }
118
119 PG_FUNCTION_INFO_V1(uri_host);
120 Datum
uri_host(PG_FUNCTION_ARGS)121 uri_host(PG_FUNCTION_ARGS)
122 {
123 Datum arg = PG_GETARG_DATUM(0);
124 char *s = TextDatumGetCString(arg);
125 UriUriA uri;
126 text *result;
127
128 parse_uri(s, &uri);
129 result = uri_text_range_to_text(uri.hostText);
130 uriFreeUriMembersA(&uri);
131 if (result)
132 PG_RETURN_TEXT_P(result);
133 else
134 PG_RETURN_NULL();
135 }
136
137 PG_FUNCTION_INFO_V1(uri_host_inet);
138 Datum
uri_host_inet(PG_FUNCTION_ARGS)139 uri_host_inet(PG_FUNCTION_ARGS)
140 {
141 Datum arg = PG_GETARG_DATUM(0);
142 char *s = TextDatumGetCString(arg);
143 UriUriA uri;
144
145 parse_uri(s, &uri);
146 if (uri.hostData.ip4)
147 {
148 unsigned char *data = uri.hostData.ip4->data;
149 char *tmp = palloc(16);
150 snprintf(tmp, 16, "%u.%u.%u.%u", data[0], data[1], data[2], data[3]);
151 uriFreeUriMembersA(&uri);
152 PG_RETURN_INET_P(DirectFunctionCall1(inet_in, CStringGetDatum(tmp)));
153 }
154 else if (uri.hostData.ip6)
155 {
156 unsigned char *data = uri.hostData.ip6->data;
157 char *tmp = palloc(40);
158 snprintf(tmp, 40, "%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x",
159 data[0], data[1], data[2], data[3],
160 data[4], data[5], data[6], data[7],
161 data[8], data[9], data[10], data[11],
162 data[12], data[13], data[14], data[15]);
163 uriFreeUriMembersA(&uri);
164 PG_RETURN_INET_P(DirectFunctionCall1(inet_in, CStringGetDatum(tmp)));
165 }
166 else
167 {
168 uriFreeUriMembersA(&uri);
169 PG_RETURN_NULL();
170 }
171 }
172
173 static int
_uri_port_num(UriUriA * urip)174 _uri_port_num(UriUriA *urip)
175 {
176 if (!urip->portText.first || !urip->portText.afterLast
177 || urip->portText.afterLast == urip->portText.first)
178 return -1;
179 return strtol(pnstrdup(urip->portText.first, urip->portText.afterLast - urip->portText.first),
180 NULL, 10);
181 }
182
183 PG_FUNCTION_INFO_V1(uri_port);
184 Datum
uri_port(PG_FUNCTION_ARGS)185 uri_port(PG_FUNCTION_ARGS)
186 {
187 Datum arg = PG_GETARG_DATUM(0);
188 char *s = TextDatumGetCString(arg);
189 UriUriA uri;
190 int num;
191
192 parse_uri(s, &uri);
193 num = _uri_port_num(&uri);
194 uriFreeUriMembersA(&uri);
195 if (num < 0)
196 PG_RETURN_NULL();
197 PG_RETURN_INT32(num);
198 }
199
200 PG_FUNCTION_INFO_V1(uri_query);
201 Datum
uri_query(PG_FUNCTION_ARGS)202 uri_query(PG_FUNCTION_ARGS)
203 {
204 Datum arg = PG_GETARG_DATUM(0);
205 char *s = TextDatumGetCString(arg);
206 UriUriA uri;
207 text *result;
208
209 parse_uri(s, &uri);
210 result = uri_text_range_to_text(uri.query);
211 uriFreeUriMembersA(&uri);
212 if (result)
213 PG_RETURN_TEXT_P(result);
214 else
215 PG_RETURN_NULL();
216 }
217
218 PG_FUNCTION_INFO_V1(uri_fragment);
219 Datum
uri_fragment(PG_FUNCTION_ARGS)220 uri_fragment(PG_FUNCTION_ARGS)
221 {
222 Datum arg = PG_GETARG_DATUM(0);
223 char *s = TextDatumGetCString(arg);
224 UriUriA uri;
225 text *result;
226
227 parse_uri(s, &uri);
228 result = uri_text_range_to_text(uri.fragment);
229 uriFreeUriMembersA(&uri);
230 if (result)
231 PG_RETURN_TEXT_P(result);
232 else
233 PG_RETURN_NULL();
234 }
235
236 /*
237 * Defined in uriparser library, but not exported, so we keep a local version
238 * here.
239 */
240 static bool
_is_host_set(UriUriA * uri)241 _is_host_set(UriUriA *uri)
242 {
243 return (uri != NULL)
244 && ((uri->hostText.first != NULL)
245 || (uri->hostData.ip4 != NULL)
246 || (uri->hostData.ip6 != NULL)
247 || (uri->hostData.ipFuture.first != NULL)
248 );
249 }
250
251 PG_FUNCTION_INFO_V1(uri_path);
252 Datum
uri_path(PG_FUNCTION_ARGS)253 uri_path(PG_FUNCTION_ARGS)
254 {
255 Datum arg = PG_GETARG_DATUM(0);
256 char *s = TextDatumGetCString(arg);
257 UriUriA uri;
258 StringInfoData buf;
259 UriPathSegmentA *p;
260
261 initStringInfo(&buf);
262
263 parse_uri(s, &uri);
264
265 if (uri.absolutePath || (_is_host_set(&uri) && uri.pathHead))
266 appendStringInfoChar(&buf, '/');
267
268 for (p = uri.pathHead; p; p = p->next)
269 {
270 appendBinaryStringInfo(&buf, p->text.first, p->text.afterLast - p->text.first);
271 if (p->next)
272 appendStringInfoChar(&buf, '/');
273 }
274
275 uriFreeUriMembersA(&uri);
276 PG_RETURN_TEXT_P(cstring_to_text(buf.data));
277 }
278
279 PG_FUNCTION_INFO_V1(uri_path_array);
280 Datum
uri_path_array(PG_FUNCTION_ARGS)281 uri_path_array(PG_FUNCTION_ARGS)
282 {
283 Datum arg = PG_GETARG_DATUM(0);
284 char *s = TextDatumGetCString(arg);
285 UriUriA uri;
286 ArrayBuildState *astate = NULL;
287 UriPathSegmentA *pa;
288
289 parse_uri(s, &uri);
290 for (pa = uri.pathHead; pa; pa = pa->next)
291 {
292 text *piece = uri_text_range_to_text(pa->text);
293 astate = accumArrayResult(astate,
294 PointerGetDatum(piece),
295 !piece,
296 TEXTOID,
297 CurrentMemoryContext);
298 }
299 uriFreeUriMembersA(&uri);
300
301 if (astate)
302 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
303 else
304 PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
305 }
306
307 PG_FUNCTION_INFO_V1(uri_normalize);
308 Datum
uri_normalize(PG_FUNCTION_ARGS)309 uri_normalize(PG_FUNCTION_ARGS)
310 {
311 Datum arg = PG_GETARG_DATUM(0);
312 char *s = TextDatumGetCString(arg);
313 UriUriA uri;
314 int rc;
315 int charsRequired;
316 char *ret;
317
318 parse_uri(s, &uri);
319
320 if ((rc = uriNormalizeSyntaxA(&uri)) != URI_SUCCESS)
321 elog(ERROR, "uriNormalizeSyntaxA() failed: error code %d", rc);
322
323 if ((rc = uriToStringCharsRequiredA(&uri, &charsRequired)) != URI_SUCCESS)
324 elog(ERROR, "uriToStringCharsRequiredA() failed: error code %d", rc);
325 charsRequired++;
326
327 ret = palloc(charsRequired);
328 if ((rc = uriToStringA(ret, &uri, charsRequired, NULL)) != URI_SUCCESS)
329 elog(ERROR, "uriToStringA() failed: error code %d", rc);
330
331 uriFreeUriMembersA(&uri);
332
333 PG_RETURN_URI_P((uritype *) cstring_to_text(ret));
334 }
335
/*
 * Compare two NUL-terminated strings, ignoring case for the ASCII letters
 * A-Z only; all other bytes are compared by their unsigned value.
 *
 * Returns zero when the strings match, otherwise the difference between the
 * first pair of (case-folded) bytes that differ.
 */
static int
strcasecmp_ascii(const char *s1, const char *s2)
{
	const unsigned char *p = (const unsigned char *) s1;
	const unsigned char *q = (const unsigned char *) s2;

	for (;; p++, q++)
	{
		unsigned char c1 = *p;
		unsigned char c2 = *q;

		/* Fold upper-case ASCII letters to lower case. */
		if (c1 >= 'A' && c1 <= 'Z')
			c1 += 'a' - 'A';
		if (c2 >= 'A' && c2 <= 'Z')
			c2 += 'a' - 'A';

		if (c1 != c2)
			return (int) c1 - (int) c2;

		/* Both bytes are equal here, so one terminator means both ended. */
		if (c1 == '\0')
			return 0;
	}
}
360
/*
 * Like strcasecmp_ascii(), but examines at most n bytes of each string.
 *
 * Case is ignored for the ASCII letters A-Z only.  Comparison stops at the
 * first differing (case-folded) byte, at a NUL terminator, or after n bytes,
 * whichever comes first.  Returns zero when no difference was found.
 */
static int
strncasecmp_ascii(const char *s1, const char *s2, size_t n)
{
	size_t		i;

	for (i = 0; i < n; i++)
	{
		unsigned char c1 = (unsigned char) s1[i];
		unsigned char c2 = (unsigned char) s2[i];

		/* Fold upper-case ASCII letters to lower case. */
		if (c1 >= 'A' && c1 <= 'Z')
			c1 += 'a' - 'A';
		if (c2 >= 'A' && c2 <= 'Z')
			c2 += 'a' - 'A';

		if (c1 != c2)
			return (int) c1 - (int) c2;

		/* Both bytes are equal here, so one terminator means both ended. */
		if (c1 == '\0')
			break;
	}

	return 0;
}
385
386 static int
cmp_text_range(UriTextRangeA a,UriTextRangeA b)387 cmp_text_range(UriTextRangeA a, UriTextRangeA b)
388 {
389 if (!a.first || !a.afterLast)
390 {
391 if (!b.first || !b.afterLast)
392 return 0;
393 else
394 return -1;
395 }
396 else if (!b.first || !b.afterLast)
397 return 1;
398 else
399 {
400 int x = strncasecmp_ascii(a.first, b.first,
401 Min(a.afterLast - a.first, b.afterLast - b.first));
402 if (x == 0)
403 return (a.afterLast - a.first) - (b.afterLast - b.first);
404 return x;
405 }
406 }
407
408 static int
cmp_hosts(UriUriA * uap,UriUriA * ubp)409 cmp_hosts(UriUriA *uap, UriUriA *ubp)
410 {
411 if (!uap->hostText.first)
412 {
413 if (!ubp->hostText.first)
414 return 0;
415 else
416 return -1;
417 }
418 else if (uap->hostData.ip4)
419 {
420 if (!ubp->hostText.first)
421 return 1;
422 else if (ubp->hostData.ip4)
423 return memcmp(uap->hostData.ip4->data,
424 ubp->hostData.ip4->data,
425 sizeof(uap->hostData.ip4->data));
426 else
427 return -1;
428 }
429 else if (uap->hostData.ip6)
430 {
431 if (!ubp->hostText.first)
432 return 1;
433 else if (ubp->hostData.ip4)
434 return 1;
435 else if (ubp->hostData.ip6)
436 return memcmp(uap->hostData.ip6->data,
437 ubp->hostData.ip6->data,
438 sizeof(uap->hostData.ip6->data));
439 else
440 return -1;
441 }
442 else
443 return cmp_text_range(uap->hostText, ubp->hostText);
444 }
445
446 static int
_uri_cmp(Datum a,Datum b)447 _uri_cmp(Datum a, Datum b)
448 {
449 const char *sa = TextDatumGetCString(a);
450 const char *sb = TextDatumGetCString(b);
451 UriUriA ua;
452 UriUriA ub;
453 int res = 0;
454
455 parse_uri(sa, &ua);
456 parse_uri(sb, &ub);
457
458 if (res == 0)
459 res = cmp_text_range(ua.scheme, ub.scheme);
460 if (res == 0)
461 res = cmp_hosts(&ua, &ub);
462 if (res == 0)
463 res = _uri_port_num(&ua) - _uri_port_num(&ub);
464 if (res == 0)
465 res = cmp_text_range(ua.userInfo, ub.userInfo);
466 if (res == 0)
467 res = strcasecmp_ascii(sa, sb);
468 if (res == 0)
469 res = strcmp(sa, sb);
470 uriFreeUriMembersA(&ua);
471 uriFreeUriMembersA(&ub);
472
473 return res;
474 }
475
476 PG_FUNCTION_INFO_V1(uri_lt);
477 Datum
uri_lt(PG_FUNCTION_ARGS)478 uri_lt(PG_FUNCTION_ARGS)
479 {
480 Datum arg1 = PG_GETARG_DATUM(0);
481 Datum arg2 = PG_GETARG_DATUM(1);
482
483 PG_RETURN_BOOL(_uri_cmp(arg1, arg2) < 0);
484 }
485
486 PG_FUNCTION_INFO_V1(uri_le);
487 Datum
uri_le(PG_FUNCTION_ARGS)488 uri_le(PG_FUNCTION_ARGS)
489 {
490 Datum arg1 = PG_GETARG_DATUM(0);
491 Datum arg2 = PG_GETARG_DATUM(1);
492
493 PG_RETURN_BOOL(_uri_cmp(arg1, arg2) <= 0);
494 }
495
496 PG_FUNCTION_INFO_V1(uri_eq);
497 Datum
uri_eq(PG_FUNCTION_ARGS)498 uri_eq(PG_FUNCTION_ARGS)
499 {
500 Datum arg1 = PG_GETARG_DATUM(0);
501 Datum arg2 = PG_GETARG_DATUM(1);
502
503 PG_RETURN_BOOL(_uri_cmp(arg1, arg2) == 0);
504 }
505
506 PG_FUNCTION_INFO_V1(uri_ne);
507 Datum
uri_ne(PG_FUNCTION_ARGS)508 uri_ne(PG_FUNCTION_ARGS)
509 {
510 Datum arg1 = PG_GETARG_DATUM(0);
511 Datum arg2 = PG_GETARG_DATUM(1);
512
513 PG_RETURN_BOOL(_uri_cmp(arg1, arg2) != 0);
514 }
515
516 PG_FUNCTION_INFO_V1(uri_ge);
517 Datum
uri_ge(PG_FUNCTION_ARGS)518 uri_ge(PG_FUNCTION_ARGS)
519 {
520 Datum arg1 = PG_GETARG_DATUM(0);
521 Datum arg2 = PG_GETARG_DATUM(1);
522
523 PG_RETURN_BOOL(_uri_cmp(arg1, arg2) >= 0);
524 }
525
526 PG_FUNCTION_INFO_V1(uri_gt);
527 Datum
uri_gt(PG_FUNCTION_ARGS)528 uri_gt(PG_FUNCTION_ARGS)
529 {
530 Datum arg1 = PG_GETARG_DATUM(0);
531 Datum arg2 = PG_GETARG_DATUM(1);
532
533 PG_RETURN_BOOL(_uri_cmp(arg1, arg2) > 0);
534 }
535
536 PG_FUNCTION_INFO_V1(uri_cmp);
537 Datum
uri_cmp(PG_FUNCTION_ARGS)538 uri_cmp(PG_FUNCTION_ARGS)
539 {
540 Datum arg1 = PG_GETARG_DATUM(0);
541 Datum arg2 = PG_GETARG_DATUM(1);
542
543 PG_RETURN_INT32(_uri_cmp(arg1, arg2));
544 }
545
546 PG_FUNCTION_INFO_V1(uri_hash);
547 Datum
uri_hash(PG_FUNCTION_ARGS)548 uri_hash(PG_FUNCTION_ARGS)
549 {
550 uritype *key = PG_GETARG_URI_PP(0);
551 Datum result;
552
553 result = hash_any((unsigned char *) VARDATA_ANY(key),
554 VARSIZE_ANY_EXHDR(key));
555
556 /* Avoid leaking memory for toasted inputs */
557 PG_FREE_IF_COPY(key, 0);
558
559 return result;
560 }
561
562 PG_FUNCTION_INFO_V1(uri_escape);
563 Datum
uri_escape(PG_FUNCTION_ARGS)564 uri_escape(PG_FUNCTION_ARGS)
565 {
566 text *arg = PG_GETARG_TEXT_PP(0);
567 bool space_to_plus = PG_GETARG_BOOL(1);
568 bool normalize_breaks = PG_GETARG_BOOL(2);
569
570 size_t chars_required;
571 char *ret;
572
573 chars_required = (VARSIZE(arg) - 4) * (normalize_breaks ? 6 : 3) + 1;
574 ret = palloc(chars_required);
575 uriEscapeExA(VARDATA(arg),
576 VARDATA(arg) + VARSIZE(arg) - 4,
577 ret,
578 space_to_plus, normalize_breaks);
579
580 PG_RETURN_TEXT_P(cstring_to_text(ret));
581 }
582
583 PG_FUNCTION_INFO_V1(uri_unescape);
584 Datum
uri_unescape(PG_FUNCTION_ARGS)585 uri_unescape(PG_FUNCTION_ARGS)
586 {
587 text *arg = PG_GETARG_TEXT_PP(0);
588 bool plus_to_space = PG_GETARG_BOOL(1);
589 bool break_conversion = PG_GETARG_BOOL(2);
590
591 char *s = text_to_cstring(arg);
592
593 uriUnescapeInPlaceExA(s, plus_to_space, break_conversion);
594
595 PG_RETURN_TEXT_P(cstring_to_text(s));
596 }
597