#include <postgres.h>

#include <errno.h>
#include <limits.h>

#include <access/hash.h>
#include <catalog/pg_type.h>
#include <fmgr.h>
#include <lib/stringinfo.h>
#include <utils/array.h>
#include <utils/builtins.h>
#include <utils/inet.h>

#include <uriparser/Uri.h>
11 
12 
PG_MODULE_MAGIC;


/*
 * A uri value is handled as a plain varlena.  uri_in() builds it with
 * cstring_to_text(), and the accessor functions read it back with
 * TextDatumGetCString(), so the stored form is simply the URI string.
 */
typedef struct varlena uritype;


/*
 * Datum <-> uritype conversions.  The "P" variant goes through
 * PG_DETOAST_DATUM, the "PP" variant through PG_DETOAST_DATUM_PACKED.
 */
#define DatumGetUriP(X)		((uritype *) PG_DETOAST_DATUM(X))
#define DatumGetUriPP(X)	((uritype *) PG_DETOAST_DATUM_PACKED(X))
#define UriPGetDatum(X)		PointerGetDatum(X)

/* Argument fetch / result return helpers, by analogy with fmgr.h's macros. */
#define PG_GETARG_URI_P(n)	DatumGetUriP(PG_GETARG_DATUM(n))
#define PG_GETARG_URI_PP(n)	DatumGetUriPP(PG_GETARG_DATUM(n))
#define PG_RETURN_URI_P(x)	PG_RETURN_POINTER(x)
26 
27 
28 static void
parse_uri(const char * s,UriUriA * urip)29 parse_uri(const char *s, UriUriA *urip)
30 {
31 	UriParserStateA state;
32 
33 	state.uri = urip;
34 	uriParseUriA(&state, s);
35 
36 	switch (state.errorCode)
37 	{
38 		case URI_SUCCESS:
39 			return;
40 		case URI_ERROR_SYNTAX:
41 			ereport(ERROR,
42 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
43 					 errmsg("invalid input syntax for type uri at or near \"%s\"",
44 							state.errorPos)));
45 		default:
46 			elog(ERROR, "liburiparser error code %d", state.errorCode);
47 	}
48 }
49 
50 PG_FUNCTION_INFO_V1(uri_in);
51 Datum
uri_in(PG_FUNCTION_ARGS)52 uri_in(PG_FUNCTION_ARGS)
53 {
54 	char *s = PG_GETARG_CSTRING(0);
55 	uritype *vardata;
56 	UriUriA uri;
57 
58 	parse_uri(s, &uri);
59 	uriFreeUriMembersA(&uri);
60 
61 	vardata = (uritype *) cstring_to_text(s);
62 	PG_RETURN_URI_P(vardata);
63 }
64 
65 PG_FUNCTION_INFO_V1(uri_out);
66 Datum
uri_out(PG_FUNCTION_ARGS)67 uri_out(PG_FUNCTION_ARGS)
68 {
69 	Datum arg = PG_GETARG_DATUM(0);
70 
71 	PG_RETURN_CSTRING(TextDatumGetCString(arg));
72 }
73 
74 static text *
uri_text_range_to_text(UriTextRangeA r)75 uri_text_range_to_text(UriTextRangeA r)
76 {
77 	if (!r.first || !r.afterLast)
78 		return NULL;
79 
80 	return cstring_to_text_with_len(r.first, r.afterLast - r.first);
81 }
82 
83 PG_FUNCTION_INFO_V1(uri_scheme);
84 Datum
uri_scheme(PG_FUNCTION_ARGS)85 uri_scheme(PG_FUNCTION_ARGS)
86 {
87 	Datum arg = PG_GETARG_DATUM(0);
88 	char *s = TextDatumGetCString(arg);
89 	UriUriA uri;
90 	text *result;
91 
92 	parse_uri(s, &uri);
93 	result = uri_text_range_to_text(uri.scheme);
94 	uriFreeUriMembersA(&uri);
95 	if (result)
96 		PG_RETURN_TEXT_P(result);
97 	else
98 		PG_RETURN_NULL();
99 }
100 
101 PG_FUNCTION_INFO_V1(uri_userinfo);
102 Datum
uri_userinfo(PG_FUNCTION_ARGS)103 uri_userinfo(PG_FUNCTION_ARGS)
104 {
105 	Datum arg = PG_GETARG_DATUM(0);
106 	char *s = TextDatumGetCString(arg);
107 	UriUriA uri;
108 	text *result;
109 
110 	parse_uri(s, &uri);
111 	result = uri_text_range_to_text(uri.userInfo);
112 	uriFreeUriMembersA(&uri);
113 	if (result)
114 		PG_RETURN_TEXT_P(result);
115 	else
116 		PG_RETURN_NULL();
117 }
118 
119 PG_FUNCTION_INFO_V1(uri_host);
120 Datum
uri_host(PG_FUNCTION_ARGS)121 uri_host(PG_FUNCTION_ARGS)
122 {
123 	Datum arg = PG_GETARG_DATUM(0);
124 	char *s = TextDatumGetCString(arg);
125 	UriUriA uri;
126 	text *result;
127 
128 	parse_uri(s, &uri);
129 	result = uri_text_range_to_text(uri.hostText);
130 	uriFreeUriMembersA(&uri);
131 	if (result)
132 		PG_RETURN_TEXT_P(result);
133 	else
134 		PG_RETURN_NULL();
135 }
136 
137 PG_FUNCTION_INFO_V1(uri_host_inet);
138 Datum
uri_host_inet(PG_FUNCTION_ARGS)139 uri_host_inet(PG_FUNCTION_ARGS)
140 {
141 	Datum arg = PG_GETARG_DATUM(0);
142 	char *s = TextDatumGetCString(arg);
143 	UriUriA uri;
144 
145 	parse_uri(s, &uri);
146 	if (uri.hostData.ip4)
147 	{
148 		unsigned char *data = uri.hostData.ip4->data;
149 		char *tmp = palloc(16);
150 		snprintf(tmp, 16, "%u.%u.%u.%u", data[0], data[1], data[2], data[3]);
151 		uriFreeUriMembersA(&uri);
152 		PG_RETURN_INET_P(DirectFunctionCall1(inet_in, CStringGetDatum(tmp)));
153 	}
154 	else if (uri.hostData.ip6)
155 	{
156 		unsigned char *data = uri.hostData.ip6->data;
157 		char *tmp = palloc(40);
158 		snprintf(tmp, 40, "%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x",
159 				 data[0], data[1], data[2], data[3],
160 				 data[4], data[5], data[6], data[7],
161 				 data[8], data[9], data[10], data[11],
162 				 data[12], data[13], data[14], data[15]);
163 		uriFreeUriMembersA(&uri);
164 		PG_RETURN_INET_P(DirectFunctionCall1(inet_in, CStringGetDatum(tmp)));
165 	}
166 	else
167 	{
168 		uriFreeUriMembersA(&uri);
169 		PG_RETURN_NULL();
170 	}
171 }
172 
173 static int
_uri_port_num(UriUriA * urip)174 _uri_port_num(UriUriA *urip)
175 {
176 	if (!urip->portText.first || !urip->portText.afterLast
177 		|| urip->portText.afterLast == urip->portText.first)
178 		return -1;
179 	return strtol(pnstrdup(urip->portText.first, urip->portText.afterLast - urip->portText.first),
180 				 NULL, 10);
181 }
182 
183 PG_FUNCTION_INFO_V1(uri_port);
184 Datum
uri_port(PG_FUNCTION_ARGS)185 uri_port(PG_FUNCTION_ARGS)
186 {
187 	Datum arg = PG_GETARG_DATUM(0);
188 	char *s = TextDatumGetCString(arg);
189 	UriUriA uri;
190 	int num;
191 
192 	parse_uri(s, &uri);
193 	num = _uri_port_num(&uri);
194 	uriFreeUriMembersA(&uri);
195 	if (num < 0)
196 		PG_RETURN_NULL();
197 	PG_RETURN_INT32(num);
198 }
199 
200 PG_FUNCTION_INFO_V1(uri_query);
201 Datum
uri_query(PG_FUNCTION_ARGS)202 uri_query(PG_FUNCTION_ARGS)
203 {
204 	Datum arg = PG_GETARG_DATUM(0);
205 	char *s = TextDatumGetCString(arg);
206 	UriUriA uri;
207 	text *result;
208 
209 	parse_uri(s, &uri);
210 	result = uri_text_range_to_text(uri.query);
211 	uriFreeUriMembersA(&uri);
212 	if (result)
213 		PG_RETURN_TEXT_P(result);
214 	else
215 		PG_RETURN_NULL();
216 }
217 
218 PG_FUNCTION_INFO_V1(uri_fragment);
219 Datum
uri_fragment(PG_FUNCTION_ARGS)220 uri_fragment(PG_FUNCTION_ARGS)
221 {
222 	Datum arg = PG_GETARG_DATUM(0);
223 	char *s = TextDatumGetCString(arg);
224 	UriUriA uri;
225 	text *result;
226 
227 	parse_uri(s, &uri);
228 	result = uri_text_range_to_text(uri.fragment);
229 	uriFreeUriMembersA(&uri);
230 	if (result)
231 		PG_RETURN_TEXT_P(result);
232 	else
233 		PG_RETURN_NULL();
234 }
235 
236 /*
237  * Defined in uriparser library, but not exported, so we keep a local version
238  * here.
239  */
240 static bool
_is_host_set(UriUriA * uri)241 _is_host_set(UriUriA *uri)
242 {
243 	return (uri != NULL)
244 		&& ((uri->hostText.first != NULL)
245 			|| (uri->hostData.ip4 != NULL)
246 			|| (uri->hostData.ip6 != NULL)
247 			|| (uri->hostData.ipFuture.first != NULL)
248 			);
249 }
250 
251 PG_FUNCTION_INFO_V1(uri_path);
252 Datum
uri_path(PG_FUNCTION_ARGS)253 uri_path(PG_FUNCTION_ARGS)
254 {
255 	Datum arg = PG_GETARG_DATUM(0);
256 	char *s = TextDatumGetCString(arg);
257 	UriUriA uri;
258 	StringInfoData buf;
259 	UriPathSegmentA *p;
260 
261 	initStringInfo(&buf);
262 
263 	parse_uri(s, &uri);
264 
265 	if (uri.absolutePath || (_is_host_set(&uri) && uri.pathHead))
266 		appendStringInfoChar(&buf, '/');
267 
268 	for (p = uri.pathHead; p; p = p->next)
269 	{
270 		appendBinaryStringInfo(&buf, p->text.first, p->text.afterLast - p->text.first);
271 		if (p->next)
272 			appendStringInfoChar(&buf, '/');
273 	}
274 
275 	uriFreeUriMembersA(&uri);
276 	PG_RETURN_TEXT_P(cstring_to_text(buf.data));
277 }
278 
279 PG_FUNCTION_INFO_V1(uri_path_array);
280 Datum
uri_path_array(PG_FUNCTION_ARGS)281 uri_path_array(PG_FUNCTION_ARGS)
282 {
283 	Datum arg = PG_GETARG_DATUM(0);
284 	char *s = TextDatumGetCString(arg);
285 	UriUriA uri;
286 	ArrayBuildState *astate = NULL;
287 	UriPathSegmentA *pa;
288 
289 	parse_uri(s, &uri);
290 	for (pa = uri.pathHead; pa; pa = pa->next)
291 	{
292 		text *piece = uri_text_range_to_text(pa->text);
293 		astate = accumArrayResult(astate,
294 								  PointerGetDatum(piece),
295 								  !piece,
296 								  TEXTOID,
297 								  CurrentMemoryContext);
298 	}
299 	uriFreeUriMembersA(&uri);
300 
301 	if (astate)
302 		PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
303 	else
304 		PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
305 }
306 
307 PG_FUNCTION_INFO_V1(uri_normalize);
308 Datum
uri_normalize(PG_FUNCTION_ARGS)309 uri_normalize(PG_FUNCTION_ARGS)
310 {
311 	Datum arg = PG_GETARG_DATUM(0);
312 	char *s = TextDatumGetCString(arg);
313 	UriUriA uri;
314 	int rc;
315 	int charsRequired;
316 	char *ret;
317 
318 	parse_uri(s, &uri);
319 
320 	if ((rc = uriNormalizeSyntaxA(&uri)) != URI_SUCCESS)
321 		elog(ERROR, "uriNormalizeSyntaxA() failed: error code %d", rc);
322 
323 	if ((rc = uriToStringCharsRequiredA(&uri, &charsRequired)) != URI_SUCCESS)
324 		elog(ERROR, "uriToStringCharsRequiredA() failed: error code %d", rc);
325 	charsRequired++;
326 
327 	ret = palloc(charsRequired);
328 	if ((rc = uriToStringA(ret, &uri, charsRequired, NULL)) != URI_SUCCESS)
329 		elog(ERROR, "uriToStringA() failed: error code %d", rc);
330 
331 	uriFreeUriMembersA(&uri);
332 
333 	PG_RETURN_URI_P((uritype *) cstring_to_text(ret));
334 }
335 
/*
 * Compare two NUL-terminated strings, treating 'A'..'Z' as equal to
 * 'a'..'z'.  Only those ASCII letters are folded; every other byte compares
 * by its unsigned value.
 *
 * Returns zero when the strings match, otherwise the difference between the
 * first pair of (folded) bytes that differ.
 */
static int
strcasecmp_ascii(const char *s1, const char *s2)
{
	const unsigned char *p = (const unsigned char *) s1;
	const unsigned char *q = (const unsigned char *) s2;

	for (;; p++, q++)
	{
		int			a = *p;
		int			b = *q;

		if (a >= 'A' && a <= 'Z')
			a += 'a' - 'A';
		if (b >= 'A' && b <= 'Z')
			b += 'a' - 'A';

		if (a != b)
			return a - b;

		/* Both bytes are equal here, so one terminator means both ended. */
		if (a == 0)
			return 0;
	}
}
360 
/*
 * Like strcasecmp_ascii(), but looks at no more than n bytes of each string.
 *
 * Comparison stops at the first differing (folded) byte, at a NUL reached in
 * both strings, or after n bytes, whichever comes first.  Returns zero when
 * no difference was found, otherwise the difference of the folded bytes.
 */
static int
strncasecmp_ascii(const char *s1, const char *s2, size_t n)
{
	size_t		i;

	for (i = 0; i < n; i++)
	{
		int			a = (unsigned char) s1[i];
		int			b = (unsigned char) s2[i];

		if (a >= 'A' && a <= 'Z')
			a += 'a' - 'A';
		if (b >= 'A' && b <= 'Z')
			b += 'a' - 'A';

		if (a != b)
			return a - b;

		/* Equal bytes: a terminator here ends both strings. */
		if (a == 0)
			break;
	}

	return 0;
}
385 
386 static int
cmp_text_range(UriTextRangeA a,UriTextRangeA b)387 cmp_text_range(UriTextRangeA a, UriTextRangeA b)
388 {
389 	if (!a.first || !a.afterLast)
390 	{
391 		if (!b.first || !b.afterLast)
392 			return 0;
393 		else
394 			return -1;
395 	}
396 	else if (!b.first || !b.afterLast)
397 		return 1;
398 	else
399 	{
400 		int x = strncasecmp_ascii(a.first, b.first,
401 								  Min(a.afterLast - a.first, b.afterLast - b.first));
402 		if (x == 0)
403 			return (a.afterLast - a.first) - (b.afterLast - b.first);
404 		return x;
405 	}
406 }
407 
408 static int
cmp_hosts(UriUriA * uap,UriUriA * ubp)409 cmp_hosts(UriUriA *uap, UriUriA *ubp)
410 {
411 	if (!uap->hostText.first)
412 	{
413 		if (!ubp->hostText.first)
414 			return 0;
415 		else
416 			return -1;
417 	}
418 	else if (uap->hostData.ip4)
419 	{
420 		if (!ubp->hostText.first)
421 			return 1;
422 		else if (ubp->hostData.ip4)
423 			return memcmp(uap->hostData.ip4->data,
424 						  ubp->hostData.ip4->data,
425 						  sizeof(uap->hostData.ip4->data));
426 		else
427 			return -1;
428 	}
429 	else if (uap->hostData.ip6)
430 	{
431 		if (!ubp->hostText.first)
432 			return 1;
433 		else if (ubp->hostData.ip4)
434 			return 1;
435 		else if (ubp->hostData.ip6)
436 			return memcmp(uap->hostData.ip6->data,
437 						  ubp->hostData.ip6->data,
438 						  sizeof(uap->hostData.ip6->data));
439 		else
440 			return -1;
441 	}
442 	else
443 		return cmp_text_range(uap->hostText, ubp->hostText);
444 }
445 
446 static int
_uri_cmp(Datum a,Datum b)447 _uri_cmp(Datum a, Datum b)
448 {
449 	const char *sa = TextDatumGetCString(a);
450 	const char *sb = TextDatumGetCString(b);
451 	UriUriA ua;
452 	UriUriA ub;
453 	int res = 0;
454 
455 	parse_uri(sa, &ua);
456 	parse_uri(sb, &ub);
457 
458 	if (res == 0)
459 		res = cmp_text_range(ua.scheme, ub.scheme);
460 	if (res == 0)
461 		res = cmp_hosts(&ua, &ub);
462 	if (res == 0)
463 		res = _uri_port_num(&ua) - _uri_port_num(&ub);
464 	if (res == 0)
465 		res = cmp_text_range(ua.userInfo, ub.userInfo);
466 	if (res == 0)
467 		res = strcasecmp_ascii(sa, sb);
468 	if (res == 0)
469 		res = strcmp(sa, sb);
470 	uriFreeUriMembersA(&ua);
471 	uriFreeUriMembersA(&ub);
472 
473 	return res;
474 }
475 
476 PG_FUNCTION_INFO_V1(uri_lt);
477 Datum
uri_lt(PG_FUNCTION_ARGS)478 uri_lt(PG_FUNCTION_ARGS)
479 {
480 	Datum arg1 = PG_GETARG_DATUM(0);
481 	Datum arg2 = PG_GETARG_DATUM(1);
482 
483 	PG_RETURN_BOOL(_uri_cmp(arg1, arg2) < 0);
484 }
485 
486 PG_FUNCTION_INFO_V1(uri_le);
487 Datum
uri_le(PG_FUNCTION_ARGS)488 uri_le(PG_FUNCTION_ARGS)
489 {
490 	Datum arg1 = PG_GETARG_DATUM(0);
491 	Datum arg2 = PG_GETARG_DATUM(1);
492 
493 	PG_RETURN_BOOL(_uri_cmp(arg1, arg2) <= 0);
494 }
495 
496 PG_FUNCTION_INFO_V1(uri_eq);
497 Datum
uri_eq(PG_FUNCTION_ARGS)498 uri_eq(PG_FUNCTION_ARGS)
499 {
500 	Datum arg1 = PG_GETARG_DATUM(0);
501 	Datum arg2 = PG_GETARG_DATUM(1);
502 
503 	PG_RETURN_BOOL(_uri_cmp(arg1, arg2) == 0);
504 }
505 
506 PG_FUNCTION_INFO_V1(uri_ne);
507 Datum
uri_ne(PG_FUNCTION_ARGS)508 uri_ne(PG_FUNCTION_ARGS)
509 {
510 	Datum arg1 = PG_GETARG_DATUM(0);
511 	Datum arg2 = PG_GETARG_DATUM(1);
512 
513 	PG_RETURN_BOOL(_uri_cmp(arg1, arg2) != 0);
514 }
515 
516 PG_FUNCTION_INFO_V1(uri_ge);
517 Datum
uri_ge(PG_FUNCTION_ARGS)518 uri_ge(PG_FUNCTION_ARGS)
519 {
520 	Datum arg1 = PG_GETARG_DATUM(0);
521 	Datum arg2 = PG_GETARG_DATUM(1);
522 
523 	PG_RETURN_BOOL(_uri_cmp(arg1, arg2) >= 0);
524 }
525 
526 PG_FUNCTION_INFO_V1(uri_gt);
527 Datum
uri_gt(PG_FUNCTION_ARGS)528 uri_gt(PG_FUNCTION_ARGS)
529 {
530 	Datum arg1 = PG_GETARG_DATUM(0);
531 	Datum arg2 = PG_GETARG_DATUM(1);
532 
533 	PG_RETURN_BOOL(_uri_cmp(arg1, arg2) > 0);
534 }
535 
536 PG_FUNCTION_INFO_V1(uri_cmp);
537 Datum
uri_cmp(PG_FUNCTION_ARGS)538 uri_cmp(PG_FUNCTION_ARGS)
539 {
540 	Datum arg1 = PG_GETARG_DATUM(0);
541 	Datum arg2 = PG_GETARG_DATUM(1);
542 
543 	PG_RETURN_INT32(_uri_cmp(arg1, arg2));
544 }
545 
546 PG_FUNCTION_INFO_V1(uri_hash);
547 Datum
uri_hash(PG_FUNCTION_ARGS)548 uri_hash(PG_FUNCTION_ARGS)
549 {
550 	uritype	   *key = PG_GETARG_URI_PP(0);
551 	Datum		result;
552 
553 	result = hash_any((unsigned char *) VARDATA_ANY(key),
554 					  VARSIZE_ANY_EXHDR(key));
555 
556 	/* Avoid leaking memory for toasted inputs */
557 	PG_FREE_IF_COPY(key, 0);
558 
559 	return result;
560 }
561 
562 PG_FUNCTION_INFO_V1(uri_escape);
563 Datum
uri_escape(PG_FUNCTION_ARGS)564 uri_escape(PG_FUNCTION_ARGS)
565 {
566 	text *arg = PG_GETARG_TEXT_PP(0);
567 	bool space_to_plus = PG_GETARG_BOOL(1);
568 	bool normalize_breaks = PG_GETARG_BOOL(2);
569 
570 	size_t chars_required;
571 	char *ret;
572 
573 	chars_required = (VARSIZE(arg) - 4) * (normalize_breaks ? 6 : 3) + 1;
574 	ret = palloc(chars_required);
575 	uriEscapeExA(VARDATA(arg),
576 				 VARDATA(arg) + VARSIZE(arg) - 4,
577 				 ret,
578 				 space_to_plus, normalize_breaks);
579 
580 	PG_RETURN_TEXT_P(cstring_to_text(ret));
581 }
582 
583 PG_FUNCTION_INFO_V1(uri_unescape);
584 Datum
uri_unescape(PG_FUNCTION_ARGS)585 uri_unescape(PG_FUNCTION_ARGS)
586 {
587 	text *arg = PG_GETARG_TEXT_PP(0);
588 	bool plus_to_space = PG_GETARG_BOOL(1);
589 	bool break_conversion = PG_GETARG_BOOL(2);
590 
591 	char *s = text_to_cstring(arg);
592 
593 	uriUnescapeInPlaceExA(s, plus_to_space, break_conversion);
594 
595 	PG_RETURN_TEXT_P(cstring_to_text(s));
596 }
597