1 /*-------------------------------------------------------------------------
2  *
3  * xml.c
4  *	  XML data type support.
5  *
6  *
7  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/utils/adt/xml.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 /*
16  * Generally, XML type support is only available when libxml use was
17  * configured during the build.  But even if that is not done, the
18  * type and all the functions are available, but most of them will
19  * fail.  For one thing, this avoids having to manage variant catalog
20  * installations.  But it also has nice effects such as that you can
21  * dump a database containing XML type data even if the server is not
22  * linked with libxml.  Thus, make sure xml_out() works even if nothing
23  * else does.
24  */
25 
26 /*
27  * Notes on memory management:
28  *
29  * Sometimes libxml allocates global structures in the hope that it can reuse
30  * them later on.  This makes it impractical to change the xmlMemSetup
31  * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32  * allocated with malloc() or vice versa.  Since libxml might be used by
33  * loadable modules, eg libperl, our only safe choices are to change the
34  * functions at postmaster/backend launch or not at all.  Since we'd rather
35  * not activate libxml in sessions that might never use it, the latter choice
36  * is the preferred one.  However, for debugging purposes it can be awfully
37  * handy to constrain libxml's allocations to be done in a specific palloc
38  * context, where they're easy to track.  Therefore there is code here that
39  * can be enabled in debug builds to redirect libxml's allocations into a
40  * special context LibxmlContext.  It's not recommended to turn this on in
41  * a production build because of the possibility of bad interactions with
42  * external modules.
43  */
44 /* #define USE_LIBXMLCONTEXT */
45 
46 #include "postgres.h"
47 
48 #ifdef USE_LIBXML
49 #include <libxml/chvalid.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/tree.h>
53 #include <libxml/uri.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/xmlversion.h>
56 #include <libxml/xmlwriter.h>
57 #include <libxml/xpath.h>
58 #include <libxml/xpathInternals.h>
59 
60 /*
61  * We used to check for xmlStructuredErrorContext via a configure test; but
62  * that doesn't work on Windows, so instead use this grottier method of
63  * testing the library version number.
64  */
65 #if LIBXML_VERSION >= 20704
66 #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
67 #endif
68 #endif							/* USE_LIBXML */
69 
70 #include "access/htup_details.h"
71 #include "access/table.h"
72 #include "catalog/namespace.h"
73 #include "catalog/pg_class.h"
74 #include "catalog/pg_type.h"
75 #include "commands/dbcommands.h"
76 #include "executor/spi.h"
77 #include "executor/tablefunc.h"
78 #include "fmgr.h"
79 #include "lib/stringinfo.h"
80 #include "libpq/pqformat.h"
81 #include "mb/pg_wchar.h"
82 #include "miscadmin.h"
83 #include "nodes/execnodes.h"
84 #include "nodes/nodeFuncs.h"
85 #include "utils/array.h"
86 #include "utils/builtins.h"
87 #include "utils/date.h"
88 #include "utils/datetime.h"
89 #include "utils/lsyscache.h"
90 #include "utils/memutils.h"
91 #include "utils/rel.h"
92 #include "utils/syscache.h"
93 #include "utils/xml.h"
94 
95 
96 /* GUC variables */
97 int			xmlbinary;
98 int			xmloption;
99 
100 #ifdef USE_LIBXML
101 
102 /* random number to identify PgXmlErrorContext */
103 #define ERRCXT_MAGIC	68275028
104 
/*
 * Per-call state for this module's libxml error handling.
 *
 * pg_xml_init() allocates one of these with palloc, fills it in, and hands
 * it to libxml as the context argument for xml_errorHandler.  The saved_*
 * fields remember what libxml had installed beforehand.
 */
struct PgXmlErrorContext
{
	/* set to ERRCXT_MAGIC by pg_xml_init; pg_xml_done asserts on it */
	int			magic;
	/* strictness argument passed to pg_xml_init */
	PgXmlStrictness strictness;
	/* current error status and accumulated message, if any */
	bool		err_occurred;
	StringInfoData err_buf;
	/* previous libxml error handling state (saved by pg_xml_init) */
	xmlStructuredErrorFunc saved_errfunc;
	void	   *saved_errcxt;
	/* previous libxml entity handler (saved by pg_xml_init) */
	xmlExternalEntityLoader saved_entityfunc;
};
119 
120 static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
121 										   xmlParserCtxtPtr ctxt);
122 static void xml_errorHandler(void *data, xmlErrorPtr error);
123 static void xml_ereport_by_code(int level, int sqlcode,
124 								const char *msg, int errcode);
125 static void chopStringInfoNewlines(StringInfo str);
126 static void appendStringInfoLineSeparator(StringInfo str);
127 
128 #ifdef USE_LIBXMLCONTEXT
129 
130 static MemoryContext LibxmlContext = NULL;
131 
132 static void xml_memory_init(void);
133 static void *xml_palloc(size_t size);
134 static void *xml_repalloc(void *ptr, size_t size);
135 static void xml_pfree(void *ptr);
136 static char *xml_pstrdup(const char *string);
137 #endif							/* USE_LIBXMLCONTEXT */
138 
139 static xmlChar *xml_text2xmlChar(text *in);
140 static int	parse_xml_decl(const xmlChar *str, size_t *lenp,
141 						   xmlChar **version, xmlChar **encoding, int *standalone);
142 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
143 						   pg_enc encoding, int standalone);
144 static bool xml_doctype_in_content(const xmlChar *str);
145 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
146 						   bool preserve_whitespace, int encoding);
147 static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
148 static int	xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
149 								   ArrayBuildState *astate,
150 								   PgXmlErrorContext *xmlerrcxt);
151 static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
152 #endif							/* USE_LIBXML */
153 
154 static void xmldata_root_element_start(StringInfo result, const char *eltname,
155 									   const char *xmlschema, const char *targetns,
156 									   bool top_level);
157 static void xmldata_root_element_end(StringInfo result, const char *eltname);
158 static StringInfo query_to_xml_internal(const char *query, char *tablename,
159 										const char *xmlschema, bool nulls, bool tableforest,
160 										const char *targetns, bool top_level);
161 static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
162 											  bool nulls, bool tableforest, const char *targetns);
163 static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
164 													 List *relid_list, bool nulls,
165 													 bool tableforest, const char *targetns);
166 static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
167 													  bool nulls, bool tableforest,
168 													  const char *targetns);
169 static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
170 static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
171 static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
172 static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
173 									  char *tablename, bool nulls, bool tableforest,
174 									  const char *targetns, bool top_level);
175 
176 /* XMLTABLE support */
177 #ifdef USE_LIBXML
178 /* random number to identify XmlTableContext */
179 #define XMLTABLE_CONTEXT_MAGIC	46922182
/*
 * Working state for the XMLTABLE callbacks declared below.
 *
 * NOTE(review): the code that fills and reads these fields is not part of
 * this excerpt, so the per-field notes are inferred from names and types
 * only -- confirm against the XmlTable* functions.
 */
typedef struct XmlTableBuilderData
{
	int			magic;			/* presumably XMLTABLE_CONTEXT_MAGIC */
	int			natts;			/* presumably the number of output columns */
	long int	row_count;		/* presumably rows fetched so far */
	PgXmlErrorContext *xmlerrcxt;	/* libxml error-handling state */
	xmlParserCtxtPtr ctxt;		/* libxml parser context */
	xmlDocPtr	doc;			/* parsed input document */
	xmlXPathContextPtr xpathcxt;	/* XPath evaluation context */
	xmlXPathCompExprPtr xpathcomp;	/* presumably the compiled row filter */
	xmlXPathObjectPtr xpathobj; /* presumably the row filter's result */
	xmlXPathCompExprPtr *xpathscomp;	/* presumably one compiled
										 * expression per column */
} XmlTableBuilderData;
193 #endif
194 
195 static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
196 static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
197 static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
198 								 const char *uri);
199 static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
200 static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
201 									const char *path, int colnum);
202 static bool XmlTableFetchRow(struct TableFuncScanState *state);
203 static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
204 							  Oid typid, int32 typmod, bool *isnull);
205 static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
206 
/*
 * Callback table exposing the XmlTable* functions declared above.  The
 * initializer is positional, so the entries must stay in the member order
 * of TableFuncRoutine.
 */
const TableFuncRoutine XmlTableRoutine =
{
	XmlTableInitOpaque,
	XmlTableSetDocument,
	XmlTableSetNamespace,
	XmlTableSetRowFilter,
	XmlTableSetColumnFilter,
	XmlTableFetchRow,
	XmlTableGetValue,
	XmlTableDestroyOpaque
};
218 
/*
 * Report that an XML feature is unavailable.  The functions in this file
 * invoke this in their "not USE_LIBXML" branch, followed by a dummy return.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support."), \
			 errhint("You need to rebuild PostgreSQL using --with-libxml.")))
225 
226 
227 /* from SQL/XML:2008 section 4.9 */
228 #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
229 #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
230 #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
231 
232 
233 #ifdef USE_LIBXML
234 
235 static int
xmlChar_to_encoding(const xmlChar * encoding_name)236 xmlChar_to_encoding(const xmlChar *encoding_name)
237 {
238 	int			encoding = pg_char_to_encoding((const char *) encoding_name);
239 
240 	if (encoding < 0)
241 		ereport(ERROR,
242 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
243 				 errmsg("invalid encoding name \"%s\"",
244 						(const char *) encoding_name)));
245 	return encoding;
246 }
247 #endif
248 
249 
250 /*
251  * xml_in uses a plain C string to VARDATA conversion, so for the time being
252  * we use the conversion function for the text datatype.
253  *
254  * This is only acceptable so long as xmltype and text use the same
255  * representation.
256  */
257 Datum
xml_in(PG_FUNCTION_ARGS)258 xml_in(PG_FUNCTION_ARGS)
259 {
260 #ifdef USE_LIBXML
261 	char	   *s = PG_GETARG_CSTRING(0);
262 	xmltype    *vardata;
263 	xmlDocPtr	doc;
264 
265 	vardata = (xmltype *) cstring_to_text(s);
266 
267 	/*
268 	 * Parse the data to check if it is well-formed XML data.  Assume that
269 	 * ERROR occurred if parsing failed.
270 	 */
271 	doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
272 	xmlFreeDoc(doc);
273 
274 	PG_RETURN_XML_P(vardata);
275 #else
276 	NO_XML_SUPPORT();
277 	return 0;
278 #endif
279 }
280 
281 
282 #define PG_XML_DEFAULT_VERSION "1.0"
283 
284 
285 /*
286  * xml_out_internal uses a plain VARDATA to C string conversion, so for the
287  * time being we use the conversion function for the text datatype.
288  *
289  * This is only acceptable so long as xmltype and text use the same
290  * representation.
291  */
292 static char *
xml_out_internal(xmltype * x,pg_enc target_encoding)293 xml_out_internal(xmltype *x, pg_enc target_encoding)
294 {
295 	char	   *str = text_to_cstring((text *) x);
296 
297 #ifdef USE_LIBXML
298 	size_t		len = strlen(str);
299 	xmlChar    *version;
300 	int			standalone;
301 	int			res_code;
302 
303 	if ((res_code = parse_xml_decl((xmlChar *) str,
304 								   &len, &version, NULL, &standalone)) == 0)
305 	{
306 		StringInfoData buf;
307 
308 		initStringInfo(&buf);
309 
310 		if (!print_xml_decl(&buf, version, target_encoding, standalone))
311 		{
312 			/*
313 			 * If we are not going to produce an XML declaration, eat a single
314 			 * newline in the original string to prevent empty first lines in
315 			 * the output.
316 			 */
317 			if (*(str + len) == '\n')
318 				len += 1;
319 		}
320 		appendStringInfoString(&buf, str + len);
321 
322 		pfree(str);
323 
324 		return buf.data;
325 	}
326 
327 	xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
328 						"could not parse XML declaration in stored value",
329 						res_code);
330 #endif
331 	return str;
332 }
333 
334 
335 Datum
xml_out(PG_FUNCTION_ARGS)336 xml_out(PG_FUNCTION_ARGS)
337 {
338 	xmltype    *x = PG_GETARG_XML_P(0);
339 
340 	/*
341 	 * xml_out removes the encoding property in all cases.  This is because we
342 	 * cannot control from here whether the datum will be converted to a
343 	 * different client encoding, so we'd do more harm than good by including
344 	 * it.
345 	 */
346 	PG_RETURN_CSTRING(xml_out_internal(x, 0));
347 }
348 
349 
350 Datum
xml_recv(PG_FUNCTION_ARGS)351 xml_recv(PG_FUNCTION_ARGS)
352 {
353 #ifdef USE_LIBXML
354 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
355 	xmltype    *result;
356 	char	   *str;
357 	char	   *newstr;
358 	int			nbytes;
359 	xmlDocPtr	doc;
360 	xmlChar    *encodingStr = NULL;
361 	int			encoding;
362 
363 	/*
364 	 * Read the data in raw format. We don't know yet what the encoding is, as
365 	 * that information is embedded in the xml declaration; so we have to
366 	 * parse that before converting to server encoding.
367 	 */
368 	nbytes = buf->len - buf->cursor;
369 	str = (char *) pq_getmsgbytes(buf, nbytes);
370 
371 	/*
372 	 * We need a null-terminated string to pass to parse_xml_decl().  Rather
373 	 * than make a separate copy, make the temporary result one byte bigger
374 	 * than it needs to be.
375 	 */
376 	result = palloc(nbytes + 1 + VARHDRSZ);
377 	SET_VARSIZE(result, nbytes + VARHDRSZ);
378 	memcpy(VARDATA(result), str, nbytes);
379 	str = VARDATA(result);
380 	str[nbytes] = '\0';
381 
382 	parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
383 
384 	/*
385 	 * If encoding wasn't explicitly specified in the XML header, treat it as
386 	 * UTF-8, as that's the default in XML. This is different from xml_in(),
387 	 * where the input has to go through the normal client to server encoding
388 	 * conversion.
389 	 */
390 	encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
391 
392 	/*
393 	 * Parse the data to check if it is well-formed XML data.  Assume that
394 	 * xml_parse will throw ERROR if not.
395 	 */
396 	doc = xml_parse(result, xmloption, true, encoding);
397 	xmlFreeDoc(doc);
398 
399 	/* Now that we know what we're dealing with, convert to server encoding */
400 	newstr = pg_any_to_server(str, nbytes, encoding);
401 
402 	if (newstr != str)
403 	{
404 		pfree(result);
405 		result = (xmltype *) cstring_to_text(newstr);
406 		pfree(newstr);
407 	}
408 
409 	PG_RETURN_XML_P(result);
410 #else
411 	NO_XML_SUPPORT();
412 	return 0;
413 #endif
414 }
415 
416 
417 Datum
xml_send(PG_FUNCTION_ARGS)418 xml_send(PG_FUNCTION_ARGS)
419 {
420 	xmltype    *x = PG_GETARG_XML_P(0);
421 	char	   *outval;
422 	StringInfoData buf;
423 
424 	/*
425 	 * xml_out_internal doesn't convert the encoding, it just prints the right
426 	 * declaration. pq_sendtext will do the conversion.
427 	 */
428 	outval = xml_out_internal(x, pg_get_client_encoding());
429 
430 	pq_begintypsend(&buf);
431 	pq_sendtext(&buf, outval, strlen(outval));
432 	pfree(outval);
433 	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
434 }
435 
436 
437 #ifdef USE_LIBXML
438 static void
appendStringInfoText(StringInfo str,const text * t)439 appendStringInfoText(StringInfo str, const text *t)
440 {
441 	appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
442 }
443 #endif
444 
445 
446 static xmltype *
stringinfo_to_xmltype(StringInfo buf)447 stringinfo_to_xmltype(StringInfo buf)
448 {
449 	return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
450 }
451 
452 
453 static xmltype *
cstring_to_xmltype(const char * string)454 cstring_to_xmltype(const char *string)
455 {
456 	return (xmltype *) cstring_to_text(string);
457 }
458 
459 
460 #ifdef USE_LIBXML
461 static xmltype *
xmlBuffer_to_xmltype(xmlBufferPtr buf)462 xmlBuffer_to_xmltype(xmlBufferPtr buf)
463 {
464 	return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
465 												xmlBufferLength(buf));
466 }
467 #endif
468 
469 
470 Datum
xmlcomment(PG_FUNCTION_ARGS)471 xmlcomment(PG_FUNCTION_ARGS)
472 {
473 #ifdef USE_LIBXML
474 	text	   *arg = PG_GETARG_TEXT_PP(0);
475 	char	   *argdata = VARDATA_ANY(arg);
476 	int			len = VARSIZE_ANY_EXHDR(arg);
477 	StringInfoData buf;
478 	int			i;
479 
480 	/* check for "--" in string or "-" at the end */
481 	for (i = 1; i < len; i++)
482 	{
483 		if (argdata[i] == '-' && argdata[i - 1] == '-')
484 			ereport(ERROR,
485 					(errcode(ERRCODE_INVALID_XML_COMMENT),
486 					 errmsg("invalid XML comment")));
487 	}
488 	if (len > 0 && argdata[len - 1] == '-')
489 		ereport(ERROR,
490 				(errcode(ERRCODE_INVALID_XML_COMMENT),
491 				 errmsg("invalid XML comment")));
492 
493 	initStringInfo(&buf);
494 	appendStringInfoString(&buf, "<!--");
495 	appendStringInfoText(&buf, arg);
496 	appendStringInfoString(&buf, "-->");
497 
498 	PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
499 #else
500 	NO_XML_SUPPORT();
501 	return 0;
502 #endif
503 }
504 
505 
506 
507 /*
508  * TODO: xmlconcat needs to merge the notations and unparsed entities
509  * of the argument values.  Not very important in practice, though.
510  */
511 xmltype *
xmlconcat(List * args)512 xmlconcat(List *args)
513 {
514 #ifdef USE_LIBXML
515 	int			global_standalone = 1;
516 	xmlChar    *global_version = NULL;
517 	bool		global_version_no_value = false;
518 	StringInfoData buf;
519 	ListCell   *v;
520 
521 	initStringInfo(&buf);
522 	foreach(v, args)
523 	{
524 		xmltype    *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
525 		size_t		len;
526 		xmlChar    *version;
527 		int			standalone;
528 		char	   *str;
529 
530 		len = VARSIZE(x) - VARHDRSZ;
531 		str = text_to_cstring((text *) x);
532 
533 		parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
534 
535 		if (standalone == 0 && global_standalone == 1)
536 			global_standalone = 0;
537 		if (standalone < 0)
538 			global_standalone = -1;
539 
540 		if (!version)
541 			global_version_no_value = true;
542 		else if (!global_version)
543 			global_version = version;
544 		else if (xmlStrcmp(version, global_version) != 0)
545 			global_version_no_value = true;
546 
547 		appendStringInfoString(&buf, str + len);
548 		pfree(str);
549 	}
550 
551 	if (!global_version_no_value || global_standalone >= 0)
552 	{
553 		StringInfoData buf2;
554 
555 		initStringInfo(&buf2);
556 
557 		print_xml_decl(&buf2,
558 					   (!global_version_no_value) ? global_version : NULL,
559 					   0,
560 					   global_standalone);
561 
562 		appendBinaryStringInfo(&buf2, buf.data, buf.len);
563 		buf = buf2;
564 	}
565 
566 	return stringinfo_to_xmltype(&buf);
567 #else
568 	NO_XML_SUPPORT();
569 	return NULL;
570 #endif
571 }
572 
573 
574 /*
575  * XMLAGG support
576  */
577 Datum
xmlconcat2(PG_FUNCTION_ARGS)578 xmlconcat2(PG_FUNCTION_ARGS)
579 {
580 	if (PG_ARGISNULL(0))
581 	{
582 		if (PG_ARGISNULL(1))
583 			PG_RETURN_NULL();
584 		else
585 			PG_RETURN_XML_P(PG_GETARG_XML_P(1));
586 	}
587 	else if (PG_ARGISNULL(1))
588 		PG_RETURN_XML_P(PG_GETARG_XML_P(0));
589 	else
590 		PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
591 											 PG_GETARG_XML_P(1))));
592 }
593 
594 
595 Datum
texttoxml(PG_FUNCTION_ARGS)596 texttoxml(PG_FUNCTION_ARGS)
597 {
598 	text	   *data = PG_GETARG_TEXT_PP(0);
599 
600 	PG_RETURN_XML_P(xmlparse(data, xmloption, true));
601 }
602 
603 
604 Datum
xmltotext(PG_FUNCTION_ARGS)605 xmltotext(PG_FUNCTION_ARGS)
606 {
607 	xmltype    *data = PG_GETARG_XML_P(0);
608 
609 	/* It's actually binary compatible. */
610 	PG_RETURN_TEXT_P((text *) data);
611 }
612 
613 
614 text *
xmltotext_with_xmloption(xmltype * data,XmlOptionType xmloption_arg)615 xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
616 {
617 	if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
618 		ereport(ERROR,
619 				(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
620 				 errmsg("not an XML document")));
621 
622 	/* It's actually binary compatible, save for the above check. */
623 	return (text *) data;
624 }
625 
626 
627 xmltype *
xmlelement(XmlExpr * xexpr,Datum * named_argvalue,bool * named_argnull,Datum * argvalue,bool * argnull)628 xmlelement(XmlExpr *xexpr,
629 		   Datum *named_argvalue, bool *named_argnull,
630 		   Datum *argvalue, bool *argnull)
631 {
632 #ifdef USE_LIBXML
633 	xmltype    *result;
634 	List	   *named_arg_strings;
635 	List	   *arg_strings;
636 	int			i;
637 	ListCell   *arg;
638 	ListCell   *narg;
639 	PgXmlErrorContext *xmlerrcxt;
640 	volatile xmlBufferPtr buf = NULL;
641 	volatile xmlTextWriterPtr writer = NULL;
642 
643 	/*
644 	 * All arguments are already evaluated, and their values are passed in the
645 	 * named_argvalue/named_argnull or argvalue/argnull arrays.  This avoids
646 	 * issues if one of the arguments involves a call to some other function
647 	 * or subsystem that wants to use libxml on its own terms.  We examine the
648 	 * original XmlExpr to identify the numbers and types of the arguments.
649 	 */
650 	named_arg_strings = NIL;
651 	i = 0;
652 	foreach(arg, xexpr->named_args)
653 	{
654 		Expr	   *e = (Expr *) lfirst(arg);
655 		char	   *str;
656 
657 		if (named_argnull[i])
658 			str = NULL;
659 		else
660 			str = map_sql_value_to_xml_value(named_argvalue[i],
661 											 exprType((Node *) e),
662 											 false);
663 		named_arg_strings = lappend(named_arg_strings, str);
664 		i++;
665 	}
666 
667 	arg_strings = NIL;
668 	i = 0;
669 	foreach(arg, xexpr->args)
670 	{
671 		Expr	   *e = (Expr *) lfirst(arg);
672 		char	   *str;
673 
674 		/* here we can just forget NULL elements immediately */
675 		if (!argnull[i])
676 		{
677 			str = map_sql_value_to_xml_value(argvalue[i],
678 											 exprType((Node *) e),
679 											 true);
680 			arg_strings = lappend(arg_strings, str);
681 		}
682 		i++;
683 	}
684 
685 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
686 
687 	PG_TRY();
688 	{
689 		buf = xmlBufferCreate();
690 		if (buf == NULL || xmlerrcxt->err_occurred)
691 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
692 						"could not allocate xmlBuffer");
693 		writer = xmlNewTextWriterMemory(buf, 0);
694 		if (writer == NULL || xmlerrcxt->err_occurred)
695 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
696 						"could not allocate xmlTextWriter");
697 
698 		xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
699 
700 		forboth(arg, named_arg_strings, narg, xexpr->arg_names)
701 		{
702 			char	   *str = (char *) lfirst(arg);
703 			char	   *argname = strVal(lfirst(narg));
704 
705 			if (str)
706 				xmlTextWriterWriteAttribute(writer,
707 											(xmlChar *) argname,
708 											(xmlChar *) str);
709 		}
710 
711 		foreach(arg, arg_strings)
712 		{
713 			char	   *str = (char *) lfirst(arg);
714 
715 			xmlTextWriterWriteRaw(writer, (xmlChar *) str);
716 		}
717 
718 		xmlTextWriterEndElement(writer);
719 
720 		/* we MUST do this now to flush data out to the buffer ... */
721 		xmlFreeTextWriter(writer);
722 		writer = NULL;
723 
724 		result = xmlBuffer_to_xmltype(buf);
725 	}
726 	PG_CATCH();
727 	{
728 		if (writer)
729 			xmlFreeTextWriter(writer);
730 		if (buf)
731 			xmlBufferFree(buf);
732 
733 		pg_xml_done(xmlerrcxt, true);
734 
735 		PG_RE_THROW();
736 	}
737 	PG_END_TRY();
738 
739 	xmlBufferFree(buf);
740 
741 	pg_xml_done(xmlerrcxt, false);
742 
743 	return result;
744 #else
745 	NO_XML_SUPPORT();
746 	return NULL;
747 #endif
748 }
749 
750 
751 xmltype *
xmlparse(text * data,XmlOptionType xmloption_arg,bool preserve_whitespace)752 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
753 {
754 #ifdef USE_LIBXML
755 	xmlDocPtr	doc;
756 
757 	doc = xml_parse(data, xmloption_arg, preserve_whitespace,
758 					GetDatabaseEncoding());
759 	xmlFreeDoc(doc);
760 
761 	return (xmltype *) data;
762 #else
763 	NO_XML_SUPPORT();
764 	return NULL;
765 #endif
766 }
767 
768 
769 xmltype *
xmlpi(const char * target,text * arg,bool arg_is_null,bool * result_is_null)770 xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
771 {
772 #ifdef USE_LIBXML
773 	xmltype    *result;
774 	StringInfoData buf;
775 
776 	if (pg_strcasecmp(target, "xml") == 0)
777 		ereport(ERROR,
778 				(errcode(ERRCODE_SYNTAX_ERROR), /* really */
779 				 errmsg("invalid XML processing instruction"),
780 				 errdetail("XML processing instruction target name cannot be \"%s\".", target)));
781 
782 	/*
783 	 * Following the SQL standard, the null check comes after the syntax check
784 	 * above.
785 	 */
786 	*result_is_null = arg_is_null;
787 	if (*result_is_null)
788 		return NULL;
789 
790 	initStringInfo(&buf);
791 
792 	appendStringInfo(&buf, "<?%s", target);
793 
794 	if (arg != NULL)
795 	{
796 		char	   *string;
797 
798 		string = text_to_cstring(arg);
799 		if (strstr(string, "?>") != NULL)
800 			ereport(ERROR,
801 					(errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
802 					 errmsg("invalid XML processing instruction"),
803 					 errdetail("XML processing instruction cannot contain \"?>\".")));
804 
805 		appendStringInfoChar(&buf, ' ');
806 		appendStringInfoString(&buf, string + strspn(string, " "));
807 		pfree(string);
808 	}
809 	appendStringInfoString(&buf, "?>");
810 
811 	result = stringinfo_to_xmltype(&buf);
812 	pfree(buf.data);
813 	return result;
814 #else
815 	NO_XML_SUPPORT();
816 	return NULL;
817 #endif
818 }
819 
820 
821 xmltype *
xmlroot(xmltype * data,text * version,int standalone)822 xmlroot(xmltype *data, text *version, int standalone)
823 {
824 #ifdef USE_LIBXML
825 	char	   *str;
826 	size_t		len;
827 	xmlChar    *orig_version;
828 	int			orig_standalone;
829 	StringInfoData buf;
830 
831 	len = VARSIZE(data) - VARHDRSZ;
832 	str = text_to_cstring((text *) data);
833 
834 	parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
835 
836 	if (version)
837 		orig_version = xml_text2xmlChar(version);
838 	else
839 		orig_version = NULL;
840 
841 	switch (standalone)
842 	{
843 		case XML_STANDALONE_YES:
844 			orig_standalone = 1;
845 			break;
846 		case XML_STANDALONE_NO:
847 			orig_standalone = 0;
848 			break;
849 		case XML_STANDALONE_NO_VALUE:
850 			orig_standalone = -1;
851 			break;
852 		case XML_STANDALONE_OMITTED:
853 			/* leave original value */
854 			break;
855 	}
856 
857 	initStringInfo(&buf);
858 	print_xml_decl(&buf, orig_version, 0, orig_standalone);
859 	appendStringInfoString(&buf, str + len);
860 
861 	return stringinfo_to_xmltype(&buf);
862 #else
863 	NO_XML_SUPPORT();
864 	return NULL;
865 #endif
866 }
867 
868 
869 /*
870  * Validate document (given as string) against DTD (given as external link)
871  *
872  * This has been removed because it is a security hole: unprivileged users
873  * should not be able to use Postgres to fetch arbitrary external files,
874  * which unfortunately is exactly what libxml is willing to do with the DTD
875  * parameter.
876  */
877 Datum
xmlvalidate(PG_FUNCTION_ARGS)878 xmlvalidate(PG_FUNCTION_ARGS)
879 {
880 	ereport(ERROR,
881 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
882 			 errmsg("xmlvalidate is not implemented")));
883 	return 0;
884 }
885 
886 
887 bool
xml_is_document(xmltype * arg)888 xml_is_document(xmltype *arg)
889 {
890 #ifdef USE_LIBXML
891 	bool		result;
892 	volatile xmlDocPtr doc = NULL;
893 	MemoryContext ccxt = CurrentMemoryContext;
894 
895 	/* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
896 	PG_TRY();
897 	{
898 		doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
899 						GetDatabaseEncoding());
900 		result = true;
901 	}
902 	PG_CATCH();
903 	{
904 		ErrorData  *errdata;
905 		MemoryContext ecxt;
906 
907 		ecxt = MemoryContextSwitchTo(ccxt);
908 		errdata = CopyErrorData();
909 		if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
910 		{
911 			FlushErrorState();
912 			result = false;
913 		}
914 		else
915 		{
916 			MemoryContextSwitchTo(ecxt);
917 			PG_RE_THROW();
918 		}
919 	}
920 	PG_END_TRY();
921 
922 	if (doc)
923 		xmlFreeDoc(doc);
924 
925 	return result;
926 #else							/* not USE_LIBXML */
927 	NO_XML_SUPPORT();
928 	return false;
929 #endif							/* not USE_LIBXML */
930 }
931 
932 
933 #ifdef USE_LIBXML
934 
935 /*
936  * pg_xml_init_library --- set up for use of libxml
937  *
938  * This should be called by each function that is about to use libxml
939  * facilities but doesn't require error handling.  It initializes libxml
940  * and verifies compatibility with the loaded libxml version.  These are
941  * once-per-session activities.
942  *
943  * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
944  * check)
945  */
946 void
pg_xml_init_library(void)947 pg_xml_init_library(void)
948 {
949 	static bool first_time = true;
950 
951 	if (first_time)
952 	{
953 		/* Stuff we need do only once per session */
954 
955 		/*
956 		 * Currently, we have no pure UTF-8 support for internals -- check if
957 		 * we can work.
958 		 */
959 		if (sizeof(char) != sizeof(xmlChar))
960 			ereport(ERROR,
961 					(errmsg("could not initialize XML library"),
962 					 errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
963 							   (int) sizeof(char), (int) sizeof(xmlChar))));
964 
965 #ifdef USE_LIBXMLCONTEXT
966 		/* Set up libxml's memory allocation our way */
967 		xml_memory_init();
968 #endif
969 
970 		/* Check library compatibility */
971 		LIBXML_TEST_VERSION;
972 
973 		first_time = false;
974 	}
975 }
976 
977 /*
978  * pg_xml_init --- set up for use of libxml and register an error handler
979  *
980  * This should be called by each function that is about to use libxml
981  * facilities and requires error handling.  It initializes libxml with
982  * pg_xml_init_library() and establishes our libxml error handler.
983  *
984  * strictness determines which errors are reported and which are ignored.
985  *
986  * Calls to this function MUST be followed by a PG_TRY block that guarantees
987  * that pg_xml_done() is called during either normal or error exit.
988  *
989  * This is exported for use by contrib/xml2, as well as other code that might
990  * wish to share use of this module's libxml error handler.
991  */
992 PgXmlErrorContext *
pg_xml_init(PgXmlStrictness strictness)993 pg_xml_init(PgXmlStrictness strictness)
994 {
995 	PgXmlErrorContext *errcxt;
996 	void	   *new_errcxt;
997 
998 	/* Do one-time setup if needed */
999 	pg_xml_init_library();
1000 
1001 	/* Create error handling context structure */
1002 	errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1003 	errcxt->magic = ERRCXT_MAGIC;
1004 	errcxt->strictness = strictness;
1005 	errcxt->err_occurred = false;
1006 	initStringInfo(&errcxt->err_buf);
1007 
1008 	/*
1009 	 * Save original error handler and install ours. libxml originally didn't
1010 	 * distinguish between the contexts for generic and for structured error
1011 	 * handlers.  If we're using an old libxml version, we must thus save the
1012 	 * generic error context, even though we're using a structured error
1013 	 * handler.
1014 	 */
1015 	errcxt->saved_errfunc = xmlStructuredError;
1016 
1017 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1018 	errcxt->saved_errcxt = xmlStructuredErrorContext;
1019 #else
1020 	errcxt->saved_errcxt = xmlGenericErrorContext;
1021 #endif
1022 
1023 	xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1024 
1025 	/*
1026 	 * Verify that xmlSetStructuredErrorFunc set the context variable we
1027 	 * expected it to.  If not, the error context pointer we just saved is not
1028 	 * the correct thing to restore, and since that leaves us without a way to
1029 	 * restore the context in pg_xml_done, we must fail.
1030 	 *
1031 	 * The only known situation in which this test fails is if we compile with
1032 	 * headers from a libxml2 that doesn't track the structured error context
1033 	 * separately (< 2.7.4), but at runtime use a version that does, or vice
1034 	 * versa.  The libxml2 authors did not treat that change as constituting
1035 	 * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1036 	 * fails to protect us from this.
1037 	 */
1038 
1039 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1040 	new_errcxt = xmlStructuredErrorContext;
1041 #else
1042 	new_errcxt = xmlGenericErrorContext;
1043 #endif
1044 
1045 	if (new_errcxt != (void *) errcxt)
1046 		ereport(ERROR,
1047 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1048 				 errmsg("could not set up XML error handler"),
1049 				 errhint("This probably indicates that the version of libxml2"
1050 						 " being used is not compatible with the libxml2"
1051 						 " header files that PostgreSQL was built with.")));
1052 
1053 	/*
1054 	 * Also, install an entity loader to prevent unwanted fetches of external
1055 	 * files and URLs.
1056 	 */
1057 	errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1058 	xmlSetExternalEntityLoader(xmlPgEntityLoader);
1059 
1060 	return errcxt;
1061 }
1062 
1063 
1064 /*
1065  * pg_xml_done --- restore previous libxml error handling
1066  *
1067  * Resets libxml's global error-handling state to what it was before
1068  * pg_xml_init() was called.
1069  *
1070  * This routine verifies that all pending errors have been dealt with
1071  * (in assert-enabled builds, anyway).
1072  */
1073 void
pg_xml_done(PgXmlErrorContext * errcxt,bool isError)1074 pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1075 {
1076 	void	   *cur_errcxt;
1077 
1078 	/* An assert seems like enough protection here */
1079 	Assert(errcxt->magic == ERRCXT_MAGIC);
1080 
1081 	/*
1082 	 * In a normal exit, there should be no un-handled libxml errors.  But we
1083 	 * shouldn't try to enforce this during error recovery, since the longjmp
1084 	 * could have been thrown before xml_ereport had a chance to run.
1085 	 */
1086 	Assert(!errcxt->err_occurred || isError);
1087 
1088 	/*
1089 	 * Check that libxml's global state is correct, warn if not.  This is a
1090 	 * real test and not an Assert because it has a higher probability of
1091 	 * happening.
1092 	 */
1093 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1094 	cur_errcxt = xmlStructuredErrorContext;
1095 #else
1096 	cur_errcxt = xmlGenericErrorContext;
1097 #endif
1098 
1099 	if (cur_errcxt != (void *) errcxt)
1100 		elog(WARNING, "libxml error handling state is out of sync with xml.c");
1101 
1102 	/* Restore the saved handlers */
1103 	xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1104 	xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1105 
1106 	/*
1107 	 * Mark the struct as invalid, just in case somebody somehow manages to
1108 	 * call xml_errorHandler or xml_ereport with it.
1109 	 */
1110 	errcxt->magic = 0;
1111 
1112 	/* Release memory */
1113 	pfree(errcxt->err_buf.data);
1114 	pfree(errcxt);
1115 }
1116 
1117 
1118 /*
1119  * pg_xml_error_occurred() --- test the error flag
1120  */
1121 bool
pg_xml_error_occurred(PgXmlErrorContext * errcxt)1122 pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1123 {
1124 	return errcxt->err_occurred;
1125 }
1126 
1127 
1128 /*
1129  * SQL/XML allows storing "XML documents" or "XML content".  "XML
1130  * documents" are specified by the XML specification and are parsed
1131  * easily by libxml.  "XML content" is specified by SQL/XML as the
1132  * production "XMLDecl? content".  But libxml can only parse the
1133  * "content" part, so we have to parse the XML declaration ourselves
1134  * to complete this.
1135  */
1136 
/*
 * Make the *enclosing function* return XML_ERR_SPACE_REQUIRED unless the
 * character p points at is XML whitespace.  Because of the hidden "return",
 * this is usable only inside functions that return a libxml error code.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/* Advance p over any run of XML whitespace (p is modified in place) */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
/*
 * The argument is parenthesized in the comparisons so that an expression
 * argument cannot be split by operator precedence.
 *
 * NOTE(review): the *_ch classifiers look like they are meant for 8-bit
 * values, yet parse_xml_decl passes a decoded code point here --- confirm
 * whether the *Q variants should be used instead.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
			|| xmlIsDigit_ch(c) \
			|| (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
			|| xmlIsCombiningQ(c) \
			|| xmlIsExtender_ch(c))
1154 
1155 /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1156 static xmlChar *
xml_pnstrdup(const xmlChar * str,size_t len)1157 xml_pnstrdup(const xmlChar *str, size_t len)
1158 {
1159 	xmlChar    *result;
1160 
1161 	result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1162 	memcpy(result, str, len * sizeof(xmlChar));
1163 	result[len] = 0;
1164 	return result;
1165 }
1166 
1167 /* Ditto, except input is char* */
1168 static xmlChar *
pg_xmlCharStrndup(const char * str,size_t len)1169 pg_xmlCharStrndup(const char *str, size_t len)
1170 {
1171 	xmlChar    *result;
1172 
1173 	result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1174 	memcpy(result, str, len);
1175 	result[len] = '\0';
1176 
1177 	return result;
1178 }
1179 
1180 /*
1181  * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1182  *
1183  * The input xmlChar is freed regardless of success of the copy.
1184  */
1185 static char *
xml_pstrdup_and_free(xmlChar * str)1186 xml_pstrdup_and_free(xmlChar *str)
1187 {
1188 	char	   *result;
1189 
1190 	if (str)
1191 	{
1192 		PG_TRY();
1193 		{
1194 			result = pstrdup((char *) str);
1195 		}
1196 		PG_FINALLY();
1197 		{
1198 			xmlFree(str);
1199 		}
1200 		PG_END_TRY();
1201 	}
1202 	else
1203 		result = NULL;
1204 
1205 	return result;
1206 }
1207 
/*
 * parse_xml_decl --- recognize and decode a leading XML declaration
 *
 * str is the null-terminated input string.  Remaining arguments are
 * output arguments; each can be NULL if value is not wanted.
 *
 *	lenp: receives the number of xmlChars occupied by the declaration,
 *		or 0 when the input does not start with one.  It is stored only
 *		when the function succeeds.
 *	version, encoding: receive locally-palloc'd copies of the quoted
 *		values, or NULL when the item is not present.
 *	standalone: receives 1 for "yes", 0 for "no", -1 when not present.
 *
 * Result is 0 (XML_ERR_OK) if OK, a libxml error code if not.  On an error
 * return, output arguments that were already filled in before the problem
 * was detected are left as they are.
 */
static int
parse_xml_decl(const xmlChar *str, size_t *lenp,
			   xmlChar **version, xmlChar **encoding, int *standalone)
{
	const xmlChar *p;
	const xmlChar *save_p;
	size_t		len;
	int			utf8char;
	int			utf8len;

	/*
	 * Only initialize libxml.  We don't need error handling here, but we do
	 * need to make sure libxml is initialized before calling any of its
	 * functions.  Note that this is safe (and a no-op) if caller has already
	 * done pg_xml_init().
	 */
	pg_xml_init_library();

	/* Initialize output arguments to "not present" */
	if (version)
		*version = NULL;
	if (encoding)
		*encoding = NULL;
	if (standalone)
		*standalone = -1;

	p = str;

	/* No "<?xml" prefix: there is no declaration; p == str gives length 0 */
	if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
		goto finished;

	/*
	 * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
	 * rather than an XMLDecl, so we have done what we came to do and found no
	 * XMLDecl.
	 *
	 * We need an input length value for xmlGetUTF8Char, but there's no need
	 * to count the whole document size, so use strnlen not strlen.
	 */
	utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
	utf8char = xmlGetUTF8Char(p + 5, &utf8len);
	if (PG_XMLISNAMECHAR(utf8char))
		goto finished;

	p += 5;

	/*
	 * version: mandatory.  CHECK_XML_SPACE returns XML_ERR_SPACE_REQUIRED
	 * from this function if "<?xml" is not followed by whitespace.
	 */
	CHECK_XML_SPACE(p);
	SKIP_XML_SPACE(p);
	if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
		return XML_ERR_VERSION_MISSING;
	p += 7;
	SKIP_XML_SPACE(p);
	if (*p != '=')
		return XML_ERR_VERSION_MISSING;
	p += 1;
	SKIP_XML_SPACE(p);

	if (*p == '\'' || *p == '"')
	{
		const xmlChar *q;

		/* find the closing quote, which must match the opening one */
		q = xmlStrchr(p + 1, *p);
		if (!q)
			return XML_ERR_VERSION_MISSING;

		if (version)
			*version = xml_pnstrdup(p + 1, q - p - 1);
		p = q + 1;
	}
	else
		return XML_ERR_VERSION_MISSING;

	/*
	 * encoding: optional.  save_p remembers where the preceding item ended,
	 * so that we can both verify that whitespace separated the two and back
	 * up if the keyword turns out not to be present.
	 */
	save_p = p;
	SKIP_XML_SPACE(p);
	if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
	{
		CHECK_XML_SPACE(save_p);
		p += 8;
		SKIP_XML_SPACE(p);
		if (*p != '=')
			return XML_ERR_MISSING_ENCODING;
		p += 1;
		SKIP_XML_SPACE(p);

		if (*p == '\'' || *p == '"')
		{
			const xmlChar *q;

			q = xmlStrchr(p + 1, *p);
			if (!q)
				return XML_ERR_MISSING_ENCODING;

			if (encoding)
				*encoding = xml_pnstrdup(p + 1, q - p - 1);
			p = q + 1;
		}
		else
			return XML_ERR_MISSING_ENCODING;
	}
	else
	{
		p = save_p;
	}

	/* standalone: optional, handled with the same save/restore approach */
	save_p = p;
	SKIP_XML_SPACE(p);
	if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
	{
		CHECK_XML_SPACE(save_p);
		p += 10;
		SKIP_XML_SPACE(p);
		if (*p != '=')
			return XML_ERR_STANDALONE_VALUE;
		p += 1;
		SKIP_XML_SPACE(p);
		/* the value is matched together with its quotes: 'yes' or "yes" */
		if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
			xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
		{
			if (standalone)
				*standalone = 1;
			p += 5;
		}
		else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
				 xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
		{
			if (standalone)
				*standalone = 0;
			p += 4;
		}
		else
			return XML_ERR_STANDALONE_VALUE;
	}
	else
	{
		p = save_p;
	}

	/* the declaration must end with "?>", optionally preceded by space */
	SKIP_XML_SPACE(p);
	if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
		return XML_ERR_XMLDECL_NOT_FINISHED;
	p += 2;

finished:
	/* length of the declaration; zero if we jumped here without finding one */
	len = p - str;

	/* reject any byte above 127 within the declaration itself */
	for (p = str; p < str + len; p++)
		if (*p > 127)
			return XML_ERR_INVALID_CHAR;

	if (lenp)
		*lenp = len;

	return XML_ERR_OK;
}
1371 
1372 
1373 /*
1374  * Write an XML declaration.  On output, we adjust the XML declaration
1375  * as follows.  (These rules are the moral equivalent of the clause
1376  * "Serialization of an XML value" in the SQL standard.)
1377  *
1378  * We try to avoid generating an XML declaration if possible.  This is
1379  * so that you don't get trivial things like xml '<foo/>' resulting in
1380  * '<?xml version="1.0"?><foo/>', which would surely be annoying.  We
1381  * must provide a declaration if the standalone property is specified
1382  * or if we include an encoding declaration.  If we have a
1383  * declaration, we must specify a version (XML requires this).
1384  * Otherwise we only make a declaration if the version is not "1.0",
1385  * which is the default version specified in SQL:2003.
1386  */
1387 static bool
print_xml_decl(StringInfo buf,const xmlChar * version,pg_enc encoding,int standalone)1388 print_xml_decl(StringInfo buf, const xmlChar *version,
1389 			   pg_enc encoding, int standalone)
1390 {
1391 	if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1392 		|| (encoding && encoding != PG_UTF8)
1393 		|| standalone != -1)
1394 	{
1395 		appendStringInfoString(buf, "<?xml");
1396 
1397 		if (version)
1398 			appendStringInfo(buf, " version=\"%s\"", version);
1399 		else
1400 			appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1401 
1402 		if (encoding && encoding != PG_UTF8)
1403 		{
1404 			/*
1405 			 * XXX might be useful to convert this to IANA names (ISO-8859-1
1406 			 * instead of LATIN1 etc.); needs field experience
1407 			 */
1408 			appendStringInfo(buf, " encoding=\"%s\"",
1409 							 pg_encoding_to_char(encoding));
1410 		}
1411 
1412 		if (standalone == 1)
1413 			appendStringInfoString(buf, " standalone=\"yes\"");
1414 		else if (standalone == 0)
1415 			appendStringInfoString(buf, " standalone=\"no\"");
1416 		appendStringInfoString(buf, "?>");
1417 
1418 		return true;
1419 	}
1420 	else
1421 		return false;
1422 }
1423 
1424 /*
1425  * Test whether an input that is to be parsed as CONTENT contains a DTD.
1426  *
1427  * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1428  * satisfied by a document with a DTD, which is a bit of a wart, as it means
1429  * the CONTENT type is not a proper superset of DOCUMENT.  SQL/XML:2006 and
1430  * later fix that, by redefining content with reference to the "more
1431  * permissive" Document Node of the XQuery/XPath Data Model, such that any
1432  * DOCUMENT value is indeed also a CONTENT value.  That definition is more
1433  * useful, as CONTENT becomes usable for parsing input of unknown form (think
1434  * pg_restore).
1435  *
1436  * As used below in parse_xml when parsing for CONTENT, libxml does not give
1437  * us the 2006+ behavior, but only the 2003; it will choke if the input has
1438  * a DTD.  But we can provide the 2006+ definition of CONTENT easily enough,
1439  * by detecting this case first and simply doing the parse as DOCUMENT.
1440  *
1441  * A DTD can be found arbitrarily far in, but that would be a contrived case;
1442  * it will ordinarily start within a few dozen characters.  The only things
1443  * that can precede it are an XMLDecl (here, the caller will have called
1444  * parse_xml_decl already), whitespace, comments, and processing instructions.
1445  * This function need only return true if it sees a valid sequence of such
1446  * things leading to <!DOCTYPE.  It can simply return false in any other
1447  * cases, including malformed input; that will mean the input gets parsed as
1448  * CONTENT as originally planned, with libxml reporting any errors.
1449  *
1450  * This is only to be called from xml_parse, when pg_xml_init has already
1451  * been called.  The input is already in UTF8 encoding.
1452  */
1453 static bool
xml_doctype_in_content(const xmlChar * str)1454 xml_doctype_in_content(const xmlChar *str)
1455 {
1456 	const xmlChar *p = str;
1457 
1458 	for (;;)
1459 	{
1460 		const xmlChar *e;
1461 
1462 		SKIP_XML_SPACE(p);
1463 		if (*p != '<')
1464 			return false;
1465 		p++;
1466 
1467 		if (*p == '!')
1468 		{
1469 			p++;
1470 
1471 			/* if we see <!DOCTYPE, we can return true */
1472 			if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1473 				return true;
1474 
1475 			/* otherwise, if it's not a comment, fail */
1476 			if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1477 				return false;
1478 			/* find end of comment: find -- and a > must follow */
1479 			p = xmlStrstr(p + 2, (xmlChar *) "--");
1480 			if (!p || p[2] != '>')
1481 				return false;
1482 			/* advance over comment, and keep scanning */
1483 			p += 3;
1484 			continue;
1485 		}
1486 
1487 		/* otherwise, if it's not a PI <?target something?>, fail */
1488 		if (*p != '?')
1489 			return false;
1490 		p++;
1491 
1492 		/* find end of PI (the string ?> is forbidden within a PI) */
1493 		e = xmlStrstr(p, (xmlChar *) "?>");
1494 		if (!e)
1495 			return false;
1496 
1497 		/* advance over PI, keep scanning */
1498 		p = e + 2;
1499 	}
1500 }
1501 
1502 
1503 /*
1504  * Convert a C string to XML internal representation
1505  *
1506  * Note: it is caller's responsibility to xmlFreeDoc() the result,
1507  * else a permanent memory leak will ensue!
1508  *
1509  * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1510  * yet do not use SAX - see xmlreader.c)
1511  */
1512 static xmlDocPtr
xml_parse(text * data,XmlOptionType xmloption_arg,bool preserve_whitespace,int encoding)1513 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1514 		  int encoding)
1515 {
1516 	int32		len;
1517 	xmlChar    *string;
1518 	xmlChar    *utf8string;
1519 	PgXmlErrorContext *xmlerrcxt;
1520 	volatile xmlParserCtxtPtr ctxt = NULL;
1521 	volatile xmlDocPtr doc = NULL;
1522 
1523 	len = VARSIZE_ANY_EXHDR(data);	/* will be useful later */
1524 	string = xml_text2xmlChar(data);
1525 
1526 	utf8string = pg_do_encoding_conversion(string,
1527 										   len,
1528 										   encoding,
1529 										   PG_UTF8);
1530 
1531 	/* Start up libxml and its parser */
1532 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1533 
1534 	/* Use a TRY block to ensure we clean up correctly */
1535 	PG_TRY();
1536 	{
1537 		bool		parse_as_document = false;
1538 		int			res_code;
1539 		size_t		count = 0;
1540 		xmlChar    *version = NULL;
1541 		int			standalone = 0;
1542 
1543 		xmlInitParser();
1544 
1545 		ctxt = xmlNewParserCtxt();
1546 		if (ctxt == NULL || xmlerrcxt->err_occurred)
1547 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1548 						"could not allocate parser context");
1549 
1550 		/* Decide whether to parse as document or content */
1551 		if (xmloption_arg == XMLOPTION_DOCUMENT)
1552 			parse_as_document = true;
1553 		else
1554 		{
1555 			/* Parse and skip over the XML declaration, if any */
1556 			res_code = parse_xml_decl(utf8string,
1557 									  &count, &version, NULL, &standalone);
1558 			if (res_code != 0)
1559 				xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1560 									"invalid XML content: invalid XML declaration",
1561 									res_code);
1562 
1563 			/* Is there a DOCTYPE element? */
1564 			if (xml_doctype_in_content(utf8string + count))
1565 				parse_as_document = true;
1566 		}
1567 
1568 		if (parse_as_document)
1569 		{
1570 			/*
1571 			 * Note, that here we try to apply DTD defaults
1572 			 * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1573 			 * 'Default values defined by internal DTD are applied'. As for
1574 			 * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1575 			 * 10.16.7.e)
1576 			 */
1577 			doc = xmlCtxtReadDoc(ctxt, utf8string,
1578 								 NULL,
1579 								 "UTF-8",
1580 								 XML_PARSE_NOENT | XML_PARSE_DTDATTR
1581 								 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1582 			if (doc == NULL || xmlerrcxt->err_occurred)
1583 			{
1584 				/* Use original option to decide which error code to throw */
1585 				if (xmloption_arg == XMLOPTION_DOCUMENT)
1586 					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1587 								"invalid XML document");
1588 				else
1589 					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1590 								"invalid XML content");
1591 			}
1592 		}
1593 		else
1594 		{
1595 			doc = xmlNewDoc(version);
1596 			Assert(doc->encoding == NULL);
1597 			doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1598 			doc->standalone = standalone;
1599 
1600 			/* allow empty content */
1601 			if (*(utf8string + count))
1602 			{
1603 				res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1604 													   utf8string + count, NULL);
1605 				if (res_code != 0 || xmlerrcxt->err_occurred)
1606 					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1607 								"invalid XML content");
1608 			}
1609 		}
1610 	}
1611 	PG_CATCH();
1612 	{
1613 		if (doc != NULL)
1614 			xmlFreeDoc(doc);
1615 		if (ctxt != NULL)
1616 			xmlFreeParserCtxt(ctxt);
1617 
1618 		pg_xml_done(xmlerrcxt, true);
1619 
1620 		PG_RE_THROW();
1621 	}
1622 	PG_END_TRY();
1623 
1624 	xmlFreeParserCtxt(ctxt);
1625 
1626 	pg_xml_done(xmlerrcxt, false);
1627 
1628 	return doc;
1629 }
1630 
1631 
1632 /*
1633  * xmlChar<->text conversions
1634  */
1635 static xmlChar *
xml_text2xmlChar(text * in)1636 xml_text2xmlChar(text *in)
1637 {
1638 	return (xmlChar *) text_to_cstring(in);
1639 }
1640 
1641 
1642 #ifdef USE_LIBXMLCONTEXT
1643 
1644 /*
1645  * Manage the special context used for all libxml allocations (but only
1646  * in special debug builds; see notes at top of file)
1647  */
1648 static void
xml_memory_init(void)1649 xml_memory_init(void)
1650 {
1651 	/* Create memory context if not there already */
1652 	if (LibxmlContext == NULL)
1653 		LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1654 											  "Libxml context",
1655 											  ALLOCSET_DEFAULT_SIZES);
1656 
1657 	/* Re-establish the callbacks even if already set */
1658 	xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1659 }
1660 
1661 /*
1662  * Wrappers for memory management functions
1663  */
1664 static void *
xml_palloc(size_t size)1665 xml_palloc(size_t size)
1666 {
1667 	return MemoryContextAlloc(LibxmlContext, size);
1668 }
1669 
1670 
static void *
xml_repalloc(void *ptr, size_t size)
{
	/* resize the chunk via repalloc */
	void	   *resized = repalloc(ptr, size);

	return resized;
}
1676 
1677 
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
1685 
1686 
1687 static char *
xml_pstrdup(const char * string)1688 xml_pstrdup(const char *string)
1689 {
1690 	return MemoryContextStrdup(LibxmlContext, string);
1691 }
1692 #endif							/* USE_LIBXMLCONTEXT */
1693 
1694 
1695 /*
1696  * xmlPgEntityLoader --- entity loader callback function
1697  *
1698  * Silently prevent any external entity URL from being loaded.  We don't want
1699  * to throw an error, so instead make the entity appear to expand to an empty
1700  * string.
1701  *
1702  * We would prefer to allow loading entities that exist in the system's
1703  * global XML catalog; but the available libxml2 APIs make that a complex
1704  * and fragile task.  For now, just shut down all external access.
1705  */
1706 static xmlParserInputPtr
xmlPgEntityLoader(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)1707 xmlPgEntityLoader(const char *URL, const char *ID,
1708 				  xmlParserCtxtPtr ctxt)
1709 {
1710 	return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1711 }
1712 
1713 
1714 /*
1715  * xml_ereport --- report an XML-related error
1716  *
1717  * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1718  * standard.  This function adds libxml's native error message, if any, as
1719  * detail.
1720  *
1721  * This is exported for modules that want to share the core libxml error
1722  * handler.  Note that pg_xml_init() *must* have been called previously.
1723  */
1724 void
xml_ereport(PgXmlErrorContext * errcxt,int level,int sqlcode,const char * msg)1725 xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1726 {
1727 	char	   *detail;
1728 
1729 	/* Defend against someone passing us a bogus context struct */
1730 	if (errcxt->magic != ERRCXT_MAGIC)
1731 		elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1732 
1733 	/* Flag that the current libxml error has been reported */
1734 	errcxt->err_occurred = false;
1735 
1736 	/* Include detail only if we have some text from libxml */
1737 	if (errcxt->err_buf.len > 0)
1738 		detail = errcxt->err_buf.data;
1739 	else
1740 		detail = NULL;
1741 
1742 	ereport(level,
1743 			(errcode(sqlcode),
1744 			 errmsg_internal("%s", msg),
1745 			 detail ? errdetail_internal("%s", detail) : 0));
1746 }
1747 
1748 
/*
 * Error handler for libxml errors and warnings
 *
 * This is the structured-error callback that pg_xml_init() installs.
 * "data" is the PgXmlErrorContext registered with it and "error" is
 * libxml's description of the problem.
 *
 * Depending on the error's domain and the context's strictness setting the
 * error is either ignored or formatted into a message.  Messages at error
 * level or above are appended to the context's err_buf and flagged through
 * err_occurred, to be reported later by xml_ereport(); lesser ones are
 * reported immediately as WARNING or NOTICE.  In legacy mode everything is
 * just appended to err_buf.
 */
static void
xml_errorHandler(void *data, xmlErrorPtr error)
{
	PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
	xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
	xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
	xmlNodePtr	node = error->node;
	/* an element name is included in the message only for element nodes */
	const xmlChar *name = (node != NULL &&
						   node->type == XML_ELEMENT_NODE) ? node->name : NULL;
	int			domain = error->domain;
	int			level = error->level;
	StringInfo	errorBuf;

	/*
	 * Defend against someone passing us a bogus context struct.
	 *
	 * We force a backend exit if this check fails because longjmp'ing out of
	 * libxml would likely render it unsafe to use further.
	 */
	if (xmlerrcxt->magic != ERRCXT_MAGIC)
		elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");

	/*----------
	 * Older libxml versions report some errors differently.
	 * First, some errors were previously reported as coming from the parser
	 * domain but are now reported as coming from the namespace domain.
	 * Second, some warnings were upgraded to errors.
	 * We attempt to compensate for that here.
	 *----------
	 */
	switch (error->code)
	{
		case XML_WAR_NS_URI:
			level = XML_ERR_ERROR;
			domain = XML_FROM_NAMESPACE;
			break;

		case XML_ERR_NS_DECL_ERROR:
		case XML_WAR_NS_URI_RELATIVE:
		case XML_WAR_NS_COLUMN:
		case XML_NS_ERR_XML_NAMESPACE:
		case XML_NS_ERR_UNDEFINED_NAMESPACE:
		case XML_NS_ERR_QNAME:
		case XML_NS_ERR_ATTRIBUTE_REDEFINED:
		case XML_NS_ERR_EMPTY:
			domain = XML_FROM_NAMESPACE;
			break;
	}

	/* Decide whether to act on the error or not */
	switch (domain)
	{
		case XML_FROM_PARSER:
		case XML_FROM_NONE:
		case XML_FROM_MEMORY:
		case XML_FROM_IO:

			/*
			 * Suppress warnings about undeclared entities.  We need to do
			 * this to avoid problems due to not loading DTD definitions.
			 */
			if (error->code == XML_WAR_UNDECLARED_ENTITY)
				return;

			/* Otherwise, accept error regardless of the parsing purpose */
			break;

		default:
			/* Ignore error if only doing well-formedness check */
			if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
				return;
			break;
	}

	/*
	 * Prepare error message in errorBuf, in the form
	 * "line N: element NAME: message"; the first two parts are optional.
	 */
	errorBuf = makeStringInfo();

	if (error->line > 0)
		appendStringInfo(errorBuf, "line %d: ", error->line);
	if (name != NULL)
		appendStringInfo(errorBuf, "element %s: ", name);
	if (error->message != NULL)
		appendStringInfoString(errorBuf, error->message);
	else
		appendStringInfoString(errorBuf, "(no message provided)");

	/*
	 * Append context information to errorBuf.
	 *
	 * xmlParserPrintFileContext() uses libxml's "generic" error handler to
	 * write the context.  Since we don't want to duplicate libxml
	 * functionality here, we set up a generic error handler temporarily.
	 *
	 * We use appendStringInfo() directly as libxml's generic error handler.
	 * This should work because it has essentially the same signature as
	 * libxml expects, namely (void *ptr, const char *msg, ...).
	 */
	if (input != NULL)
	{
		/* remember the current generic handler so it can be put back below */
		xmlGenericErrorFunc errFuncSaved = xmlGenericError;
		void	   *errCtxSaved = xmlGenericErrorContext;

		xmlSetGenericErrorFunc((void *) errorBuf,
							   (xmlGenericErrorFunc) appendStringInfo);

		/* Add context information to errorBuf */
		appendStringInfoLineSeparator(errorBuf);

		xmlParserPrintFileContext(input);

		/* Restore generic error func */
		xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
	}

	/* Get rid of any trailing newlines in errorBuf */
	chopStringInfoNewlines(errorBuf);

	/*
	 * Legacy error handling mode.  err_occurred is never set, we just add the
	 * message to err_buf.  This mode exists because the xml2 contrib module
	 * uses our error-handling infrastructure, but we don't want to change its
	 * behaviour since it's deprecated anyway.  This is also why we don't
	 * distinguish between notices, warnings and errors here --- the old-style
	 * generic error handler wouldn't have done that either.
	 */
	if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
	{
		appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
		appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
							   errorBuf->len);

		pfree(errorBuf->data);
		pfree(errorBuf);
		return;
	}

	/*
	 * We don't want to ereport() here because that'd probably leave libxml in
	 * an inconsistent state.  Instead, we remember the error and ereport()
	 * from xml_ereport().
	 *
	 * Warnings and notices can be reported immediately since they won't cause
	 * a longjmp() out of libxml.
	 */
	if (level >= XML_ERR_ERROR)
	{
		/* each message starts on its own line within err_buf */
		appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
		appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
							   errorBuf->len);

		xmlerrcxt->err_occurred = true;
	}
	else if (level >= XML_ERR_WARNING)
	{
		ereport(WARNING,
				(errmsg_internal("%s", errorBuf->data)));
	}
	else
	{
		ereport(NOTICE,
				(errmsg_internal("%s", errorBuf->data)));
	}

	/* the scratch buffer is no longer needed */
	pfree(errorBuf->data);
	pfree(errorBuf);
}
1918 
1919 
1920 /*
1921  * Wrapper for "ereport" function for XML-related errors.  The "msg"
1922  * is the SQL-level message; some can be adopted from the SQL/XML
1923  * standard.  This function uses "code" to create a textual detail
1924  * message.  At the moment, we only need to cover those codes that we
1925  * may raise in this file.
1926  */
1927 static void
xml_ereport_by_code(int level,int sqlcode,const char * msg,int code)1928 xml_ereport_by_code(int level, int sqlcode,
1929 					const char *msg, int code)
1930 {
1931 	const char *det;
1932 
1933 	switch (code)
1934 	{
1935 		case XML_ERR_INVALID_CHAR:
1936 			det = gettext_noop("Invalid character value.");
1937 			break;
1938 		case XML_ERR_SPACE_REQUIRED:
1939 			det = gettext_noop("Space required.");
1940 			break;
1941 		case XML_ERR_STANDALONE_VALUE:
1942 			det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1943 			break;
1944 		case XML_ERR_VERSION_MISSING:
1945 			det = gettext_noop("Malformed declaration: missing version.");
1946 			break;
1947 		case XML_ERR_MISSING_ENCODING:
1948 			det = gettext_noop("Missing encoding in text declaration.");
1949 			break;
1950 		case XML_ERR_XMLDECL_NOT_FINISHED:
1951 			det = gettext_noop("Parsing XML declaration: '?>' expected.");
1952 			break;
1953 		default:
1954 			det = gettext_noop("Unrecognized libxml error code: %d.");
1955 			break;
1956 	}
1957 
1958 	ereport(level,
1959 			(errcode(sqlcode),
1960 			 errmsg_internal("%s", msg),
1961 			 errdetail(det, code)));
1962 }
1963 
1964 
1965 /*
1966  * Remove all trailing newlines from a StringInfo string
1967  */
1968 static void
chopStringInfoNewlines(StringInfo str)1969 chopStringInfoNewlines(StringInfo str)
1970 {
1971 	while (str->len > 0 && str->data[str->len - 1] == '\n')
1972 		str->data[--str->len] = '\0';
1973 }
1974 
1975 
1976 /*
1977  * Append a newline after removing any existing trailing newlines
1978  */
1979 static void
appendStringInfoLineSeparator(StringInfo str)1980 appendStringInfoLineSeparator(StringInfo str)
1981 {
1982 	chopStringInfoNewlines(str);
1983 	if (str->len > 0)
1984 		appendStringInfoChar(str, '\n');
1985 }
1986 
1987 
1988 /*
1989  * Convert one char in the current server encoding to a Unicode codepoint.
1990  */
1991 static pg_wchar
sqlchar_to_unicode(const char * s)1992 sqlchar_to_unicode(const char *s)
1993 {
1994 	char	   *utf8string;
1995 	pg_wchar	ret[2];			/* need space for trailing zero */
1996 
1997 	/* note we're not assuming s is null-terminated */
1998 	utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
1999 
2000 	pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2001 								  pg_encoding_mblen(PG_UTF8, utf8string));
2002 
2003 	if (utf8string != s)
2004 		pfree(utf8string);
2005 
2006 	return ret[0];
2007 }
2008 
2009 
2010 static bool
is_valid_xml_namefirst(pg_wchar c)2011 is_valid_xml_namefirst(pg_wchar c)
2012 {
2013 	/* (Letter | '_' | ':') */
2014 	return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2015 			|| c == '_' || c == ':');
2016 }
2017 
2018 
2019 static bool
is_valid_xml_namechar(pg_wchar c)2020 is_valid_xml_namechar(pg_wchar c)
2021 {
2022 	/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2023 	return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2024 			|| xmlIsDigitQ(c)
2025 			|| c == '.' || c == '-' || c == '_' || c == ':'
2026 			|| xmlIsCombiningQ(c)
2027 			|| xmlIsExtenderQ(c));
2028 }
2029 #endif							/* USE_LIBXML */
2030 
2031 
2032 /*
2033  * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
2034  */
2035 char *
map_sql_identifier_to_xml_name(const char * ident,bool fully_escaped,bool escape_period)2036 map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
2037 							   bool escape_period)
2038 {
2039 #ifdef USE_LIBXML
2040 	StringInfoData buf;
2041 	const char *p;
2042 
2043 	/*
2044 	 * SQL/XML doesn't make use of this case anywhere, so it's probably a
2045 	 * mistake.
2046 	 */
2047 	Assert(fully_escaped || !escape_period);
2048 
2049 	initStringInfo(&buf);
2050 
2051 	for (p = ident; *p; p += pg_mblen(p))
2052 	{
2053 		if (*p == ':' && (p == ident || fully_escaped))
2054 			appendStringInfoString(&buf, "_x003A_");
2055 		else if (*p == '_' && *(p + 1) == 'x')
2056 			appendStringInfoString(&buf, "_x005F_");
2057 		else if (fully_escaped && p == ident &&
2058 				 pg_strncasecmp(p, "xml", 3) == 0)
2059 		{
2060 			if (*p == 'x')
2061 				appendStringInfoString(&buf, "_x0078_");
2062 			else
2063 				appendStringInfoString(&buf, "_x0058_");
2064 		}
2065 		else if (escape_period && *p == '.')
2066 			appendStringInfoString(&buf, "_x002E_");
2067 		else
2068 		{
2069 			pg_wchar	u = sqlchar_to_unicode(p);
2070 
2071 			if ((p == ident)
2072 				? !is_valid_xml_namefirst(u)
2073 				: !is_valid_xml_namechar(u))
2074 				appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
2075 			else
2076 				appendBinaryStringInfo(&buf, p, pg_mblen(p));
2077 		}
2078 	}
2079 
2080 	return buf.data;
2081 #else							/* not USE_LIBXML */
2082 	NO_XML_SUPPORT();
2083 	return NULL;
2084 #endif							/* not USE_LIBXML */
2085 }
2086 
2087 
2088 /*
2089  * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2090  */
2091 char *
map_xml_name_to_sql_identifier(const char * name)2092 map_xml_name_to_sql_identifier(const char *name)
2093 {
2094 	StringInfoData buf;
2095 	const char *p;
2096 
2097 	initStringInfo(&buf);
2098 
2099 	for (p = name; *p; p += pg_mblen(p))
2100 	{
2101 		if (*p == '_' && *(p + 1) == 'x'
2102 			&& isxdigit((unsigned char) *(p + 2))
2103 			&& isxdigit((unsigned char) *(p + 3))
2104 			&& isxdigit((unsigned char) *(p + 4))
2105 			&& isxdigit((unsigned char) *(p + 5))
2106 			&& *(p + 6) == '_')
2107 		{
2108 			char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2109 			unsigned int u;
2110 
2111 			sscanf(p + 2, "%X", &u);
2112 			pg_unicode_to_server(u, (unsigned char *) cbuf);
2113 			appendStringInfoString(&buf, cbuf);
2114 			p += 6;
2115 		}
2116 		else
2117 			appendBinaryStringInfo(&buf, p, pg_mblen(p));
2118 	}
2119 
2120 	return buf.data;
2121 }
2122 
2123 /*
2124  * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2125  *
2126  * When xml_escape_strings is true, then certain characters in string
2127  * values are replaced by entity references (&lt; etc.), as specified
2128  * in SQL/XML:2008 section 9.8 GR 9) a) iii).   This is normally what is
2129  * wanted.  The false case is mainly useful when the resulting value
2130  * is used with xmlTextWriterWriteAttribute() to write out an
2131  * attribute, because that function does the escaping itself.
2132  */
2133 char *
map_sql_value_to_xml_value(Datum value,Oid type,bool xml_escape_strings)2134 map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2135 {
2136 	if (type_is_array_domain(type))
2137 	{
2138 		ArrayType  *array;
2139 		Oid			elmtype;
2140 		int16		elmlen;
2141 		bool		elmbyval;
2142 		char		elmalign;
2143 		int			num_elems;
2144 		Datum	   *elem_values;
2145 		bool	   *elem_nulls;
2146 		StringInfoData buf;
2147 		int			i;
2148 
2149 		array = DatumGetArrayTypeP(value);
2150 		elmtype = ARR_ELEMTYPE(array);
2151 		get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2152 
2153 		deconstruct_array(array, elmtype,
2154 						  elmlen, elmbyval, elmalign,
2155 						  &elem_values, &elem_nulls,
2156 						  &num_elems);
2157 
2158 		initStringInfo(&buf);
2159 
2160 		for (i = 0; i < num_elems; i++)
2161 		{
2162 			if (elem_nulls[i])
2163 				continue;
2164 			appendStringInfoString(&buf, "<element>");
2165 			appendStringInfoString(&buf,
2166 								   map_sql_value_to_xml_value(elem_values[i],
2167 															  elmtype, true));
2168 			appendStringInfoString(&buf, "</element>");
2169 		}
2170 
2171 		pfree(elem_values);
2172 		pfree(elem_nulls);
2173 
2174 		return buf.data;
2175 	}
2176 	else
2177 	{
2178 		Oid			typeOut;
2179 		bool		isvarlena;
2180 		char	   *str;
2181 
2182 		/*
2183 		 * Flatten domains; the special-case treatments below should apply to,
2184 		 * eg, domains over boolean not just boolean.
2185 		 */
2186 		type = getBaseType(type);
2187 
2188 		/*
2189 		 * Special XSD formatting for some data types
2190 		 */
2191 		switch (type)
2192 		{
2193 			case BOOLOID:
2194 				if (DatumGetBool(value))
2195 					return "true";
2196 				else
2197 					return "false";
2198 
2199 			case DATEOID:
2200 				{
2201 					DateADT		date;
2202 					struct pg_tm tm;
2203 					char		buf[MAXDATELEN + 1];
2204 
2205 					date = DatumGetDateADT(value);
2206 					/* XSD doesn't support infinite values */
2207 					if (DATE_NOT_FINITE(date))
2208 						ereport(ERROR,
2209 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2210 								 errmsg("date out of range"),
2211 								 errdetail("XML does not support infinite date values.")));
2212 					j2date(date + POSTGRES_EPOCH_JDATE,
2213 						   &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2214 					EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2215 
2216 					return pstrdup(buf);
2217 				}
2218 
2219 			case TIMESTAMPOID:
2220 				{
2221 					Timestamp	timestamp;
2222 					struct pg_tm tm;
2223 					fsec_t		fsec;
2224 					char		buf[MAXDATELEN + 1];
2225 
2226 					timestamp = DatumGetTimestamp(value);
2227 
2228 					/* XSD doesn't support infinite values */
2229 					if (TIMESTAMP_NOT_FINITE(timestamp))
2230 						ereport(ERROR,
2231 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2232 								 errmsg("timestamp out of range"),
2233 								 errdetail("XML does not support infinite timestamp values.")));
2234 					else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2235 						EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2236 					else
2237 						ereport(ERROR,
2238 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2239 								 errmsg("timestamp out of range")));
2240 
2241 					return pstrdup(buf);
2242 				}
2243 
2244 			case TIMESTAMPTZOID:
2245 				{
2246 					TimestampTz timestamp;
2247 					struct pg_tm tm;
2248 					int			tz;
2249 					fsec_t		fsec;
2250 					const char *tzn = NULL;
2251 					char		buf[MAXDATELEN + 1];
2252 
2253 					timestamp = DatumGetTimestamp(value);
2254 
2255 					/* XSD doesn't support infinite values */
2256 					if (TIMESTAMP_NOT_FINITE(timestamp))
2257 						ereport(ERROR,
2258 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2259 								 errmsg("timestamp out of range"),
2260 								 errdetail("XML does not support infinite timestamp values.")));
2261 					else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2262 						EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2263 					else
2264 						ereport(ERROR,
2265 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2266 								 errmsg("timestamp out of range")));
2267 
2268 					return pstrdup(buf);
2269 				}
2270 
2271 #ifdef USE_LIBXML
2272 			case BYTEAOID:
2273 				{
2274 					bytea	   *bstr = DatumGetByteaPP(value);
2275 					PgXmlErrorContext *xmlerrcxt;
2276 					volatile xmlBufferPtr buf = NULL;
2277 					volatile xmlTextWriterPtr writer = NULL;
2278 					char	   *result;
2279 
2280 					xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2281 
2282 					PG_TRY();
2283 					{
2284 						buf = xmlBufferCreate();
2285 						if (buf == NULL || xmlerrcxt->err_occurred)
2286 							xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2287 										"could not allocate xmlBuffer");
2288 						writer = xmlNewTextWriterMemory(buf, 0);
2289 						if (writer == NULL || xmlerrcxt->err_occurred)
2290 							xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2291 										"could not allocate xmlTextWriter");
2292 
2293 						if (xmlbinary == XMLBINARY_BASE64)
2294 							xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2295 													 0, VARSIZE_ANY_EXHDR(bstr));
2296 						else
2297 							xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2298 													 0, VARSIZE_ANY_EXHDR(bstr));
2299 
2300 						/* we MUST do this now to flush data out to the buffer */
2301 						xmlFreeTextWriter(writer);
2302 						writer = NULL;
2303 
2304 						result = pstrdup((const char *) xmlBufferContent(buf));
2305 					}
2306 					PG_CATCH();
2307 					{
2308 						if (writer)
2309 							xmlFreeTextWriter(writer);
2310 						if (buf)
2311 							xmlBufferFree(buf);
2312 
2313 						pg_xml_done(xmlerrcxt, true);
2314 
2315 						PG_RE_THROW();
2316 					}
2317 					PG_END_TRY();
2318 
2319 					xmlBufferFree(buf);
2320 
2321 					pg_xml_done(xmlerrcxt, false);
2322 
2323 					return result;
2324 				}
2325 #endif							/* USE_LIBXML */
2326 
2327 		}
2328 
2329 		/*
2330 		 * otherwise, just use the type's native text representation
2331 		 */
2332 		getTypeOutputInfo(type, &typeOut, &isvarlena);
2333 		str = OidOutputFunctionCall(typeOut, value);
2334 
2335 		/* ... exactly as-is for XML, and when escaping is not wanted */
2336 		if (type == XMLOID || !xml_escape_strings)
2337 			return str;
2338 
2339 		/* otherwise, translate special characters as needed */
2340 		return escape_xml(str);
2341 	}
2342 }
2343 
2344 
2345 /*
2346  * Escape characters in text that have special meanings in XML.
2347  *
2348  * Returns a palloc'd string.
2349  *
2350  * NB: this is intentionally not dependent on libxml.
2351  */
2352 char *
escape_xml(const char * str)2353 escape_xml(const char *str)
2354 {
2355 	StringInfoData buf;
2356 	const char *p;
2357 
2358 	initStringInfo(&buf);
2359 	for (p = str; *p; p++)
2360 	{
2361 		switch (*p)
2362 		{
2363 			case '&':
2364 				appendStringInfoString(&buf, "&amp;");
2365 				break;
2366 			case '<':
2367 				appendStringInfoString(&buf, "&lt;");
2368 				break;
2369 			case '>':
2370 				appendStringInfoString(&buf, "&gt;");
2371 				break;
2372 			case '\r':
2373 				appendStringInfoString(&buf, "&#x0d;");
2374 				break;
2375 			default:
2376 				appendStringInfoCharMacro(&buf, *p);
2377 				break;
2378 		}
2379 	}
2380 	return buf.data;
2381 }
2382 
2383 
/*
 * Duplicate a C string, terminator included, into memory obtained from
 * SPI_palloc().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes;
	char	   *copy;

	nbytes = strlen(s) + 1;		/* count the trailing NUL too */
	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2393 
2394 
2395 /*
2396  * SQL to XML mapping functions
2397  *
2398  * What follows below was at one point intentionally organized so that
2399  * you can read along in the SQL/XML standard. The functions are
2400  * mostly split up the way the clauses lay out in the standards
2401  * document, and the identifiers are also aligned with the standard
2402  * text.  Unfortunately, SQL/XML:2006 reordered the clauses
2403  * differently than SQL/XML:2003, so the order below doesn't make much
2404  * sense anymore.
2405  *
2406  * There are many things going on there:
2407  *
2408  * There are two kinds of mappings: Mapping SQL data (table contents)
2409  * to XML documents, and mapping SQL structure (the "schema") to XML
2410  * Schema.  And there are functions that do both at the same time.
2411  *
2412  * Then you can map a database, a schema, or a table, each in both
2413  * ways.  This breaks down recursively: Mapping a database invokes
2414  * mapping schemas, which invokes mapping tables, which invokes
2415  * mapping rows, which invokes mapping columns, although you can't
2416  * call the last two from the outside.  Because of this, there are a
2417  * number of xyz_internal() functions which are to be called both from
2418  * the function manager wrapper and from some upper layer in a
2419  * recursive call.
2420  *
2421  * See the documentation about what the common function arguments
2422  * nulls, tableforest, and targetns mean.
2423  *
2424  * Some style guidelines for XML output: Use double quotes for quoting
2425  * XML attributes.  Indent XML elements by two spaces, but remember
2426  * that a lot of code is called recursively at different levels, so
2427  * it's better not to indent rather than create output that indents
2428  * and outdents weirdly.  Add newlines to make the output look nice.
2429  */
2430 
2431 
2432 /*
2433  * Visibility of objects for XML mappings; see SQL/XML:2008 section
2434  * 4.10.8.
2435  */
2436 
2437 /*
2438  * Given a query, which must return type oid as first column, produce
2439  * a list of Oids with the query results.
2440  */
2441 static List *
query_to_oid_list(const char * query)2442 query_to_oid_list(const char *query)
2443 {
2444 	uint64		i;
2445 	List	   *list = NIL;
2446 	int			spi_result;
2447 
2448 	spi_result = SPI_execute(query, true, 0);
2449 	if (spi_result != SPI_OK_SELECT)
2450 		elog(ERROR, "SPI_execute returned %s for %s",
2451 			 SPI_result_code_string(spi_result), query);
2452 
2453 	for (i = 0; i < SPI_processed; i++)
2454 	{
2455 		Datum		oid;
2456 		bool		isnull;
2457 
2458 		oid = SPI_getbinval(SPI_tuptable->vals[i],
2459 							SPI_tuptable->tupdesc,
2460 							1,
2461 							&isnull);
2462 		if (!isnull)
2463 			list = lappend_oid(list, DatumGetObjectId(oid));
2464 	}
2465 
2466 	return list;
2467 }
2468 
2469 
2470 static List *
schema_get_xml_visible_tables(Oid nspid)2471 schema_get_xml_visible_tables(Oid nspid)
2472 {
2473 	StringInfoData query;
2474 
2475 	initStringInfo(&query);
2476 	appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2477 					 " WHERE relnamespace = %u AND relkind IN ("
2478 					 CppAsString2(RELKIND_RELATION) ","
2479 					 CppAsString2(RELKIND_MATVIEW) ","
2480 					 CppAsString2(RELKIND_VIEW) ")"
2481 					 " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2482 					 " ORDER BY relname;", nspid);
2483 
2484 	return query_to_oid_list(query.data);
2485 }
2486 
2487 
/*
 * Condition matching the system schemas (names starting with "pg_", plus
 * information_schema).  Including these is probably not useful for a
 * database mapping, so XML_VISIBLE_SCHEMAS negates it.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_' OR nspname = 'information_schema')"

/*
 * Query selecting the OIDs of all non-system schemas that pass the
 * has_schema_privilege() test for USAGE.  It has no trailing semicolon so
 * that callers can append to it or embed it as a subquery.
 */
#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace" \
	" WHERE pg_catalog.has_schema_privilege (oid, 'USAGE')" \
	" AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2495 
2496 
2497 static List *
database_get_xml_visible_schemas(void)2498 database_get_xml_visible_schemas(void)
2499 {
2500 	return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2501 }
2502 
2503 
2504 static List *
database_get_xml_visible_tables(void)2505 database_get_xml_visible_tables(void)
2506 {
2507 	/* At the moment there is no order required here. */
2508 	return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2509 							 " WHERE relkind IN ("
2510 							 CppAsString2(RELKIND_RELATION) ","
2511 							 CppAsString2(RELKIND_MATVIEW) ","
2512 							 CppAsString2(RELKIND_VIEW) ")"
2513 							 " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2514 							 " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2515 }
2516 
2517 
2518 /*
2519  * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2520  * section 9.11.
2521  */
2522 
2523 static StringInfo
table_to_xml_internal(Oid relid,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2524 table_to_xml_internal(Oid relid,
2525 					  const char *xmlschema, bool nulls, bool tableforest,
2526 					  const char *targetns, bool top_level)
2527 {
2528 	StringInfoData query;
2529 
2530 	initStringInfo(&query);
2531 	appendStringInfo(&query, "SELECT * FROM %s",
2532 					 DatumGetCString(DirectFunctionCall1(regclassout,
2533 														 ObjectIdGetDatum(relid))));
2534 	return query_to_xml_internal(query.data, get_rel_name(relid),
2535 								 xmlschema, nulls, tableforest,
2536 								 targetns, top_level);
2537 }
2538 
2539 
2540 Datum
table_to_xml(PG_FUNCTION_ARGS)2541 table_to_xml(PG_FUNCTION_ARGS)
2542 {
2543 	Oid			relid = PG_GETARG_OID(0);
2544 	bool		nulls = PG_GETARG_BOOL(1);
2545 	bool		tableforest = PG_GETARG_BOOL(2);
2546 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2547 
2548 	PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2549 																nulls, tableforest,
2550 																targetns, true)));
2551 }
2552 
2553 
2554 Datum
query_to_xml(PG_FUNCTION_ARGS)2555 query_to_xml(PG_FUNCTION_ARGS)
2556 {
2557 	char	   *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2558 	bool		nulls = PG_GETARG_BOOL(1);
2559 	bool		tableforest = PG_GETARG_BOOL(2);
2560 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2561 
2562 	PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2563 																NULL, nulls, tableforest,
2564 																targetns, true)));
2565 }
2566 
2567 
2568 Datum
cursor_to_xml(PG_FUNCTION_ARGS)2569 cursor_to_xml(PG_FUNCTION_ARGS)
2570 {
2571 	char	   *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2572 	int32		count = PG_GETARG_INT32(1);
2573 	bool		nulls = PG_GETARG_BOOL(2);
2574 	bool		tableforest = PG_GETARG_BOOL(3);
2575 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2576 
2577 	StringInfoData result;
2578 	Portal		portal;
2579 	uint64		i;
2580 
2581 	initStringInfo(&result);
2582 
2583 	if (!tableforest)
2584 	{
2585 		xmldata_root_element_start(&result, "table", NULL, targetns, true);
2586 		appendStringInfoChar(&result, '\n');
2587 	}
2588 
2589 	SPI_connect();
2590 	portal = SPI_cursor_find(name);
2591 	if (portal == NULL)
2592 		ereport(ERROR,
2593 				(errcode(ERRCODE_UNDEFINED_CURSOR),
2594 				 errmsg("cursor \"%s\" does not exist", name)));
2595 
2596 	SPI_cursor_fetch(portal, true, count);
2597 	for (i = 0; i < SPI_processed; i++)
2598 		SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2599 								  tableforest, targetns, true);
2600 
2601 	SPI_finish();
2602 
2603 	if (!tableforest)
2604 		xmldata_root_element_end(&result, "table");
2605 
2606 	PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2607 }
2608 
2609 
2610 /*
2611  * Write the start tag of the root element of a data mapping.
2612  *
2613  * top_level means that this is the very top level of the eventual
2614  * output.  For example, when the user calls table_to_xml, then a call
2615  * with a table name to this function is the top level.  When the user
2616  * calls database_to_xml, then a call with a schema name to this
2617  * function is not the top level.  If top_level is false, then the XML
2618  * namespace declarations are omitted, because they supposedly already
2619  * appeared earlier in the output.  Repeating them is not wrong, but
2620  * it looks ugly.
2621  */
2622 static void
xmldata_root_element_start(StringInfo result,const char * eltname,const char * xmlschema,const char * targetns,bool top_level)2623 xmldata_root_element_start(StringInfo result, const char *eltname,
2624 						   const char *xmlschema, const char *targetns,
2625 						   bool top_level)
2626 {
2627 	/* This isn't really wrong but currently makes no sense. */
2628 	Assert(top_level || !xmlschema);
2629 
2630 	appendStringInfo(result, "<%s", eltname);
2631 	if (top_level)
2632 	{
2633 		appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2634 		if (strlen(targetns) > 0)
2635 			appendStringInfo(result, " xmlns=\"%s\"", targetns);
2636 	}
2637 	if (xmlschema)
2638 	{
2639 		/* FIXME: better targets */
2640 		if (strlen(targetns) > 0)
2641 			appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2642 		else
2643 			appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2644 	}
2645 	appendStringInfoString(result, ">\n");
2646 }
2647 
2648 
2649 static void
xmldata_root_element_end(StringInfo result,const char * eltname)2650 xmldata_root_element_end(StringInfo result, const char *eltname)
2651 {
2652 	appendStringInfo(result, "</%s>\n", eltname);
2653 }
2654 
2655 
2656 static StringInfo
query_to_xml_internal(const char * query,char * tablename,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2657 query_to_xml_internal(const char *query, char *tablename,
2658 					  const char *xmlschema, bool nulls, bool tableforest,
2659 					  const char *targetns, bool top_level)
2660 {
2661 	StringInfo	result;
2662 	char	   *xmltn;
2663 	uint64		i;
2664 
2665 	if (tablename)
2666 		xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2667 	else
2668 		xmltn = "table";
2669 
2670 	result = makeStringInfo();
2671 
2672 	SPI_connect();
2673 	if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2674 		ereport(ERROR,
2675 				(errcode(ERRCODE_DATA_EXCEPTION),
2676 				 errmsg("invalid query")));
2677 
2678 	if (!tableforest)
2679 	{
2680 		xmldata_root_element_start(result, xmltn, xmlschema,
2681 								   targetns, top_level);
2682 		appendStringInfoChar(result, '\n');
2683 	}
2684 
2685 	if (xmlschema)
2686 		appendStringInfo(result, "%s\n\n", xmlschema);
2687 
2688 	for (i = 0; i < SPI_processed; i++)
2689 		SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2690 								  tableforest, targetns, top_level);
2691 
2692 	if (!tableforest)
2693 		xmldata_root_element_end(result, xmltn);
2694 
2695 	SPI_finish();
2696 
2697 	return result;
2698 }
2699 
2700 
2701 Datum
table_to_xmlschema(PG_FUNCTION_ARGS)2702 table_to_xmlschema(PG_FUNCTION_ARGS)
2703 {
2704 	Oid			relid = PG_GETARG_OID(0);
2705 	bool		nulls = PG_GETARG_BOOL(1);
2706 	bool		tableforest = PG_GETARG_BOOL(2);
2707 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2708 	const char *result;
2709 	Relation	rel;
2710 
2711 	rel = table_open(relid, AccessShareLock);
2712 	result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2713 										tableforest, targetns);
2714 	table_close(rel, NoLock);
2715 
2716 	PG_RETURN_XML_P(cstring_to_xmltype(result));
2717 }
2718 
2719 
2720 Datum
query_to_xmlschema(PG_FUNCTION_ARGS)2721 query_to_xmlschema(PG_FUNCTION_ARGS)
2722 {
2723 	char	   *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2724 	bool		nulls = PG_GETARG_BOOL(1);
2725 	bool		tableforest = PG_GETARG_BOOL(2);
2726 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2727 	const char *result;
2728 	SPIPlanPtr	plan;
2729 	Portal		portal;
2730 
2731 	SPI_connect();
2732 
2733 	if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2734 		elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2735 
2736 	if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2737 		elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2738 
2739 	result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2740 													InvalidOid, nulls,
2741 													tableforest, targetns));
2742 	SPI_cursor_close(portal);
2743 	SPI_finish();
2744 
2745 	PG_RETURN_XML_P(cstring_to_xmltype(result));
2746 }
2747 
2748 
2749 Datum
cursor_to_xmlschema(PG_FUNCTION_ARGS)2750 cursor_to_xmlschema(PG_FUNCTION_ARGS)
2751 {
2752 	char	   *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2753 	bool		nulls = PG_GETARG_BOOL(1);
2754 	bool		tableforest = PG_GETARG_BOOL(2);
2755 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2756 	const char *xmlschema;
2757 	Portal		portal;
2758 
2759 	SPI_connect();
2760 	portal = SPI_cursor_find(name);
2761 	if (portal == NULL)
2762 		ereport(ERROR,
2763 				(errcode(ERRCODE_UNDEFINED_CURSOR),
2764 				 errmsg("cursor \"%s\" does not exist", name)));
2765 
2766 	xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2767 													   InvalidOid, nulls,
2768 													   tableforest, targetns));
2769 	SPI_finish();
2770 
2771 	PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2772 }
2773 
2774 
2775 Datum
table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2776 table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2777 {
2778 	Oid			relid = PG_GETARG_OID(0);
2779 	bool		nulls = PG_GETARG_BOOL(1);
2780 	bool		tableforest = PG_GETARG_BOOL(2);
2781 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2782 	Relation	rel;
2783 	const char *xmlschema;
2784 
2785 	rel = table_open(relid, AccessShareLock);
2786 	xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2787 										   tableforest, targetns);
2788 	table_close(rel, NoLock);
2789 
2790 	PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2791 																xmlschema, nulls, tableforest,
2792 																targetns, true)));
2793 }
2794 
2795 
2796 Datum
query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2797 query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2798 {
2799 	char	   *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2800 	bool		nulls = PG_GETARG_BOOL(1);
2801 	bool		tableforest = PG_GETARG_BOOL(2);
2802 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2803 
2804 	const char *xmlschema;
2805 	SPIPlanPtr	plan;
2806 	Portal		portal;
2807 
2808 	SPI_connect();
2809 
2810 	if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2811 		elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2812 
2813 	if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2814 		elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2815 
2816 	xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2817 													   InvalidOid, nulls, tableforest, targetns));
2818 	SPI_cursor_close(portal);
2819 	SPI_finish();
2820 
2821 	PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2822 																xmlschema, nulls, tableforest,
2823 																targetns, true)));
2824 }
2825 
2826 
2827 /*
2828  * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2829  * sections 9.13, 9.14.
2830  */
2831 
2832 static StringInfo
schema_to_xml_internal(Oid nspid,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2833 schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2834 					   bool tableforest, const char *targetns, bool top_level)
2835 {
2836 	StringInfo	result;
2837 	char	   *xmlsn;
2838 	List	   *relid_list;
2839 	ListCell   *cell;
2840 
2841 	xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2842 										   true, false);
2843 	result = makeStringInfo();
2844 
2845 	xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2846 	appendStringInfoChar(result, '\n');
2847 
2848 	if (xmlschema)
2849 		appendStringInfo(result, "%s\n\n", xmlschema);
2850 
2851 	SPI_connect();
2852 
2853 	relid_list = schema_get_xml_visible_tables(nspid);
2854 
2855 	foreach(cell, relid_list)
2856 	{
2857 		Oid			relid = lfirst_oid(cell);
2858 		StringInfo	subres;
2859 
2860 		subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2861 									   targetns, false);
2862 
2863 		appendBinaryStringInfo(result, subres->data, subres->len);
2864 		appendStringInfoChar(result, '\n');
2865 	}
2866 
2867 	SPI_finish();
2868 
2869 	xmldata_root_element_end(result, xmlsn);
2870 
2871 	return result;
2872 }
2873 
2874 
2875 Datum
schema_to_xml(PG_FUNCTION_ARGS)2876 schema_to_xml(PG_FUNCTION_ARGS)
2877 {
2878 	Name		name = PG_GETARG_NAME(0);
2879 	bool		nulls = PG_GETARG_BOOL(1);
2880 	bool		tableforest = PG_GETARG_BOOL(2);
2881 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2882 
2883 	char	   *schemaname;
2884 	Oid			nspid;
2885 
2886 	schemaname = NameStr(*name);
2887 	nspid = LookupExplicitNamespace(schemaname, false);
2888 
2889 	PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2890 																 nulls, tableforest, targetns, true)));
2891 }
2892 
2893 
2894 /*
2895  * Write the start element of the root element of an XML Schema mapping.
2896  */
2897 static void
xsd_schema_element_start(StringInfo result,const char * targetns)2898 xsd_schema_element_start(StringInfo result, const char *targetns)
2899 {
2900 	appendStringInfoString(result,
2901 						   "<xsd:schema\n"
2902 						   "    xmlns:xsd=\"" NAMESPACE_XSD "\"");
2903 	if (strlen(targetns) > 0)
2904 		appendStringInfo(result,
2905 						 "\n"
2906 						 "    targetNamespace=\"%s\"\n"
2907 						 "    elementFormDefault=\"qualified\"",
2908 						 targetns);
2909 	appendStringInfoString(result,
2910 						   ">\n\n");
2911 }
2912 
2913 
2914 static void
xsd_schema_element_end(StringInfo result)2915 xsd_schema_element_end(StringInfo result)
2916 {
2917 	appendStringInfoString(result, "</xsd:schema>");
2918 }
2919 
2920 
2921 static StringInfo
schema_to_xmlschema_internal(const char * schemaname,bool nulls,bool tableforest,const char * targetns)2922 schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2923 							 bool tableforest, const char *targetns)
2924 {
2925 	Oid			nspid;
2926 	List	   *relid_list;
2927 	List	   *tupdesc_list;
2928 	ListCell   *cell;
2929 	StringInfo	result;
2930 
2931 	result = makeStringInfo();
2932 
2933 	nspid = LookupExplicitNamespace(schemaname, false);
2934 
2935 	xsd_schema_element_start(result, targetns);
2936 
2937 	SPI_connect();
2938 
2939 	relid_list = schema_get_xml_visible_tables(nspid);
2940 
2941 	tupdesc_list = NIL;
2942 	foreach(cell, relid_list)
2943 	{
2944 		Relation	rel;
2945 
2946 		rel = table_open(lfirst_oid(cell), AccessShareLock);
2947 		tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2948 		table_close(rel, NoLock);
2949 	}
2950 
2951 	appendStringInfoString(result,
2952 						   map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2953 
2954 	appendStringInfoString(result,
2955 						   map_sql_schema_to_xmlschema_types(nspid, relid_list,
2956 															 nulls, tableforest, targetns));
2957 
2958 	xsd_schema_element_end(result);
2959 
2960 	SPI_finish();
2961 
2962 	return result;
2963 }
2964 
2965 
2966 Datum
schema_to_xmlschema(PG_FUNCTION_ARGS)2967 schema_to_xmlschema(PG_FUNCTION_ARGS)
2968 {
2969 	Name		name = PG_GETARG_NAME(0);
2970 	bool		nulls = PG_GETARG_BOOL(1);
2971 	bool		tableforest = PG_GETARG_BOOL(2);
2972 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2973 
2974 	PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2975 																	   nulls, tableforest, targetns)));
2976 }
2977 
2978 
2979 Datum
schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2980 schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2981 {
2982 	Name		name = PG_GETARG_NAME(0);
2983 	bool		nulls = PG_GETARG_BOOL(1);
2984 	bool		tableforest = PG_GETARG_BOOL(2);
2985 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2986 	char	   *schemaname;
2987 	Oid			nspid;
2988 	StringInfo	xmlschema;
2989 
2990 	schemaname = NameStr(*name);
2991 	nspid = LookupExplicitNamespace(schemaname, false);
2992 
2993 	xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
2994 											 tableforest, targetns);
2995 
2996 	PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
2997 																 xmlschema->data, nulls,
2998 																 tableforest, targetns, true)));
2999 }
3000 
3001 
3002 /*
3003  * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3004  * sections 9.16, 9.17.
3005  */
3006 
3007 static StringInfo
database_to_xml_internal(const char * xmlschema,bool nulls,bool tableforest,const char * targetns)3008 database_to_xml_internal(const char *xmlschema, bool nulls,
3009 						 bool tableforest, const char *targetns)
3010 {
3011 	StringInfo	result;
3012 	List	   *nspid_list;
3013 	ListCell   *cell;
3014 	char	   *xmlcn;
3015 
3016 	xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3017 										   true, false);
3018 	result = makeStringInfo();
3019 
3020 	xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3021 	appendStringInfoChar(result, '\n');
3022 
3023 	if (xmlschema)
3024 		appendStringInfo(result, "%s\n\n", xmlschema);
3025 
3026 	SPI_connect();
3027 
3028 	nspid_list = database_get_xml_visible_schemas();
3029 
3030 	foreach(cell, nspid_list)
3031 	{
3032 		Oid			nspid = lfirst_oid(cell);
3033 		StringInfo	subres;
3034 
3035 		subres = schema_to_xml_internal(nspid, NULL, nulls,
3036 										tableforest, targetns, false);
3037 
3038 		appendBinaryStringInfo(result, subres->data, subres->len);
3039 		appendStringInfoChar(result, '\n');
3040 	}
3041 
3042 	SPI_finish();
3043 
3044 	xmldata_root_element_end(result, xmlcn);
3045 
3046 	return result;
3047 }
3048 
3049 
3050 Datum
database_to_xml(PG_FUNCTION_ARGS)3051 database_to_xml(PG_FUNCTION_ARGS)
3052 {
3053 	bool		nulls = PG_GETARG_BOOL(0);
3054 	bool		tableforest = PG_GETARG_BOOL(1);
3055 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3056 
3057 	PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3058 																   tableforest, targetns)));
3059 }
3060 
3061 
3062 static StringInfo
database_to_xmlschema_internal(bool nulls,bool tableforest,const char * targetns)3063 database_to_xmlschema_internal(bool nulls, bool tableforest,
3064 							   const char *targetns)
3065 {
3066 	List	   *relid_list;
3067 	List	   *nspid_list;
3068 	List	   *tupdesc_list;
3069 	ListCell   *cell;
3070 	StringInfo	result;
3071 
3072 	result = makeStringInfo();
3073 
3074 	xsd_schema_element_start(result, targetns);
3075 
3076 	SPI_connect();
3077 
3078 	relid_list = database_get_xml_visible_tables();
3079 	nspid_list = database_get_xml_visible_schemas();
3080 
3081 	tupdesc_list = NIL;
3082 	foreach(cell, relid_list)
3083 	{
3084 		Relation	rel;
3085 
3086 		rel = table_open(lfirst_oid(cell), AccessShareLock);
3087 		tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3088 		table_close(rel, NoLock);
3089 	}
3090 
3091 	appendStringInfoString(result,
3092 						   map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3093 
3094 	appendStringInfoString(result,
3095 						   map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3096 
3097 	xsd_schema_element_end(result);
3098 
3099 	SPI_finish();
3100 
3101 	return result;
3102 }
3103 
3104 
3105 Datum
database_to_xmlschema(PG_FUNCTION_ARGS)3106 database_to_xmlschema(PG_FUNCTION_ARGS)
3107 {
3108 	bool		nulls = PG_GETARG_BOOL(0);
3109 	bool		tableforest = PG_GETARG_BOOL(1);
3110 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3111 
3112 	PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3113 																		 tableforest, targetns)));
3114 }
3115 
3116 
3117 Datum
database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)3118 database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3119 {
3120 	bool		nulls = PG_GETARG_BOOL(0);
3121 	bool		tableforest = PG_GETARG_BOOL(1);
3122 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3123 	StringInfo	xmlschema;
3124 
3125 	xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3126 
3127 	PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3128 																   nulls, tableforest, targetns)));
3129 }
3130 
3131 
3132 /*
3133  * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3134  * 9.2.
3135  */
3136 static char *
map_multipart_sql_identifier_to_xml_name(const char * a,const char * b,const char * c,const char * d)3137 map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3138 {
3139 	StringInfoData result;
3140 
3141 	initStringInfo(&result);
3142 
3143 	if (a)
3144 		appendStringInfoString(&result,
3145 							   map_sql_identifier_to_xml_name(a, true, true));
3146 	if (b)
3147 		appendStringInfo(&result, ".%s",
3148 						 map_sql_identifier_to_xml_name(b, true, true));
3149 	if (c)
3150 		appendStringInfo(&result, ".%s",
3151 						 map_sql_identifier_to_xml_name(c, true, true));
3152 	if (d)
3153 		appendStringInfo(&result, ".%s",
3154 						 map_sql_identifier_to_xml_name(d, true, true));
3155 
3156 	return result.data;
3157 }
3158 
3159 
3160 /*
3161  * Map an SQL table to an XML Schema document; see SQL/XML:2008
3162  * section 9.11.
3163  *
3164  * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3165  * 9.9.
3166  */
3167 static const char *
map_sql_table_to_xmlschema(TupleDesc tupdesc,Oid relid,bool nulls,bool tableforest,const char * targetns)3168 map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3169 						   bool tableforest, const char *targetns)
3170 {
3171 	int			i;
3172 	char	   *xmltn;
3173 	char	   *tabletypename;
3174 	char	   *rowtypename;
3175 	StringInfoData result;
3176 
3177 	initStringInfo(&result);
3178 
3179 	if (OidIsValid(relid))
3180 	{
3181 		HeapTuple	tuple;
3182 		Form_pg_class reltuple;
3183 
3184 		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3185 		if (!HeapTupleIsValid(tuple))
3186 			elog(ERROR, "cache lookup failed for relation %u", relid);
3187 		reltuple = (Form_pg_class) GETSTRUCT(tuple);
3188 
3189 		xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3190 											   true, false);
3191 
3192 		tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3193 																 get_database_name(MyDatabaseId),
3194 																 get_namespace_name(reltuple->relnamespace),
3195 																 NameStr(reltuple->relname));
3196 
3197 		rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3198 															   get_database_name(MyDatabaseId),
3199 															   get_namespace_name(reltuple->relnamespace),
3200 															   NameStr(reltuple->relname));
3201 
3202 		ReleaseSysCache(tuple);
3203 	}
3204 	else
3205 	{
3206 		if (tableforest)
3207 			xmltn = "row";
3208 		else
3209 			xmltn = "table";
3210 
3211 		tabletypename = "TableType";
3212 		rowtypename = "RowType";
3213 	}
3214 
3215 	xsd_schema_element_start(&result, targetns);
3216 
3217 	appendStringInfoString(&result,
3218 						   map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3219 
3220 	appendStringInfo(&result,
3221 					 "<xsd:complexType name=\"%s\">\n"
3222 					 "  <xsd:sequence>\n",
3223 					 rowtypename);
3224 
3225 	for (i = 0; i < tupdesc->natts; i++)
3226 	{
3227 		Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3228 
3229 		if (att->attisdropped)
3230 			continue;
3231 		appendStringInfo(&result,
3232 						 "    <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3233 						 map_sql_identifier_to_xml_name(NameStr(att->attname),
3234 														true, false),
3235 						 map_sql_type_to_xml_name(att->atttypid, -1),
3236 						 nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3237 	}
3238 
3239 	appendStringInfoString(&result,
3240 						   "  </xsd:sequence>\n"
3241 						   "</xsd:complexType>\n\n");
3242 
3243 	if (!tableforest)
3244 	{
3245 		appendStringInfo(&result,
3246 						 "<xsd:complexType name=\"%s\">\n"
3247 						 "  <xsd:sequence>\n"
3248 						 "    <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3249 						 "  </xsd:sequence>\n"
3250 						 "</xsd:complexType>\n\n",
3251 						 tabletypename, rowtypename);
3252 
3253 		appendStringInfo(&result,
3254 						 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3255 						 xmltn, tabletypename);
3256 	}
3257 	else
3258 		appendStringInfo(&result,
3259 						 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3260 						 xmltn, rowtypename);
3261 
3262 	xsd_schema_element_end(&result);
3263 
3264 	return result.data;
3265 }
3266 
3267 
3268 /*
3269  * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3270  * section 9.12.
3271  */
3272 static const char *
map_sql_schema_to_xmlschema_types(Oid nspid,List * relid_list,bool nulls,bool tableforest,const char * targetns)3273 map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3274 								  bool tableforest, const char *targetns)
3275 {
3276 	char	   *dbname;
3277 	char	   *nspname;
3278 	char	   *xmlsn;
3279 	char	   *schematypename;
3280 	StringInfoData result;
3281 	ListCell   *cell;
3282 
3283 	dbname = get_database_name(MyDatabaseId);
3284 	nspname = get_namespace_name(nspid);
3285 
3286 	initStringInfo(&result);
3287 
3288 	xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3289 
3290 	schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3291 															  dbname,
3292 															  nspname,
3293 															  NULL);
3294 
3295 	appendStringInfo(&result,
3296 					 "<xsd:complexType name=\"%s\">\n", schematypename);
3297 	if (!tableforest)
3298 		appendStringInfoString(&result,
3299 							   "  <xsd:all>\n");
3300 	else
3301 		appendStringInfoString(&result,
3302 							   "  <xsd:sequence>\n");
3303 
3304 	foreach(cell, relid_list)
3305 	{
3306 		Oid			relid = lfirst_oid(cell);
3307 		char	   *relname = get_rel_name(relid);
3308 		char	   *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3309 		char	   *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3310 																			 dbname,
3311 																			 nspname,
3312 																			 relname);
3313 
3314 		if (!tableforest)
3315 			appendStringInfo(&result,
3316 							 "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3317 							 xmltn, tabletypename);
3318 		else
3319 			appendStringInfo(&result,
3320 							 "    <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3321 							 xmltn, tabletypename);
3322 	}
3323 
3324 	if (!tableforest)
3325 		appendStringInfoString(&result,
3326 							   "  </xsd:all>\n");
3327 	else
3328 		appendStringInfoString(&result,
3329 							   "  </xsd:sequence>\n");
3330 	appendStringInfoString(&result,
3331 						   "</xsd:complexType>\n\n");
3332 
3333 	appendStringInfo(&result,
3334 					 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3335 					 xmlsn, schematypename);
3336 
3337 	return result.data;
3338 }
3339 
3340 
3341 /*
3342  * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3343  * section 9.15.
3344  */
3345 static const char *
map_sql_catalog_to_xmlschema_types(List * nspid_list,bool nulls,bool tableforest,const char * targetns)3346 map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3347 								   bool tableforest, const char *targetns)
3348 {
3349 	char	   *dbname;
3350 	char	   *xmlcn;
3351 	char	   *catalogtypename;
3352 	StringInfoData result;
3353 	ListCell   *cell;
3354 
3355 	dbname = get_database_name(MyDatabaseId);
3356 
3357 	initStringInfo(&result);
3358 
3359 	xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3360 
3361 	catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3362 															   dbname,
3363 															   NULL,
3364 															   NULL);
3365 
3366 	appendStringInfo(&result,
3367 					 "<xsd:complexType name=\"%s\">\n", catalogtypename);
3368 	appendStringInfoString(&result,
3369 						   "  <xsd:all>\n");
3370 
3371 	foreach(cell, nspid_list)
3372 	{
3373 		Oid			nspid = lfirst_oid(cell);
3374 		char	   *nspname = get_namespace_name(nspid);
3375 		char	   *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3376 		char	   *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3377 																			  dbname,
3378 																			  nspname,
3379 																			  NULL);
3380 
3381 		appendStringInfo(&result,
3382 						 "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3383 						 xmlsn, schematypename);
3384 	}
3385 
3386 	appendStringInfoString(&result,
3387 						   "  </xsd:all>\n");
3388 	appendStringInfoString(&result,
3389 						   "</xsd:complexType>\n\n");
3390 
3391 	appendStringInfo(&result,
3392 					 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3393 					 xmlcn, catalogtypename);
3394 
3395 	return result.data;
3396 }
3397 
3398 
3399 /*
3400  * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3401  */
3402 static const char *
map_sql_type_to_xml_name(Oid typeoid,int typmod)3403 map_sql_type_to_xml_name(Oid typeoid, int typmod)
3404 {
3405 	StringInfoData result;
3406 
3407 	initStringInfo(&result);
3408 
3409 	switch (typeoid)
3410 	{
3411 		case BPCHAROID:
3412 			if (typmod == -1)
3413 				appendStringInfoString(&result, "CHAR");
3414 			else
3415 				appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3416 			break;
3417 		case VARCHAROID:
3418 			if (typmod == -1)
3419 				appendStringInfoString(&result, "VARCHAR");
3420 			else
3421 				appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3422 			break;
3423 		case NUMERICOID:
3424 			if (typmod == -1)
3425 				appendStringInfoString(&result, "NUMERIC");
3426 			else
3427 				appendStringInfo(&result, "NUMERIC_%d_%d",
3428 								 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3429 								 (typmod - VARHDRSZ) & 0xffff);
3430 			break;
3431 		case INT4OID:
3432 			appendStringInfoString(&result, "INTEGER");
3433 			break;
3434 		case INT2OID:
3435 			appendStringInfoString(&result, "SMALLINT");
3436 			break;
3437 		case INT8OID:
3438 			appendStringInfoString(&result, "BIGINT");
3439 			break;
3440 		case FLOAT4OID:
3441 			appendStringInfoString(&result, "REAL");
3442 			break;
3443 		case FLOAT8OID:
3444 			appendStringInfoString(&result, "DOUBLE");
3445 			break;
3446 		case BOOLOID:
3447 			appendStringInfoString(&result, "BOOLEAN");
3448 			break;
3449 		case TIMEOID:
3450 			if (typmod == -1)
3451 				appendStringInfoString(&result, "TIME");
3452 			else
3453 				appendStringInfo(&result, "TIME_%d", typmod);
3454 			break;
3455 		case TIMETZOID:
3456 			if (typmod == -1)
3457 				appendStringInfoString(&result, "TIME_WTZ");
3458 			else
3459 				appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3460 			break;
3461 		case TIMESTAMPOID:
3462 			if (typmod == -1)
3463 				appendStringInfoString(&result, "TIMESTAMP");
3464 			else
3465 				appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3466 			break;
3467 		case TIMESTAMPTZOID:
3468 			if (typmod == -1)
3469 				appendStringInfoString(&result, "TIMESTAMP_WTZ");
3470 			else
3471 				appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3472 			break;
3473 		case DATEOID:
3474 			appendStringInfoString(&result, "DATE");
3475 			break;
3476 		case XMLOID:
3477 			appendStringInfoString(&result, "XML");
3478 			break;
3479 		default:
3480 			{
3481 				HeapTuple	tuple;
3482 				Form_pg_type typtuple;
3483 
3484 				tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3485 				if (!HeapTupleIsValid(tuple))
3486 					elog(ERROR, "cache lookup failed for type %u", typeoid);
3487 				typtuple = (Form_pg_type) GETSTRUCT(tuple);
3488 
3489 				appendStringInfoString(&result,
3490 									   map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3491 																				get_database_name(MyDatabaseId),
3492 																				get_namespace_name(typtuple->typnamespace),
3493 																				NameStr(typtuple->typname)));
3494 
3495 				ReleaseSysCache(tuple);
3496 			}
3497 	}
3498 
3499 	return result.data;
3500 }
3501 
3502 
3503 /*
3504  * Map a collection of SQL data types to XML Schema data types; see
3505  * SQL/XML:2008 section 9.7.
3506  */
3507 static const char *
map_sql_typecoll_to_xmlschema_types(List * tupdesc_list)3508 map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3509 {
3510 	List	   *uniquetypes = NIL;
3511 	int			i;
3512 	StringInfoData result;
3513 	ListCell   *cell0;
3514 
3515 	/* extract all column types used in the set of TupleDescs */
3516 	foreach(cell0, tupdesc_list)
3517 	{
3518 		TupleDesc	tupdesc = (TupleDesc) lfirst(cell0);
3519 
3520 		for (i = 0; i < tupdesc->natts; i++)
3521 		{
3522 			Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3523 
3524 			if (att->attisdropped)
3525 				continue;
3526 			uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3527 		}
3528 	}
3529 
3530 	/* add base types of domains */
3531 	foreach(cell0, uniquetypes)
3532 	{
3533 		Oid			typid = lfirst_oid(cell0);
3534 		Oid			basetypid = getBaseType(typid);
3535 
3536 		if (basetypid != typid)
3537 			uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3538 	}
3539 
3540 	/* Convert to textual form */
3541 	initStringInfo(&result);
3542 
3543 	foreach(cell0, uniquetypes)
3544 	{
3545 		appendStringInfo(&result, "%s\n",
3546 						 map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3547 														-1));
3548 	}
3549 
3550 	return result.data;
3551 }
3552 
3553 
3554 /*
3555  * Map an SQL data type to a named XML Schema data type; see
3556  * SQL/XML:2008 sections 9.5 and 9.6.
3557  *
3558  * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3559  * a name attribute, which this function does.  The name-less version
3560  * 9.5 doesn't appear to be required anywhere.)
3561  */
3562 static const char *
map_sql_type_to_xmlschema_type(Oid typeoid,int typmod)3563 map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3564 {
3565 	StringInfoData result;
3566 	const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3567 
3568 	initStringInfo(&result);
3569 
3570 	if (typeoid == XMLOID)
3571 	{
3572 		appendStringInfoString(&result,
3573 							   "<xsd:complexType mixed=\"true\">\n"
3574 							   "  <xsd:sequence>\n"
3575 							   "    <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3576 							   "  </xsd:sequence>\n"
3577 							   "</xsd:complexType>\n");
3578 	}
3579 	else
3580 	{
3581 		appendStringInfo(&result,
3582 						 "<xsd:simpleType name=\"%s\">\n", typename);
3583 
3584 		switch (typeoid)
3585 		{
3586 			case BPCHAROID:
3587 			case VARCHAROID:
3588 			case TEXTOID:
3589 				appendStringInfoString(&result,
3590 									   "  <xsd:restriction base=\"xsd:string\">\n");
3591 				if (typmod != -1)
3592 					appendStringInfo(&result,
3593 									 "    <xsd:maxLength value=\"%d\"/>\n",
3594 									 typmod - VARHDRSZ);
3595 				appendStringInfoString(&result, "  </xsd:restriction>\n");
3596 				break;
3597 
3598 			case BYTEAOID:
3599 				appendStringInfo(&result,
3600 								 "  <xsd:restriction base=\"xsd:%s\">\n"
3601 								 "  </xsd:restriction>\n",
3602 								 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3603 				break;
3604 
3605 			case NUMERICOID:
3606 				if (typmod != -1)
3607 					appendStringInfo(&result,
3608 									 "  <xsd:restriction base=\"xsd:decimal\">\n"
3609 									 "    <xsd:totalDigits value=\"%d\"/>\n"
3610 									 "    <xsd:fractionDigits value=\"%d\"/>\n"
3611 									 "  </xsd:restriction>\n",
3612 									 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3613 									 (typmod - VARHDRSZ) & 0xffff);
3614 				break;
3615 
3616 			case INT2OID:
3617 				appendStringInfo(&result,
3618 								 "  <xsd:restriction base=\"xsd:short\">\n"
3619 								 "    <xsd:maxInclusive value=\"%d\"/>\n"
3620 								 "    <xsd:minInclusive value=\"%d\"/>\n"
3621 								 "  </xsd:restriction>\n",
3622 								 SHRT_MAX, SHRT_MIN);
3623 				break;
3624 
3625 			case INT4OID:
3626 				appendStringInfo(&result,
3627 								 "  <xsd:restriction base=\"xsd:int\">\n"
3628 								 "    <xsd:maxInclusive value=\"%d\"/>\n"
3629 								 "    <xsd:minInclusive value=\"%d\"/>\n"
3630 								 "  </xsd:restriction>\n",
3631 								 INT_MAX, INT_MIN);
3632 				break;
3633 
3634 			case INT8OID:
3635 				appendStringInfo(&result,
3636 								 "  <xsd:restriction base=\"xsd:long\">\n"
3637 								 "    <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3638 								 "    <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3639 								 "  </xsd:restriction>\n",
3640 								 (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1,
3641 								 (((uint64) 1) << (sizeof(int64) * 8 - 1)));
3642 				break;
3643 
3644 			case FLOAT4OID:
3645 				appendStringInfoString(&result,
3646 									   "  <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3647 				break;
3648 
3649 			case FLOAT8OID:
3650 				appendStringInfoString(&result,
3651 									   "  <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3652 				break;
3653 
3654 			case BOOLOID:
3655 				appendStringInfoString(&result,
3656 									   "  <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3657 				break;
3658 
3659 			case TIMEOID:
3660 			case TIMETZOID:
3661 				{
3662 					const char *tz = (typeoid == TIMETZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3663 
3664 					if (typmod == -1)
3665 						appendStringInfo(&result,
3666 										 "  <xsd:restriction base=\"xsd:time\">\n"
3667 										 "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3668 										 "  </xsd:restriction>\n", tz);
3669 					else if (typmod == 0)
3670 						appendStringInfo(&result,
3671 										 "  <xsd:restriction base=\"xsd:time\">\n"
3672 										 "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3673 										 "  </xsd:restriction>\n", tz);
3674 					else
3675 						appendStringInfo(&result,
3676 										 "  <xsd:restriction base=\"xsd:time\">\n"
3677 										 "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3678 										 "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3679 					break;
3680 				}
3681 
3682 			case TIMESTAMPOID:
3683 			case TIMESTAMPTZOID:
3684 				{
3685 					const char *tz = (typeoid == TIMESTAMPTZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3686 
3687 					if (typmod == -1)
3688 						appendStringInfo(&result,
3689 										 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3690 										 "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3691 										 "  </xsd:restriction>\n", tz);
3692 					else if (typmod == 0)
3693 						appendStringInfo(&result,
3694 										 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3695 										 "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3696 										 "  </xsd:restriction>\n", tz);
3697 					else
3698 						appendStringInfo(&result,
3699 										 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3700 										 "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3701 										 "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3702 					break;
3703 				}
3704 
3705 			case DATEOID:
3706 				appendStringInfoString(&result,
3707 									   "  <xsd:restriction base=\"xsd:date\">\n"
3708 									   "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3709 									   "  </xsd:restriction>\n");
3710 				break;
3711 
3712 			default:
3713 				if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3714 				{
3715 					Oid			base_typeoid;
3716 					int32		base_typmod = -1;
3717 
3718 					base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3719 
3720 					appendStringInfo(&result,
3721 									 "  <xsd:restriction base=\"%s\"/>\n",
3722 									 map_sql_type_to_xml_name(base_typeoid, base_typmod));
3723 				}
3724 				break;
3725 		}
3726 		appendStringInfoString(&result, "</xsd:simpleType>\n");
3727 	}
3728 
3729 	return result.data;
3730 }
3731 
3732 
3733 /*
3734  * Map an SQL row to an XML element, taking the row from the active
3735  * SPI cursor.  See also SQL/XML:2008 section 9.10.
3736  */
3737 static void
SPI_sql_row_to_xmlelement(uint64 rownum,StringInfo result,char * tablename,bool nulls,bool tableforest,const char * targetns,bool top_level)3738 SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3739 						  bool nulls, bool tableforest,
3740 						  const char *targetns, bool top_level)
3741 {
3742 	int			i;
3743 	char	   *xmltn;
3744 
3745 	if (tablename)
3746 		xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3747 	else
3748 	{
3749 		if (tableforest)
3750 			xmltn = "row";
3751 		else
3752 			xmltn = "table";
3753 	}
3754 
3755 	if (tableforest)
3756 		xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3757 	else
3758 		appendStringInfoString(result, "<row>\n");
3759 
3760 	for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3761 	{
3762 		char	   *colname;
3763 		Datum		colval;
3764 		bool		isnull;
3765 
3766 		colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3767 												 true, false);
3768 		colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3769 							   SPI_tuptable->tupdesc,
3770 							   i,
3771 							   &isnull);
3772 		if (isnull)
3773 		{
3774 			if (nulls)
3775 				appendStringInfo(result, "  <%s xsi:nil=\"true\"/>\n", colname);
3776 		}
3777 		else
3778 			appendStringInfo(result, "  <%s>%s</%s>\n",
3779 							 colname,
3780 							 map_sql_value_to_xml_value(colval,
3781 														SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3782 							 colname);
3783 	}
3784 
3785 	if (tableforest)
3786 	{
3787 		xmldata_root_element_end(result, xmltn);
3788 		appendStringInfoChar(result, '\n');
3789 	}
3790 	else
3791 		appendStringInfoString(result, "</row>\n\n");
3792 }
3793 
3794 
3795 /*
3796  * XPath related functions
3797  */
3798 
3799 #ifdef USE_LIBXML
3800 
3801 /*
3802  * Convert XML node to text.
3803  *
3804  * For attribute and text nodes, return the escaped text.  For anything else,
3805  * dump the whole subtree.
3806  */
3807 static text *
xml_xmlnodetoxmltype(xmlNodePtr cur,PgXmlErrorContext * xmlerrcxt)3808 xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
3809 {
3810 	xmltype    *result = NULL;
3811 
3812 	if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
3813 	{
3814 		void		(*volatile nodefree) (xmlNodePtr) = NULL;
3815 		volatile xmlBufferPtr buf = NULL;
3816 		volatile xmlNodePtr cur_copy = NULL;
3817 
3818 		PG_TRY();
3819 		{
3820 			int			bytes;
3821 
3822 			buf = xmlBufferCreate();
3823 			if (buf == NULL || xmlerrcxt->err_occurred)
3824 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3825 							"could not allocate xmlBuffer");
3826 
3827 			/*
3828 			 * Produce a dump of the node that we can serialize.  xmlNodeDump
3829 			 * does that, but the result of that function won't contain
3830 			 * namespace definitions from ancestor nodes, so we first do a
3831 			 * xmlCopyNode() which duplicates the node along with its required
3832 			 * namespace definitions.
3833 			 *
3834 			 * Some old libxml2 versions such as 2.7.6 produce partially
3835 			 * broken XML_DOCUMENT_NODE nodes (unset content field) when
3836 			 * copying them.  xmlNodeDump of such a node works fine, but
3837 			 * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
3838 			 */
3839 			cur_copy = xmlCopyNode(cur, 1);
3840 			if (cur_copy == NULL || xmlerrcxt->err_occurred)
3841 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3842 							"could not copy node");
3843 			nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
3844 				(void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
3845 
3846 			bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
3847 			if (bytes == -1 || xmlerrcxt->err_occurred)
3848 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3849 							"could not dump node");
3850 
3851 			result = xmlBuffer_to_xmltype(buf);
3852 		}
3853 		PG_FINALLY();
3854 		{
3855 			if (nodefree)
3856 				nodefree(cur_copy);
3857 			if (buf)
3858 				xmlBufferFree(buf);
3859 		}
3860 		PG_END_TRY();
3861 	}
3862 	else
3863 	{
3864 		xmlChar    *str;
3865 
3866 		str = xmlXPathCastNodeToString(cur);
3867 		PG_TRY();
3868 		{
3869 			/* Here we rely on XML having the same representation as TEXT */
3870 			char	   *escaped = escape_xml((char *) str);
3871 
3872 			result = (xmltype *) cstring_to_text(escaped);
3873 			pfree(escaped);
3874 		}
3875 		PG_FINALLY();
3876 		{
3877 			xmlFree(str);
3878 		}
3879 		PG_END_TRY();
3880 	}
3881 
3882 	return result;
3883 }
3884 
3885 /*
3886  * Convert an XML XPath object (the result of evaluating an XPath expression)
3887  * to an array of xml values, which are appended to astate.  The function
3888  * result value is the number of elements in the array.
3889  *
3890  * If "astate" is NULL then we don't generate the array value, but we still
3891  * return the number of elements it would have had.
3892  *
3893  * Nodesets are converted to an array containing the nodes' textual
3894  * representations.  Primitive values (float, double, string) are converted
3895  * to a single-element array containing the value's string representation.
3896  */
3897 static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,ArrayBuildState * astate,PgXmlErrorContext * xmlerrcxt)3898 xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3899 					   ArrayBuildState *astate,
3900 					   PgXmlErrorContext *xmlerrcxt)
3901 {
3902 	int			result = 0;
3903 	Datum		datum;
3904 	Oid			datumtype;
3905 	char	   *result_str;
3906 
3907 	switch (xpathobj->type)
3908 	{
3909 		case XPATH_NODESET:
3910 			if (xpathobj->nodesetval != NULL)
3911 			{
3912 				result = xpathobj->nodesetval->nodeNr;
3913 				if (astate != NULL)
3914 				{
3915 					int			i;
3916 
3917 					for (i = 0; i < result; i++)
3918 					{
3919 						datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
3920 																	 xmlerrcxt));
3921 						(void) accumArrayResult(astate, datum, false,
3922 												XMLOID, CurrentMemoryContext);
3923 					}
3924 				}
3925 			}
3926 			return result;
3927 
3928 		case XPATH_BOOLEAN:
3929 			if (astate == NULL)
3930 				return 1;
3931 			datum = BoolGetDatum(xpathobj->boolval);
3932 			datumtype = BOOLOID;
3933 			break;
3934 
3935 		case XPATH_NUMBER:
3936 			if (astate == NULL)
3937 				return 1;
3938 			datum = Float8GetDatum(xpathobj->floatval);
3939 			datumtype = FLOAT8OID;
3940 			break;
3941 
3942 		case XPATH_STRING:
3943 			if (astate == NULL)
3944 				return 1;
3945 			datum = CStringGetDatum((char *) xpathobj->stringval);
3946 			datumtype = CSTRINGOID;
3947 			break;
3948 
3949 		default:
3950 			elog(ERROR, "xpath expression result type %d is unsupported",
3951 				 xpathobj->type);
3952 			return 0;			/* keep compiler quiet */
3953 	}
3954 
3955 	/* Common code for scalar-value cases */
3956 	result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3957 	datum = PointerGetDatum(cstring_to_xmltype(result_str));
3958 	(void) accumArrayResult(astate, datum, false,
3959 							XMLOID, CurrentMemoryContext);
3960 	return 1;
3961 }
3962 
3963 
3964 /*
3965  * Common code for xpath() and xmlexists()
3966  *
3967  * Evaluate XPath expression and return number of nodes in res_nitems
3968  * and array of XML values in astate.  Either of those pointers can be
3969  * NULL if the corresponding result isn't wanted.
3970  *
3971  * It is up to the user to ensure that the XML passed is in fact
3972  * an XML document - XPath doesn't work easily on fragments without
3973  * a context node being known.
3974  */
3975 static void
xpath_internal(text * xpath_expr_text,xmltype * data,ArrayType * namespaces,int * res_nitems,ArrayBuildState * astate)3976 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3977 			   int *res_nitems, ArrayBuildState *astate)
3978 {
3979 	PgXmlErrorContext *xmlerrcxt;
3980 	volatile xmlParserCtxtPtr ctxt = NULL;
3981 	volatile xmlDocPtr doc = NULL;
3982 	volatile xmlXPathContextPtr xpathctx = NULL;
3983 	volatile xmlXPathCompExprPtr xpathcomp = NULL;
3984 	volatile xmlXPathObjectPtr xpathobj = NULL;
3985 	char	   *datastr;
3986 	int32		len;
3987 	int32		xpath_len;
3988 	xmlChar    *string;
3989 	xmlChar    *xpath_expr;
3990 	size_t		xmldecl_len = 0;
3991 	int			i;
3992 	int			ndim;
3993 	Datum	   *ns_names_uris;
3994 	bool	   *ns_names_uris_nulls;
3995 	int			ns_count;
3996 
3997 	/*
3998 	 * Namespace mappings are passed as text[].  If an empty array is passed
3999 	 * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
4000 	 * Else, a 2-dimensional array with length of the second axis being equal
4001 	 * to 2 should be passed, i.e., every subarray contains 2 elements, the
4002 	 * first element defining the name, the second one the URI.  Example:
4003 	 * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4004 	 * 'http://example2.com']].
4005 	 */
4006 	ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4007 	if (ndim != 0)
4008 	{
4009 		int		   *dims;
4010 
4011 		dims = ARR_DIMS(namespaces);
4012 
4013 		if (ndim != 2 || dims[1] != 2)
4014 			ereport(ERROR,
4015 					(errcode(ERRCODE_DATA_EXCEPTION),
4016 					 errmsg("invalid array for XML namespace mapping"),
4017 					 errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4018 
4019 		Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4020 
4021 		deconstruct_array(namespaces, TEXTOID, -1, false, TYPALIGN_INT,
4022 						  &ns_names_uris, &ns_names_uris_nulls,
4023 						  &ns_count);
4024 
4025 		Assert((ns_count % 2) == 0);	/* checked above */
4026 		ns_count /= 2;			/* count pairs only */
4027 	}
4028 	else
4029 	{
4030 		ns_names_uris = NULL;
4031 		ns_names_uris_nulls = NULL;
4032 		ns_count = 0;
4033 	}
4034 
4035 	datastr = VARDATA(data);
4036 	len = VARSIZE(data) - VARHDRSZ;
4037 	xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4038 	if (xpath_len == 0)
4039 		ereport(ERROR,
4040 				(errcode(ERRCODE_DATA_EXCEPTION),
4041 				 errmsg("empty XPath expression")));
4042 
4043 	string = pg_xmlCharStrndup(datastr, len);
4044 	xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4045 
4046 	/*
4047 	 * In a UTF8 database, skip any xml declaration, which might assert
4048 	 * another encoding.  Ignore parse_xml_decl() failure, letting
4049 	 * xmlCtxtReadMemory() report parse errors.  Documentation disclaims
4050 	 * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4051 	 * those scenarios bug-compatible with historical behavior.
4052 	 */
4053 	if (GetDatabaseEncoding() == PG_UTF8)
4054 		parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4055 
4056 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4057 
4058 	PG_TRY();
4059 	{
4060 		xmlInitParser();
4061 
4062 		/*
4063 		 * redundant XML parsing (two parsings for the same value during one
4064 		 * command execution are possible)
4065 		 */
4066 		ctxt = xmlNewParserCtxt();
4067 		if (ctxt == NULL || xmlerrcxt->err_occurred)
4068 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4069 						"could not allocate parser context");
4070 		doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4071 								len - xmldecl_len, NULL, NULL, 0);
4072 		if (doc == NULL || xmlerrcxt->err_occurred)
4073 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4074 						"could not parse XML document");
4075 		xpathctx = xmlXPathNewContext(doc);
4076 		if (xpathctx == NULL || xmlerrcxt->err_occurred)
4077 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4078 						"could not allocate XPath context");
4079 		xpathctx->node = (xmlNodePtr) doc;
4080 
4081 		/* register namespaces, if any */
4082 		if (ns_count > 0)
4083 		{
4084 			for (i = 0; i < ns_count; i++)
4085 			{
4086 				char	   *ns_name;
4087 				char	   *ns_uri;
4088 
4089 				if (ns_names_uris_nulls[i * 2] ||
4090 					ns_names_uris_nulls[i * 2 + 1])
4091 					ereport(ERROR,
4092 							(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4093 							 errmsg("neither namespace name nor URI may be null")));
4094 				ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4095 				ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4096 				if (xmlXPathRegisterNs(xpathctx,
4097 									   (xmlChar *) ns_name,
4098 									   (xmlChar *) ns_uri) != 0)
4099 					ereport(ERROR,	/* is this an internal error??? */
4100 							(errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4101 									ns_name, ns_uri)));
4102 			}
4103 		}
4104 
4105 		xpathcomp = xmlXPathCompile(xpath_expr);
4106 		if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4107 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4108 						"invalid XPath expression");
4109 
4110 		/*
4111 		 * Version 2.6.27 introduces a function named
4112 		 * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4113 		 * but we can derive the existence by whether any nodes are returned,
4114 		 * thereby preventing a library version upgrade and keeping the code
4115 		 * the same.
4116 		 */
4117 		xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4118 		if (xpathobj == NULL || xmlerrcxt->err_occurred)
4119 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4120 						"could not create XPath object");
4121 
4122 		/*
4123 		 * Extract the results as requested.
4124 		 */
4125 		if (res_nitems != NULL)
4126 			*res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4127 		else
4128 			(void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4129 	}
4130 	PG_CATCH();
4131 	{
4132 		if (xpathobj)
4133 			xmlXPathFreeObject(xpathobj);
4134 		if (xpathcomp)
4135 			xmlXPathFreeCompExpr(xpathcomp);
4136 		if (xpathctx)
4137 			xmlXPathFreeContext(xpathctx);
4138 		if (doc)
4139 			xmlFreeDoc(doc);
4140 		if (ctxt)
4141 			xmlFreeParserCtxt(ctxt);
4142 
4143 		pg_xml_done(xmlerrcxt, true);
4144 
4145 		PG_RE_THROW();
4146 	}
4147 	PG_END_TRY();
4148 
4149 	xmlXPathFreeObject(xpathobj);
4150 	xmlXPathFreeCompExpr(xpathcomp);
4151 	xmlXPathFreeContext(xpathctx);
4152 	xmlFreeDoc(doc);
4153 	xmlFreeParserCtxt(ctxt);
4154 
4155 	pg_xml_done(xmlerrcxt, false);
4156 }
4157 #endif							/* USE_LIBXML */
4158 
4159 /*
4160  * Evaluate XPath expression and return array of XML values.
4161  *
4162  * As we have no support of XQuery sequences yet, this function seems
4163  * to be the most useful one (array of XML functions plays a role of
4164  * some kind of substitution for XQuery sequences).
4165  */
4166 Datum
xpath(PG_FUNCTION_ARGS)4167 xpath(PG_FUNCTION_ARGS)
4168 {
4169 #ifdef USE_LIBXML
4170 	text	   *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4171 	xmltype    *data = PG_GETARG_XML_P(1);
4172 	ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4173 	ArrayBuildState *astate;
4174 
4175 	astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4176 	xpath_internal(xpath_expr_text, data, namespaces,
4177 				   NULL, astate);
4178 	PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
4179 #else
4180 	NO_XML_SUPPORT();
4181 	return 0;
4182 #endif
4183 }
4184 
4185 /*
4186  * Determines if the node specified by the supplied XPath exists
4187  * in a given XML document, returning a boolean.
4188  */
4189 Datum
xmlexists(PG_FUNCTION_ARGS)4190 xmlexists(PG_FUNCTION_ARGS)
4191 {
4192 #ifdef USE_LIBXML
4193 	text	   *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4194 	xmltype    *data = PG_GETARG_XML_P(1);
4195 	int			res_nitems;
4196 
4197 	xpath_internal(xpath_expr_text, data, NULL,
4198 				   &res_nitems, NULL);
4199 
4200 	PG_RETURN_BOOL(res_nitems > 0);
4201 #else
4202 	NO_XML_SUPPORT();
4203 	return 0;
4204 #endif
4205 }
4206 
4207 /*
4208  * Determines if the node specified by the supplied XPath exists
4209  * in a given XML document, returning a boolean. Differs from
4210  * xmlexists as it supports namespaces and is not defined in SQL/XML.
4211  */
4212 Datum
xpath_exists(PG_FUNCTION_ARGS)4213 xpath_exists(PG_FUNCTION_ARGS)
4214 {
4215 #ifdef USE_LIBXML
4216 	text	   *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4217 	xmltype    *data = PG_GETARG_XML_P(1);
4218 	ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4219 	int			res_nitems;
4220 
4221 	xpath_internal(xpath_expr_text, data, namespaces,
4222 				   &res_nitems, NULL);
4223 
4224 	PG_RETURN_BOOL(res_nitems > 0);
4225 #else
4226 	NO_XML_SUPPORT();
4227 	return 0;
4228 #endif
4229 }
4230 
4231 /*
4232  * Functions for checking well-formed-ness
4233  */
4234 
4235 #ifdef USE_LIBXML
4236 static bool
wellformed_xml(text * data,XmlOptionType xmloption_arg)4237 wellformed_xml(text *data, XmlOptionType xmloption_arg)
4238 {
4239 	bool		result;
4240 	volatile xmlDocPtr doc = NULL;
4241 
4242 	/* We want to catch any exceptions and return false */
4243 	PG_TRY();
4244 	{
4245 		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4246 		result = true;
4247 	}
4248 	PG_CATCH();
4249 	{
4250 		FlushErrorState();
4251 		result = false;
4252 	}
4253 	PG_END_TRY();
4254 
4255 	if (doc)
4256 		xmlFreeDoc(doc);
4257 
4258 	return result;
4259 }
4260 #endif
4261 
4262 Datum
xml_is_well_formed(PG_FUNCTION_ARGS)4263 xml_is_well_formed(PG_FUNCTION_ARGS)
4264 {
4265 #ifdef USE_LIBXML
4266 	text	   *data = PG_GETARG_TEXT_PP(0);
4267 
4268 	PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4269 #else
4270 	NO_XML_SUPPORT();
4271 	return 0;
4272 #endif							/* not USE_LIBXML */
4273 }
4274 
4275 Datum
xml_is_well_formed_document(PG_FUNCTION_ARGS)4276 xml_is_well_formed_document(PG_FUNCTION_ARGS)
4277 {
4278 #ifdef USE_LIBXML
4279 	text	   *data = PG_GETARG_TEXT_PP(0);
4280 
4281 	PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4282 #else
4283 	NO_XML_SUPPORT();
4284 	return 0;
4285 #endif							/* not USE_LIBXML */
4286 }
4287 
4288 Datum
xml_is_well_formed_content(PG_FUNCTION_ARGS)4289 xml_is_well_formed_content(PG_FUNCTION_ARGS)
4290 {
4291 #ifdef USE_LIBXML
4292 	text	   *data = PG_GETARG_TEXT_PP(0);
4293 
4294 	PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4295 #else
4296 	NO_XML_SUPPORT();
4297 	return 0;
4298 #endif							/* not USE_LIBXML */
4299 }
4300 
4301 /*
4302  * support functions for XMLTABLE
4303  *
4304  */
4305 #ifdef USE_LIBXML
4306 
4307 /*
4308  * Returns private data from executor state. Ensure validity by check with
4309  * MAGIC number.
4310  */
4311 static inline XmlTableBuilderData *
GetXmlTableBuilderPrivateData(TableFuncScanState * state,const char * fname)4312 GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4313 {
4314 	XmlTableBuilderData *result;
4315 
4316 	if (!IsA(state, TableFuncScanState))
4317 		elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4318 	result = (XmlTableBuilderData *) state->opaque;
4319 	if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4320 		elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4321 
4322 	return result;
4323 }
4324 #endif
4325 
4326 /*
4327  * XmlTableInitOpaque
4328  *		Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4329  *		the XML parser.
4330  *
4331  * Note: Because we call pg_xml_init() here and pg_xml_done() in
4332  * XmlTableDestroyOpaque, it is critical for robustness that no other
4333  * executor nodes run until this node is processed to completion.  Caller
4334  * must execute this to completion (probably filling a tuplestore to exhaust
4335  * this node in a single pass) instead of using row-per-call mode.
4336  */
4337 static void
XmlTableInitOpaque(TableFuncScanState * state,int natts)4338 XmlTableInitOpaque(TableFuncScanState *state, int natts)
4339 {
4340 #ifdef USE_LIBXML
4341 	volatile xmlParserCtxtPtr ctxt = NULL;
4342 	XmlTableBuilderData *xtCxt;
4343 	PgXmlErrorContext *xmlerrcxt;
4344 
4345 	xtCxt = palloc0(sizeof(XmlTableBuilderData));
4346 	xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4347 	xtCxt->natts = natts;
4348 	xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4349 
4350 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4351 
4352 	PG_TRY();
4353 	{
4354 		xmlInitParser();
4355 
4356 		ctxt = xmlNewParserCtxt();
4357 		if (ctxt == NULL || xmlerrcxt->err_occurred)
4358 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4359 						"could not allocate parser context");
4360 	}
4361 	PG_CATCH();
4362 	{
4363 		if (ctxt != NULL)
4364 			xmlFreeParserCtxt(ctxt);
4365 
4366 		pg_xml_done(xmlerrcxt, true);
4367 
4368 		PG_RE_THROW();
4369 	}
4370 	PG_END_TRY();
4371 
4372 	xtCxt->xmlerrcxt = xmlerrcxt;
4373 	xtCxt->ctxt = ctxt;
4374 
4375 	state->opaque = xtCxt;
4376 #else
4377 	NO_XML_SUPPORT();
4378 #endif							/* not USE_LIBXML */
4379 }
4380 
4381 /*
4382  * XmlTableSetDocument
4383  *		Install the input document
4384  */
4385 static void
XmlTableSetDocument(TableFuncScanState * state,Datum value)4386 XmlTableSetDocument(TableFuncScanState *state, Datum value)
4387 {
4388 #ifdef USE_LIBXML
4389 	XmlTableBuilderData *xtCxt;
4390 	xmltype    *xmlval = DatumGetXmlP(value);
4391 	char	   *str;
4392 	xmlChar    *xstr;
4393 	int			length;
4394 	volatile xmlDocPtr doc = NULL;
4395 	volatile xmlXPathContextPtr xpathcxt = NULL;
4396 
4397 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4398 
4399 	/*
4400 	 * Use out function for casting to string (remove encoding property). See
4401 	 * comment in xml_out.
4402 	 */
4403 	str = xml_out_internal(xmlval, 0);
4404 
4405 	length = strlen(str);
4406 	xstr = pg_xmlCharStrndup(str, length);
4407 
4408 	PG_TRY();
4409 	{
4410 		doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4411 		if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4412 			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4413 						"could not parse XML document");
4414 		xpathcxt = xmlXPathNewContext(doc);
4415 		if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4416 			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4417 						"could not allocate XPath context");
4418 		xpathcxt->node = (xmlNodePtr) doc;
4419 	}
4420 	PG_CATCH();
4421 	{
4422 		if (xpathcxt != NULL)
4423 			xmlXPathFreeContext(xpathcxt);
4424 		if (doc != NULL)
4425 			xmlFreeDoc(doc);
4426 
4427 		PG_RE_THROW();
4428 	}
4429 	PG_END_TRY();
4430 
4431 	xtCxt->doc = doc;
4432 	xtCxt->xpathcxt = xpathcxt;
4433 #else
4434 	NO_XML_SUPPORT();
4435 #endif							/* not USE_LIBXML */
4436 }
4437 
4438 /*
4439  * XmlTableSetNamespace
4440  *		Add a namespace declaration
4441  */
4442 static void
XmlTableSetNamespace(TableFuncScanState * state,const char * name,const char * uri)4443 XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4444 {
4445 #ifdef USE_LIBXML
4446 	XmlTableBuilderData *xtCxt;
4447 
4448 	if (name == NULL)
4449 		ereport(ERROR,
4450 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4451 				 errmsg("DEFAULT namespace is not supported")));
4452 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4453 
4454 	if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4455 						   pg_xmlCharStrndup(name, strlen(name)),
4456 						   pg_xmlCharStrndup(uri, strlen(uri))))
4457 		xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4458 					"could not set XML namespace");
4459 #else
4460 	NO_XML_SUPPORT();
4461 #endif							/* not USE_LIBXML */
4462 }
4463 
4464 /*
4465  * XmlTableSetRowFilter
4466  *		Install the row-filter Xpath expression.
4467  */
4468 static void
XmlTableSetRowFilter(TableFuncScanState * state,const char * path)4469 XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4470 {
4471 #ifdef USE_LIBXML
4472 	XmlTableBuilderData *xtCxt;
4473 	xmlChar    *xstr;
4474 
4475 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4476 
4477 	if (*path == '\0')
4478 		ereport(ERROR,
4479 				(errcode(ERRCODE_DATA_EXCEPTION),
4480 				 errmsg("row path filter must not be empty string")));
4481 
4482 	xstr = pg_xmlCharStrndup(path, strlen(path));
4483 
4484 	xtCxt->xpathcomp = xmlXPathCompile(xstr);
4485 	if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4486 		xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4487 					"invalid XPath expression");
4488 #else
4489 	NO_XML_SUPPORT();
4490 #endif							/* not USE_LIBXML */
4491 }
4492 
4493 /*
4494  * XmlTableSetColumnFilter
4495  *		Install the column-filter Xpath expression, for the given column.
4496  */
4497 static void
XmlTableSetColumnFilter(TableFuncScanState * state,const char * path,int colnum)4498 XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4499 {
4500 #ifdef USE_LIBXML
4501 	XmlTableBuilderData *xtCxt;
4502 	xmlChar    *xstr;
4503 
4504 	AssertArg(PointerIsValid(path));
4505 
4506 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4507 
4508 	if (*path == '\0')
4509 		ereport(ERROR,
4510 				(errcode(ERRCODE_DATA_EXCEPTION),
4511 				 errmsg("column path filter must not be empty string")));
4512 
4513 	xstr = pg_xmlCharStrndup(path, strlen(path));
4514 
4515 	xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4516 	if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4517 		xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4518 					"invalid XPath expression");
4519 #else
4520 	NO_XML_SUPPORT();
4521 #endif							/* not USE_LIBXML */
4522 }
4523 
4524 /*
4525  * XmlTableFetchRow
4526  *		Prepare the next "current" tuple for upcoming GetValue calls.
4527  *		Returns false if the row-filter expression returned no more rows.
4528  */
4529 static bool
XmlTableFetchRow(TableFuncScanState * state)4530 XmlTableFetchRow(TableFuncScanState *state)
4531 {
4532 #ifdef USE_LIBXML
4533 	XmlTableBuilderData *xtCxt;
4534 
4535 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4536 
4537 	/*
4538 	 * XmlTable returns table - set of composite values. The error context, is
4539 	 * used for producement more values, between two calls, there can be
4540 	 * created and used another libxml2 error context. It is libxml2 global
4541 	 * value, so it should be refreshed any time before any libxml2 usage,
4542 	 * that is finished by returning some value.
4543 	 */
4544 	xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4545 
4546 	if (xtCxt->xpathobj == NULL)
4547 	{
4548 		xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4549 		if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4550 			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4551 						"could not create XPath object");
4552 
4553 		xtCxt->row_count = 0;
4554 	}
4555 
4556 	if (xtCxt->xpathobj->type == XPATH_NODESET)
4557 	{
4558 		if (xtCxt->xpathobj->nodesetval != NULL)
4559 		{
4560 			if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4561 				return true;
4562 		}
4563 	}
4564 
4565 	return false;
4566 #else
4567 	NO_XML_SUPPORT();
4568 	return false;
4569 #endif							/* not USE_LIBXML */
4570 }
4571 
4572 /*
4573  * XmlTableGetValue
4574  *		Return the value for column number 'colnum' for the current row.  If
4575  *		column -1 is requested, return representation of the whole row.
4576  *
4577  * This leaks memory, so be sure to reset often the context in which it's
4578  * called.
4579  */
4580 static Datum
XmlTableGetValue(TableFuncScanState * state,int colnum,Oid typid,int32 typmod,bool * isnull)4581 XmlTableGetValue(TableFuncScanState *state, int colnum,
4582 				 Oid typid, int32 typmod, bool *isnull)
4583 {
4584 #ifdef USE_LIBXML
4585 	XmlTableBuilderData *xtCxt;
4586 	Datum		result = (Datum) 0;
4587 	xmlNodePtr	cur;
4588 	char	   *cstr = NULL;
4589 	volatile xmlXPathObjectPtr xpathobj = NULL;
4590 
4591 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4592 
4593 	Assert(xtCxt->xpathobj &&
4594 		   xtCxt->xpathobj->type == XPATH_NODESET &&
4595 		   xtCxt->xpathobj->nodesetval != NULL);
4596 
4597 	/* Propagate context related error context to libxml2 */
4598 	xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4599 
4600 	*isnull = false;
4601 
4602 	cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4603 
4604 	Assert(xtCxt->xpathscomp[colnum] != NULL);
4605 
4606 	PG_TRY();
4607 	{
4608 		/* Set current node as entry point for XPath evaluation */
4609 		xtCxt->xpathcxt->node = cur;
4610 
4611 		/* Evaluate column path */
4612 		xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4613 		if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4614 			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4615 						"could not create XPath object");
4616 
4617 		/*
4618 		 * There are four possible cases, depending on the number of nodes
4619 		 * returned by the XPath expression and the type of the target column:
4620 		 * a) XPath returns no nodes.  b) The target type is XML (return all
4621 		 * as XML).  For non-XML return types:  c) One node (return content).
4622 		 * d) Multiple nodes (error).
4623 		 */
4624 		if (xpathobj->type == XPATH_NODESET)
4625 		{
4626 			int			count = 0;
4627 
4628 			if (xpathobj->nodesetval != NULL)
4629 				count = xpathobj->nodesetval->nodeNr;
4630 
4631 			if (xpathobj->nodesetval == NULL || count == 0)
4632 			{
4633 				*isnull = true;
4634 			}
4635 			else
4636 			{
4637 				if (typid == XMLOID)
4638 				{
4639 					text	   *textstr;
4640 					StringInfoData str;
4641 
4642 					/* Concatenate serialized values */
4643 					initStringInfo(&str);
4644 					for (int i = 0; i < count; i++)
4645 					{
4646 						textstr =
4647 							xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4648 												 xtCxt->xmlerrcxt);
4649 
4650 						appendStringInfoText(&str, textstr);
4651 					}
4652 					cstr = str.data;
4653 				}
4654 				else
4655 				{
4656 					xmlChar    *str;
4657 
4658 					if (count > 1)
4659 						ereport(ERROR,
4660 								(errcode(ERRCODE_CARDINALITY_VIOLATION),
4661 								 errmsg("more than one value returned by column XPath expression")));
4662 
4663 					str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
4664 					cstr = str ? xml_pstrdup_and_free(str) : "";
4665 				}
4666 			}
4667 		}
4668 		else if (xpathobj->type == XPATH_STRING)
4669 		{
4670 			/* Content should be escaped when target will be XML */
4671 			if (typid == XMLOID)
4672 				cstr = escape_xml((char *) xpathobj->stringval);
4673 			else
4674 				cstr = (char *) xpathobj->stringval;
4675 		}
4676 		else if (xpathobj->type == XPATH_BOOLEAN)
4677 		{
4678 			char		typcategory;
4679 			bool		typispreferred;
4680 			xmlChar    *str;
4681 
4682 			/* Allow implicit casting from boolean to numbers */
4683 			get_type_category_preferred(typid, &typcategory, &typispreferred);
4684 
4685 			if (typcategory != TYPCATEGORY_NUMERIC)
4686 				str = xmlXPathCastBooleanToString(xpathobj->boolval);
4687 			else
4688 				str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
4689 
4690 			cstr = xml_pstrdup_and_free(str);
4691 		}
4692 		else if (xpathobj->type == XPATH_NUMBER)
4693 		{
4694 			xmlChar    *str;
4695 
4696 			str = xmlXPathCastNumberToString(xpathobj->floatval);
4697 			cstr = xml_pstrdup_and_free(str);
4698 		}
4699 		else
4700 			elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
4701 
4702 		/*
4703 		 * By here, either cstr contains the result value, or the isnull flag
4704 		 * has been set.
4705 		 */
4706 		Assert(cstr || *isnull);
4707 
4708 		if (!*isnull)
4709 			result = InputFunctionCall(&state->in_functions[colnum],
4710 									   cstr,
4711 									   state->typioparams[colnum],
4712 									   typmod);
4713 	}
4714 	PG_FINALLY();
4715 	{
4716 		if (xpathobj != NULL)
4717 			xmlXPathFreeObject(xpathobj);
4718 	}
4719 	PG_END_TRY();
4720 
4721 	return result;
4722 #else
4723 	NO_XML_SUPPORT();
4724 	return 0;
4725 #endif							/* not USE_LIBXML */
4726 }
4727 
4728 /*
4729  * XmlTableDestroyOpaque
4730  *		Release all libxml2 resources
4731  */
4732 static void
XmlTableDestroyOpaque(TableFuncScanState * state)4733 XmlTableDestroyOpaque(TableFuncScanState *state)
4734 {
4735 #ifdef USE_LIBXML
4736 	XmlTableBuilderData *xtCxt;
4737 
4738 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4739 
4740 	/* Propagate context related error context to libxml2 */
4741 	xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4742 
4743 	if (xtCxt->xpathscomp != NULL)
4744 	{
4745 		int			i;
4746 
4747 		for (i = 0; i < xtCxt->natts; i++)
4748 			if (xtCxt->xpathscomp[i] != NULL)
4749 				xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
4750 	}
4751 
4752 	if (xtCxt->xpathobj != NULL)
4753 		xmlXPathFreeObject(xtCxt->xpathobj);
4754 	if (xtCxt->xpathcomp != NULL)
4755 		xmlXPathFreeCompExpr(xtCxt->xpathcomp);
4756 	if (xtCxt->xpathcxt != NULL)
4757 		xmlXPathFreeContext(xtCxt->xpathcxt);
4758 	if (xtCxt->doc != NULL)
4759 		xmlFreeDoc(xtCxt->doc);
4760 	if (xtCxt->ctxt != NULL)
4761 		xmlFreeParserCtxt(xtCxt->ctxt);
4762 
4763 	pg_xml_done(xtCxt->xmlerrcxt, true);
4764 
4765 	/* not valid anymore */
4766 	xtCxt->magic = 0;
4767 	state->opaque = NULL;
4768 
4769 #else
4770 	NO_XML_SUPPORT();
4771 #endif							/* not USE_LIBXML */
4772 }
4773