1 /*-------------------------------------------------------------------------
2  *
3  * xml.c
4  *	  XML data type support.
5  *
6  *
7  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/utils/adt/xml.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 /*
16  * Generally, XML type support is only available when libxml use was
17  * configured during the build.  But even if that is not done, the
18  * type and all the functions are available, but most of them will
19  * fail.  For one thing, this avoids having to manage variant catalog
20  * installations.  But it also has nice effects such as that you can
21  * dump a database containing XML type data even if the server is not
22  * linked with libxml.  Thus, make sure xml_out() works even if nothing
23  * else does.
24  */
25 
26 /*
27  * Notes on memory management:
28  *
29  * Sometimes libxml allocates global structures in the hope that it can reuse
30  * them later on.  This makes it impractical to change the xmlMemSetup
31  * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32  * allocated with malloc() or vice versa.  Since libxml might be used by
33  * loadable modules, eg libperl, our only safe choices are to change the
34  * functions at postmaster/backend launch or not at all.  Since we'd rather
35  * not activate libxml in sessions that might never use it, the latter choice
36  * is the preferred one.  However, for debugging purposes it can be awfully
37  * handy to constrain libxml's allocations to be done in a specific palloc
38  * context, where they're easy to track.  Therefore there is code here that
39  * can be enabled in debug builds to redirect libxml's allocations into a
40  * special context LibxmlContext.  It's not recommended to turn this on in
41  * a production build because of the possibility of bad interactions with
42  * external modules.
43  */
44 /* #define USE_LIBXMLCONTEXT */
45 
46 #include "postgres.h"
47 
48 #ifdef USE_LIBXML
49 #include <libxml/chvalid.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/tree.h>
53 #include <libxml/uri.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/xmlversion.h>
56 #include <libxml/xmlwriter.h>
57 #include <libxml/xpath.h>
58 #include <libxml/xpathInternals.h>
59 
60 /*
61  * We used to check for xmlStructuredErrorContext via a configure test; but
62  * that doesn't work on Windows, so instead use this grottier method of
63  * testing the library version number.
64  */
65 #if LIBXML_VERSION >= 20704
66 #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
67 #endif
68 #endif							/* USE_LIBXML */
69 
70 #include "access/htup_details.h"
71 #include "catalog/namespace.h"
72 #include "catalog/pg_class.h"
73 #include "catalog/pg_type.h"
74 #include "commands/dbcommands.h"
75 #include "executor/spi.h"
76 #include "executor/tablefunc.h"
77 #include "fmgr.h"
78 #include "lib/stringinfo.h"
79 #include "libpq/pqformat.h"
80 #include "mb/pg_wchar.h"
81 #include "miscadmin.h"
82 #include "nodes/execnodes.h"
83 #include "nodes/nodeFuncs.h"
84 #include "utils/array.h"
85 #include "utils/builtins.h"
86 #include "utils/date.h"
87 #include "utils/datetime.h"
88 #include "utils/lsyscache.h"
89 #include "utils/memutils.h"
90 #include "utils/rel.h"
91 #include "utils/syscache.h"
92 #include "utils/xml.h"
93 
94 
95 /* GUC variables */
96 int			xmlbinary;
97 int			xmloption;
98 
99 #ifdef USE_LIBXML
100 
101 /* random number to identify PgXmlErrorContext */
102 #define ERRCXT_MAGIC	68275028
103 
104 struct PgXmlErrorContext
105 {
106 	int			magic;
107 	/* strictness argument passed to pg_xml_init */
108 	PgXmlStrictness strictness;
109 	/* current error status and accumulated message, if any */
110 	bool		err_occurred;
111 	StringInfoData err_buf;
112 	/* previous libxml error handling state (saved by pg_xml_init) */
113 	xmlStructuredErrorFunc saved_errfunc;
114 	void	   *saved_errcxt;
115 	/* previous libxml entity handler (saved by pg_xml_init) */
116 	xmlExternalEntityLoader saved_entityfunc;
117 };
118 
119 static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
120 				  xmlParserCtxtPtr ctxt);
121 static void xml_errorHandler(void *data, xmlErrorPtr error);
122 static void xml_ereport_by_code(int level, int sqlcode,
123 					const char *msg, int errcode);
124 static void chopStringInfoNewlines(StringInfo str);
125 static void appendStringInfoLineSeparator(StringInfo str);
126 
127 #ifdef USE_LIBXMLCONTEXT
128 
129 static MemoryContext LibxmlContext = NULL;
130 
131 static void xml_memory_init(void);
132 static void *xml_palloc(size_t size);
133 static void *xml_repalloc(void *ptr, size_t size);
134 static void xml_pfree(void *ptr);
135 static char *xml_pstrdup(const char *string);
136 #endif							/* USE_LIBXMLCONTEXT */
137 
138 static xmlChar *xml_text2xmlChar(text *in);
139 static int parse_xml_decl(const xmlChar *str, size_t *lenp,
140 			   xmlChar **version, xmlChar **encoding, int *standalone);
141 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
142 			   pg_enc encoding, int standalone);
143 static bool xml_doctype_in_content(const xmlChar *str);
144 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
145 		  bool preserve_whitespace, int encoding);
146 static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
147 static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
148 					   ArrayBuildState *astate,
149 					   PgXmlErrorContext *xmlerrcxt);
150 static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
151 #endif							/* USE_LIBXML */
152 
153 static void xmldata_root_element_start(StringInfo result, const char *eltname,
154 						   const char *xmlschema, const char *targetns,
155 						   bool top_level);
156 static void xmldata_root_element_end(StringInfo result, const char *eltname);
157 static StringInfo query_to_xml_internal(const char *query, char *tablename,
158 					  const char *xmlschema, bool nulls, bool tableforest,
159 					  const char *targetns, bool top_level);
160 static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
161 						   bool nulls, bool tableforest, const char *targetns);
162 static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
163 								  List *relid_list, bool nulls,
164 								  bool tableforest, const char *targetns);
165 static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
166 								   bool nulls, bool tableforest,
167 								   const char *targetns);
168 static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
169 static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
170 static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
171 static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
172 						  char *tablename, bool nulls, bool tableforest,
173 						  const char *targetns, bool top_level);
174 
175 /* XMLTABLE support */
176 #ifdef USE_LIBXML
177 /* random number to identify XmlTableContext */
178 #define XMLTABLE_CONTEXT_MAGIC	46922182
179 typedef struct XmlTableBuilderData
180 {
181 	int			magic;
182 	int			natts;
183 	long int	row_count;
184 	PgXmlErrorContext *xmlerrcxt;
185 	xmlParserCtxtPtr ctxt;
186 	xmlDocPtr	doc;
187 	xmlXPathContextPtr xpathcxt;
188 	xmlXPathCompExprPtr xpathcomp;
189 	xmlXPathObjectPtr xpathobj;
190 	xmlXPathCompExprPtr *xpathscomp;
191 } XmlTableBuilderData;
192 #endif
193 
194 static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
195 static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
196 static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
197 					 const char *uri);
198 static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
199 static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
200 						const char *path, int colnum);
201 static bool XmlTableFetchRow(struct TableFuncScanState *state);
202 static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
203 				 Oid typid, int32 typmod, bool *isnull);
204 static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
205 
206 const TableFuncRoutine XmlTableRoutine =
207 {
208 	XmlTableInitOpaque,
209 	XmlTableSetDocument,
210 	XmlTableSetNamespace,
211 	XmlTableSetRowFilter,
212 	XmlTableSetColumnFilter,
213 	XmlTableFetchRow,
214 	XmlTableGetValue,
215 	XmlTableDestroyOpaque
216 };
217 
/*
 * Raise the standard "feature not supported" ERROR.  The functions in this
 * file invoke it from their non-USE_LIBXML branches.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support."), \
			 errhint("You need to rebuild PostgreSQL using --with-libxml.")))
224 
225 
226 /* from SQL/XML:2008 section 4.9 */
227 #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
228 #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
229 #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
230 
231 
232 #ifdef USE_LIBXML
233 
234 static int
xmlChar_to_encoding(const xmlChar * encoding_name)235 xmlChar_to_encoding(const xmlChar *encoding_name)
236 {
237 	int			encoding = pg_char_to_encoding((const char *) encoding_name);
238 
239 	if (encoding < 0)
240 		ereport(ERROR,
241 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
242 				 errmsg("invalid encoding name \"%s\"",
243 						(const char *) encoding_name)));
244 	return encoding;
245 }
246 #endif
247 
248 
249 /*
250  * xml_in uses a plain C string to VARDATA conversion, so for the time being
251  * we use the conversion function for the text datatype.
252  *
253  * This is only acceptable so long as xmltype and text use the same
254  * representation.
255  */
256 Datum
xml_in(PG_FUNCTION_ARGS)257 xml_in(PG_FUNCTION_ARGS)
258 {
259 #ifdef USE_LIBXML
260 	char	   *s = PG_GETARG_CSTRING(0);
261 	xmltype    *vardata;
262 	xmlDocPtr	doc;
263 
264 	vardata = (xmltype *) cstring_to_text(s);
265 
266 	/*
267 	 * Parse the data to check if it is well-formed XML data.  Assume that
268 	 * ERROR occurred if parsing failed.
269 	 */
270 	doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
271 	xmlFreeDoc(doc);
272 
273 	PG_RETURN_XML_P(vardata);
274 #else
275 	NO_XML_SUPPORT();
276 	return 0;
277 #endif
278 }
279 
280 
281 #define PG_XML_DEFAULT_VERSION "1.0"
282 
283 
284 /*
285  * xml_out_internal uses a plain VARDATA to C string conversion, so for the
286  * time being we use the conversion function for the text datatype.
287  *
288  * This is only acceptable so long as xmltype and text use the same
289  * representation.
290  */
291 static char *
xml_out_internal(xmltype * x,pg_enc target_encoding)292 xml_out_internal(xmltype *x, pg_enc target_encoding)
293 {
294 	char	   *str = text_to_cstring((text *) x);
295 
296 #ifdef USE_LIBXML
297 	size_t		len = strlen(str);
298 	xmlChar    *version;
299 	int			standalone;
300 	int			res_code;
301 
302 	if ((res_code = parse_xml_decl((xmlChar *) str,
303 								   &len, &version, NULL, &standalone)) == 0)
304 	{
305 		StringInfoData buf;
306 
307 		initStringInfo(&buf);
308 
309 		if (!print_xml_decl(&buf, version, target_encoding, standalone))
310 		{
311 			/*
312 			 * If we are not going to produce an XML declaration, eat a single
313 			 * newline in the original string to prevent empty first lines in
314 			 * the output.
315 			 */
316 			if (*(str + len) == '\n')
317 				len += 1;
318 		}
319 		appendStringInfoString(&buf, str + len);
320 
321 		pfree(str);
322 
323 		return buf.data;
324 	}
325 
326 	xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
327 						"could not parse XML declaration in stored value",
328 						res_code);
329 #endif
330 	return str;
331 }
332 
333 
334 Datum
xml_out(PG_FUNCTION_ARGS)335 xml_out(PG_FUNCTION_ARGS)
336 {
337 	xmltype    *x = PG_GETARG_XML_P(0);
338 
339 	/*
340 	 * xml_out removes the encoding property in all cases.  This is because we
341 	 * cannot control from here whether the datum will be converted to a
342 	 * different client encoding, so we'd do more harm than good by including
343 	 * it.
344 	 */
345 	PG_RETURN_CSTRING(xml_out_internal(x, 0));
346 }
347 
348 
349 Datum
xml_recv(PG_FUNCTION_ARGS)350 xml_recv(PG_FUNCTION_ARGS)
351 {
352 #ifdef USE_LIBXML
353 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
354 	xmltype    *result;
355 	char	   *str;
356 	char	   *newstr;
357 	int			nbytes;
358 	xmlDocPtr	doc;
359 	xmlChar    *encodingStr = NULL;
360 	int			encoding;
361 
362 	/*
363 	 * Read the data in raw format. We don't know yet what the encoding is, as
364 	 * that information is embedded in the xml declaration; so we have to
365 	 * parse that before converting to server encoding.
366 	 */
367 	nbytes = buf->len - buf->cursor;
368 	str = (char *) pq_getmsgbytes(buf, nbytes);
369 
370 	/*
371 	 * We need a null-terminated string to pass to parse_xml_decl().  Rather
372 	 * than make a separate copy, make the temporary result one byte bigger
373 	 * than it needs to be.
374 	 */
375 	result = palloc(nbytes + 1 + VARHDRSZ);
376 	SET_VARSIZE(result, nbytes + VARHDRSZ);
377 	memcpy(VARDATA(result), str, nbytes);
378 	str = VARDATA(result);
379 	str[nbytes] = '\0';
380 
381 	parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
382 
383 	/*
384 	 * If encoding wasn't explicitly specified in the XML header, treat it as
385 	 * UTF-8, as that's the default in XML. This is different from xml_in(),
386 	 * where the input has to go through the normal client to server encoding
387 	 * conversion.
388 	 */
389 	encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
390 
391 	/*
392 	 * Parse the data to check if it is well-formed XML data.  Assume that
393 	 * xml_parse will throw ERROR if not.
394 	 */
395 	doc = xml_parse(result, xmloption, true, encoding);
396 	xmlFreeDoc(doc);
397 
398 	/* Now that we know what we're dealing with, convert to server encoding */
399 	newstr = pg_any_to_server(str, nbytes, encoding);
400 
401 	if (newstr != str)
402 	{
403 		pfree(result);
404 		result = (xmltype *) cstring_to_text(newstr);
405 		pfree(newstr);
406 	}
407 
408 	PG_RETURN_XML_P(result);
409 #else
410 	NO_XML_SUPPORT();
411 	return 0;
412 #endif
413 }
414 
415 
416 Datum
xml_send(PG_FUNCTION_ARGS)417 xml_send(PG_FUNCTION_ARGS)
418 {
419 	xmltype    *x = PG_GETARG_XML_P(0);
420 	char	   *outval;
421 	StringInfoData buf;
422 
423 	/*
424 	 * xml_out_internal doesn't convert the encoding, it just prints the right
425 	 * declaration. pq_sendtext will do the conversion.
426 	 */
427 	outval = xml_out_internal(x, pg_get_client_encoding());
428 
429 	pq_begintypsend(&buf);
430 	pq_sendtext(&buf, outval, strlen(outval));
431 	pfree(outval);
432 	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
433 }
434 
435 
436 #ifdef USE_LIBXML
437 static void
appendStringInfoText(StringInfo str,const text * t)438 appendStringInfoText(StringInfo str, const text *t)
439 {
440 	appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
441 }
442 #endif
443 
444 
445 static xmltype *
stringinfo_to_xmltype(StringInfo buf)446 stringinfo_to_xmltype(StringInfo buf)
447 {
448 	return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
449 }
450 
451 
452 static xmltype *
cstring_to_xmltype(const char * string)453 cstring_to_xmltype(const char *string)
454 {
455 	return (xmltype *) cstring_to_text(string);
456 }
457 
458 
459 #ifdef USE_LIBXML
460 static xmltype *
xmlBuffer_to_xmltype(xmlBufferPtr buf)461 xmlBuffer_to_xmltype(xmlBufferPtr buf)
462 {
463 	return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
464 												xmlBufferLength(buf));
465 }
466 #endif
467 
468 
469 Datum
xmlcomment(PG_FUNCTION_ARGS)470 xmlcomment(PG_FUNCTION_ARGS)
471 {
472 #ifdef USE_LIBXML
473 	text	   *arg = PG_GETARG_TEXT_PP(0);
474 	char	   *argdata = VARDATA_ANY(arg);
475 	int			len = VARSIZE_ANY_EXHDR(arg);
476 	StringInfoData buf;
477 	int			i;
478 
479 	/* check for "--" in string or "-" at the end */
480 	for (i = 1; i < len; i++)
481 	{
482 		if (argdata[i] == '-' && argdata[i - 1] == '-')
483 			ereport(ERROR,
484 					(errcode(ERRCODE_INVALID_XML_COMMENT),
485 					 errmsg("invalid XML comment")));
486 	}
487 	if (len > 0 && argdata[len - 1] == '-')
488 		ereport(ERROR,
489 				(errcode(ERRCODE_INVALID_XML_COMMENT),
490 				 errmsg("invalid XML comment")));
491 
492 	initStringInfo(&buf);
493 	appendStringInfoString(&buf, "<!--");
494 	appendStringInfoText(&buf, arg);
495 	appendStringInfoString(&buf, "-->");
496 
497 	PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
498 #else
499 	NO_XML_SUPPORT();
500 	return 0;
501 #endif
502 }
503 
504 
505 
506 /*
507  * TODO: xmlconcat needs to merge the notations and unparsed entities
508  * of the argument values.  Not very important in practice, though.
509  */
510 xmltype *
xmlconcat(List * args)511 xmlconcat(List *args)
512 {
513 #ifdef USE_LIBXML
514 	int			global_standalone = 1;
515 	xmlChar    *global_version = NULL;
516 	bool		global_version_no_value = false;
517 	StringInfoData buf;
518 	ListCell   *v;
519 
520 	initStringInfo(&buf);
521 	foreach(v, args)
522 	{
523 		xmltype    *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
524 		size_t		len;
525 		xmlChar    *version;
526 		int			standalone;
527 		char	   *str;
528 
529 		len = VARSIZE(x) - VARHDRSZ;
530 		str = text_to_cstring((text *) x);
531 
532 		parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
533 
534 		if (standalone == 0 && global_standalone == 1)
535 			global_standalone = 0;
536 		if (standalone < 0)
537 			global_standalone = -1;
538 
539 		if (!version)
540 			global_version_no_value = true;
541 		else if (!global_version)
542 			global_version = version;
543 		else if (xmlStrcmp(version, global_version) != 0)
544 			global_version_no_value = true;
545 
546 		appendStringInfoString(&buf, str + len);
547 		pfree(str);
548 	}
549 
550 	if (!global_version_no_value || global_standalone >= 0)
551 	{
552 		StringInfoData buf2;
553 
554 		initStringInfo(&buf2);
555 
556 		print_xml_decl(&buf2,
557 					   (!global_version_no_value) ? global_version : NULL,
558 					   0,
559 					   global_standalone);
560 
561 		appendStringInfoString(&buf2, buf.data);
562 		buf = buf2;
563 	}
564 
565 	return stringinfo_to_xmltype(&buf);
566 #else
567 	NO_XML_SUPPORT();
568 	return NULL;
569 #endif
570 }
571 
572 
573 /*
574  * XMLAGG support
575  */
576 Datum
xmlconcat2(PG_FUNCTION_ARGS)577 xmlconcat2(PG_FUNCTION_ARGS)
578 {
579 	if (PG_ARGISNULL(0))
580 	{
581 		if (PG_ARGISNULL(1))
582 			PG_RETURN_NULL();
583 		else
584 			PG_RETURN_XML_P(PG_GETARG_XML_P(1));
585 	}
586 	else if (PG_ARGISNULL(1))
587 		PG_RETURN_XML_P(PG_GETARG_XML_P(0));
588 	else
589 		PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
590 											 PG_GETARG_XML_P(1))));
591 }
592 
593 
594 Datum
texttoxml(PG_FUNCTION_ARGS)595 texttoxml(PG_FUNCTION_ARGS)
596 {
597 	text	   *data = PG_GETARG_TEXT_PP(0);
598 
599 	PG_RETURN_XML_P(xmlparse(data, xmloption, true));
600 }
601 
602 
603 Datum
xmltotext(PG_FUNCTION_ARGS)604 xmltotext(PG_FUNCTION_ARGS)
605 {
606 	xmltype    *data = PG_GETARG_XML_P(0);
607 
608 	/* It's actually binary compatible. */
609 	PG_RETURN_TEXT_P((text *) data);
610 }
611 
612 
613 text *
xmltotext_with_xmloption(xmltype * data,XmlOptionType xmloption_arg)614 xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
615 {
616 	if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
617 		ereport(ERROR,
618 				(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
619 				 errmsg("not an XML document")));
620 
621 	/* It's actually binary compatible, save for the above check. */
622 	return (text *) data;
623 }
624 
625 
626 xmltype *
xmlelement(XmlExpr * xexpr,Datum * named_argvalue,bool * named_argnull,Datum * argvalue,bool * argnull)627 xmlelement(XmlExpr *xexpr,
628 		   Datum *named_argvalue, bool *named_argnull,
629 		   Datum *argvalue, bool *argnull)
630 {
631 #ifdef USE_LIBXML
632 	xmltype    *result;
633 	List	   *named_arg_strings;
634 	List	   *arg_strings;
635 	int			i;
636 	ListCell   *arg;
637 	ListCell   *narg;
638 	PgXmlErrorContext *xmlerrcxt;
639 	volatile xmlBufferPtr buf = NULL;
640 	volatile xmlTextWriterPtr writer = NULL;
641 
642 	/*
643 	 * All arguments are already evaluated, and their values are passed in the
644 	 * named_argvalue/named_argnull or argvalue/argnull arrays.  This avoids
645 	 * issues if one of the arguments involves a call to some other function
646 	 * or subsystem that wants to use libxml on its own terms.  We examine the
647 	 * original XmlExpr to identify the numbers and types of the arguments.
648 	 */
649 	named_arg_strings = NIL;
650 	i = 0;
651 	foreach(arg, xexpr->named_args)
652 	{
653 		Expr	   *e = (Expr *) lfirst(arg);
654 		char	   *str;
655 
656 		if (named_argnull[i])
657 			str = NULL;
658 		else
659 			str = map_sql_value_to_xml_value(named_argvalue[i],
660 											 exprType((Node *) e),
661 											 false);
662 		named_arg_strings = lappend(named_arg_strings, str);
663 		i++;
664 	}
665 
666 	arg_strings = NIL;
667 	i = 0;
668 	foreach(arg, xexpr->args)
669 	{
670 		Expr	   *e = (Expr *) lfirst(arg);
671 		char	   *str;
672 
673 		/* here we can just forget NULL elements immediately */
674 		if (!argnull[i])
675 		{
676 			str = map_sql_value_to_xml_value(argvalue[i],
677 											 exprType((Node *) e),
678 											 true);
679 			arg_strings = lappend(arg_strings, str);
680 		}
681 		i++;
682 	}
683 
684 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
685 
686 	PG_TRY();
687 	{
688 		buf = xmlBufferCreate();
689 		if (buf == NULL || xmlerrcxt->err_occurred)
690 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
691 						"could not allocate xmlBuffer");
692 		writer = xmlNewTextWriterMemory(buf, 0);
693 		if (writer == NULL || xmlerrcxt->err_occurred)
694 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
695 						"could not allocate xmlTextWriter");
696 
697 		xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
698 
699 		forboth(arg, named_arg_strings, narg, xexpr->arg_names)
700 		{
701 			char	   *str = (char *) lfirst(arg);
702 			char	   *argname = strVal(lfirst(narg));
703 
704 			if (str)
705 				xmlTextWriterWriteAttribute(writer,
706 											(xmlChar *) argname,
707 											(xmlChar *) str);
708 		}
709 
710 		foreach(arg, arg_strings)
711 		{
712 			char	   *str = (char *) lfirst(arg);
713 
714 			xmlTextWriterWriteRaw(writer, (xmlChar *) str);
715 		}
716 
717 		xmlTextWriterEndElement(writer);
718 
719 		/* we MUST do this now to flush data out to the buffer ... */
720 		xmlFreeTextWriter(writer);
721 		writer = NULL;
722 
723 		result = xmlBuffer_to_xmltype(buf);
724 	}
725 	PG_CATCH();
726 	{
727 		if (writer)
728 			xmlFreeTextWriter(writer);
729 		if (buf)
730 			xmlBufferFree(buf);
731 
732 		pg_xml_done(xmlerrcxt, true);
733 
734 		PG_RE_THROW();
735 	}
736 	PG_END_TRY();
737 
738 	xmlBufferFree(buf);
739 
740 	pg_xml_done(xmlerrcxt, false);
741 
742 	return result;
743 #else
744 	NO_XML_SUPPORT();
745 	return NULL;
746 #endif
747 }
748 
749 
750 xmltype *
xmlparse(text * data,XmlOptionType xmloption_arg,bool preserve_whitespace)751 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
752 {
753 #ifdef USE_LIBXML
754 	xmlDocPtr	doc;
755 
756 	doc = xml_parse(data, xmloption_arg, preserve_whitespace,
757 					GetDatabaseEncoding());
758 	xmlFreeDoc(doc);
759 
760 	return (xmltype *) data;
761 #else
762 	NO_XML_SUPPORT();
763 	return NULL;
764 #endif
765 }
766 
767 
768 xmltype *
xmlpi(const char * target,text * arg,bool arg_is_null,bool * result_is_null)769 xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
770 {
771 #ifdef USE_LIBXML
772 	xmltype    *result;
773 	StringInfoData buf;
774 
775 	if (pg_strcasecmp(target, "xml") == 0)
776 		ereport(ERROR,
777 				(errcode(ERRCODE_SYNTAX_ERROR), /* really */
778 				 errmsg("invalid XML processing instruction"),
779 				 errdetail("XML processing instruction target name cannot be \"%s\".", target)));
780 
781 	/*
782 	 * Following the SQL standard, the null check comes after the syntax check
783 	 * above.
784 	 */
785 	*result_is_null = arg_is_null;
786 	if (*result_is_null)
787 		return NULL;
788 
789 	initStringInfo(&buf);
790 
791 	appendStringInfo(&buf, "<?%s", target);
792 
793 	if (arg != NULL)
794 	{
795 		char	   *string;
796 
797 		string = text_to_cstring(arg);
798 		if (strstr(string, "?>") != NULL)
799 			ereport(ERROR,
800 					(errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
801 					 errmsg("invalid XML processing instruction"),
802 					 errdetail("XML processing instruction cannot contain \"?>\".")));
803 
804 		appendStringInfoChar(&buf, ' ');
805 		appendStringInfoString(&buf, string + strspn(string, " "));
806 		pfree(string);
807 	}
808 	appendStringInfoString(&buf, "?>");
809 
810 	result = stringinfo_to_xmltype(&buf);
811 	pfree(buf.data);
812 	return result;
813 #else
814 	NO_XML_SUPPORT();
815 	return NULL;
816 #endif
817 }
818 
819 
820 xmltype *
xmlroot(xmltype * data,text * version,int standalone)821 xmlroot(xmltype *data, text *version, int standalone)
822 {
823 #ifdef USE_LIBXML
824 	char	   *str;
825 	size_t		len;
826 	xmlChar    *orig_version;
827 	int			orig_standalone;
828 	StringInfoData buf;
829 
830 	len = VARSIZE(data) - VARHDRSZ;
831 	str = text_to_cstring((text *) data);
832 
833 	parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
834 
835 	if (version)
836 		orig_version = xml_text2xmlChar(version);
837 	else
838 		orig_version = NULL;
839 
840 	switch (standalone)
841 	{
842 		case XML_STANDALONE_YES:
843 			orig_standalone = 1;
844 			break;
845 		case XML_STANDALONE_NO:
846 			orig_standalone = 0;
847 			break;
848 		case XML_STANDALONE_NO_VALUE:
849 			orig_standalone = -1;
850 			break;
851 		case XML_STANDALONE_OMITTED:
852 			/* leave original value */
853 			break;
854 	}
855 
856 	initStringInfo(&buf);
857 	print_xml_decl(&buf, orig_version, 0, orig_standalone);
858 	appendStringInfoString(&buf, str + len);
859 
860 	return stringinfo_to_xmltype(&buf);
861 #else
862 	NO_XML_SUPPORT();
863 	return NULL;
864 #endif
865 }
866 
867 
868 /*
869  * Validate document (given as string) against DTD (given as external link)
870  *
871  * This has been removed because it is a security hole: unprivileged users
872  * should not be able to use Postgres to fetch arbitrary external files,
873  * which unfortunately is exactly what libxml is willing to do with the DTD
874  * parameter.
875  */
876 Datum
xmlvalidate(PG_FUNCTION_ARGS)877 xmlvalidate(PG_FUNCTION_ARGS)
878 {
879 	ereport(ERROR,
880 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
881 			 errmsg("xmlvalidate is not implemented")));
882 	return 0;
883 }
884 
885 
886 bool
xml_is_document(xmltype * arg)887 xml_is_document(xmltype *arg)
888 {
889 #ifdef USE_LIBXML
890 	bool		result;
891 	volatile xmlDocPtr doc = NULL;
892 	MemoryContext ccxt = CurrentMemoryContext;
893 
894 	/* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
895 	PG_TRY();
896 	{
897 		doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
898 						GetDatabaseEncoding());
899 		result = true;
900 	}
901 	PG_CATCH();
902 	{
903 		ErrorData  *errdata;
904 		MemoryContext ecxt;
905 
906 		ecxt = MemoryContextSwitchTo(ccxt);
907 		errdata = CopyErrorData();
908 		if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
909 		{
910 			FlushErrorState();
911 			result = false;
912 		}
913 		else
914 		{
915 			MemoryContextSwitchTo(ecxt);
916 			PG_RE_THROW();
917 		}
918 	}
919 	PG_END_TRY();
920 
921 	if (doc)
922 		xmlFreeDoc(doc);
923 
924 	return result;
925 #else							/* not USE_LIBXML */
926 	NO_XML_SUPPORT();
927 	return false;
928 #endif							/* not USE_LIBXML */
929 }
930 
931 
932 #ifdef USE_LIBXML
933 
934 /*
935  * pg_xml_init_library --- set up for use of libxml
936  *
937  * This should be called by each function that is about to use libxml
938  * facilities but doesn't require error handling.  It initializes libxml
939  * and verifies compatibility with the loaded libxml version.  These are
940  * once-per-session activities.
941  *
942  * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
943  * check)
944  */
945 void
pg_xml_init_library(void)946 pg_xml_init_library(void)
947 {
948 	static bool first_time = true;
949 
950 	if (first_time)
951 	{
952 		/* Stuff we need do only once per session */
953 
954 		/*
955 		 * Currently, we have no pure UTF-8 support for internals -- check if
956 		 * we can work.
957 		 */
958 		if (sizeof(char) != sizeof(xmlChar))
959 			ereport(ERROR,
960 					(errmsg("could not initialize XML library"),
961 					 errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
962 							   (int) sizeof(char), (int) sizeof(xmlChar))));
963 
964 #ifdef USE_LIBXMLCONTEXT
965 		/* Set up libxml's memory allocation our way */
966 		xml_memory_init();
967 #endif
968 
969 		/* Check library compatibility */
970 		LIBXML_TEST_VERSION;
971 
972 		first_time = false;
973 	}
974 }
975 
976 /*
977  * pg_xml_init --- set up for use of libxml and register an error handler
978  *
979  * This should be called by each function that is about to use libxml
980  * facilities and requires error handling.  It initializes libxml with
981  * pg_xml_init_library() and establishes our libxml error handler.
982  *
983  * strictness determines which errors are reported and which are ignored.
984  *
985  * Calls to this function MUST be followed by a PG_TRY block that guarantees
986  * that pg_xml_done() is called during either normal or error exit.
987  *
988  * This is exported for use by contrib/xml2, as well as other code that might
989  * wish to share use of this module's libxml error handler.
990  */
991 PgXmlErrorContext *
pg_xml_init(PgXmlStrictness strictness)992 pg_xml_init(PgXmlStrictness strictness)
993 {
994 	PgXmlErrorContext *errcxt;
995 	void	   *new_errcxt;
996 
997 	/* Do one-time setup if needed */
998 	pg_xml_init_library();
999 
1000 	/* Create error handling context structure */
1001 	errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1002 	errcxt->magic = ERRCXT_MAGIC;
1003 	errcxt->strictness = strictness;
1004 	errcxt->err_occurred = false;
1005 	initStringInfo(&errcxt->err_buf);
1006 
1007 	/*
1008 	 * Save original error handler and install ours. libxml originally didn't
1009 	 * distinguish between the contexts for generic and for structured error
1010 	 * handlers.  If we're using an old libxml version, we must thus save the
1011 	 * generic error context, even though we're using a structured error
1012 	 * handler.
1013 	 */
1014 	errcxt->saved_errfunc = xmlStructuredError;
1015 
1016 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1017 	errcxt->saved_errcxt = xmlStructuredErrorContext;
1018 #else
1019 	errcxt->saved_errcxt = xmlGenericErrorContext;
1020 #endif
1021 
1022 	xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1023 
1024 	/*
1025 	 * Verify that xmlSetStructuredErrorFunc set the context variable we
1026 	 * expected it to.  If not, the error context pointer we just saved is not
1027 	 * the correct thing to restore, and since that leaves us without a way to
1028 	 * restore the context in pg_xml_done, we must fail.
1029 	 *
1030 	 * The only known situation in which this test fails is if we compile with
1031 	 * headers from a libxml2 that doesn't track the structured error context
1032 	 * separately (< 2.7.4), but at runtime use a version that does, or vice
1033 	 * versa.  The libxml2 authors did not treat that change as constituting
1034 	 * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1035 	 * fails to protect us from this.
1036 	 */
1037 
1038 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1039 	new_errcxt = xmlStructuredErrorContext;
1040 #else
1041 	new_errcxt = xmlGenericErrorContext;
1042 #endif
1043 
1044 	if (new_errcxt != (void *) errcxt)
1045 		ereport(ERROR,
1046 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1047 				 errmsg("could not set up XML error handler"),
1048 				 errhint("This probably indicates that the version of libxml2"
1049 						 " being used is not compatible with the libxml2"
1050 						 " header files that PostgreSQL was built with.")));
1051 
1052 	/*
1053 	 * Also, install an entity loader to prevent unwanted fetches of external
1054 	 * files and URLs.
1055 	 */
1056 	errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1057 	xmlSetExternalEntityLoader(xmlPgEntityLoader);
1058 
1059 	return errcxt;
1060 }
1061 
1062 
1063 /*
1064  * pg_xml_done --- restore previous libxml error handling
1065  *
1066  * Resets libxml's global error-handling state to what it was before
1067  * pg_xml_init() was called.
1068  *
1069  * This routine verifies that all pending errors have been dealt with
1070  * (in assert-enabled builds, anyway).
1071  */
1072 void
pg_xml_done(PgXmlErrorContext * errcxt,bool isError)1073 pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1074 {
1075 	void	   *cur_errcxt;
1076 
1077 	/* An assert seems like enough protection here */
1078 	Assert(errcxt->magic == ERRCXT_MAGIC);
1079 
1080 	/*
1081 	 * In a normal exit, there should be no un-handled libxml errors.  But we
1082 	 * shouldn't try to enforce this during error recovery, since the longjmp
1083 	 * could have been thrown before xml_ereport had a chance to run.
1084 	 */
1085 	Assert(!errcxt->err_occurred || isError);
1086 
1087 	/*
1088 	 * Check that libxml's global state is correct, warn if not.  This is a
1089 	 * real test and not an Assert because it has a higher probability of
1090 	 * happening.
1091 	 */
1092 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1093 	cur_errcxt = xmlStructuredErrorContext;
1094 #else
1095 	cur_errcxt = xmlGenericErrorContext;
1096 #endif
1097 
1098 	if (cur_errcxt != (void *) errcxt)
1099 		elog(WARNING, "libxml error handling state is out of sync with xml.c");
1100 
1101 	/* Restore the saved handlers */
1102 	xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1103 	xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1104 
1105 	/*
1106 	 * Mark the struct as invalid, just in case somebody somehow manages to
1107 	 * call xml_errorHandler or xml_ereport with it.
1108 	 */
1109 	errcxt->magic = 0;
1110 
1111 	/* Release memory */
1112 	pfree(errcxt->err_buf.data);
1113 	pfree(errcxt);
1114 }
1115 
1116 
1117 /*
1118  * pg_xml_error_occurred() --- test the error flag
1119  */
1120 bool
pg_xml_error_occurred(PgXmlErrorContext * errcxt)1121 pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1122 {
1123 	return errcxt->err_occurred;
1124 }
1125 
1126 
1127 /*
1128  * SQL/XML allows storing "XML documents" or "XML content".  "XML
1129  * documents" are specified by the XML specification and are parsed
1130  * easily by libxml.  "XML content" is specified by SQL/XML as the
1131  * production "XMLDecl? content".  But libxml can only parse the
1132  * "content" part, so we have to parse the XML declaration ourselves
1133  * to complete this.
1134  */
1135 
/*
 * Make the enclosing function return XML_ERR_SPACE_REQUIRED unless *p is an
 * XML blank character.  Only usable inside functions returning an error code.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/*
 * Advance p past any run of XML blank characters.  This expands to a bare
 * "while" loop, so use it only as a complete statement of its own.
 */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
/* The argument is parenthesized so that compound expressions parse sanely. */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
			|| xmlIsDigit_ch(c) \
			|| (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
			|| xmlIsCombiningQ(c) \
			|| xmlIsExtender_ch(c))
1153 
1154 /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1155 static xmlChar *
xml_pnstrdup(const xmlChar * str,size_t len)1156 xml_pnstrdup(const xmlChar *str, size_t len)
1157 {
1158 	xmlChar    *result;
1159 
1160 	result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1161 	memcpy(result, str, len * sizeof(xmlChar));
1162 	result[len] = 0;
1163 	return result;
1164 }
1165 
1166 /* Ditto, except input is char* */
1167 static xmlChar *
pg_xmlCharStrndup(const char * str,size_t len)1168 pg_xmlCharStrndup(const char *str, size_t len)
1169 {
1170 	xmlChar    *result;
1171 
1172 	result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1173 	memcpy(result, str, len);
1174 	result[len] = '\0';
1175 
1176 	return result;
1177 }
1178 
1179 /*
1180  * str is the null-terminated input string.  Remaining arguments are
1181  * output arguments; each can be NULL if value is not wanted.
1182  * version and encoding are returned as locally-palloc'd strings.
1183  * Result is 0 if OK, an error code if not.
1184  */
1185 static int
parse_xml_decl(const xmlChar * str,size_t * lenp,xmlChar ** version,xmlChar ** encoding,int * standalone)1186 parse_xml_decl(const xmlChar *str, size_t *lenp,
1187 			   xmlChar **version, xmlChar **encoding, int *standalone)
1188 {
1189 	const xmlChar *p;
1190 	const xmlChar *save_p;
1191 	size_t		len;
1192 	int			utf8char;
1193 	int			utf8len;
1194 
1195 	/*
1196 	 * Only initialize libxml.  We don't need error handling here, but we do
1197 	 * need to make sure libxml is initialized before calling any of its
1198 	 * functions.  Note that this is safe (and a no-op) if caller has already
1199 	 * done pg_xml_init().
1200 	 */
1201 	pg_xml_init_library();
1202 
1203 	/* Initialize output arguments to "not present" */
1204 	if (version)
1205 		*version = NULL;
1206 	if (encoding)
1207 		*encoding = NULL;
1208 	if (standalone)
1209 		*standalone = -1;
1210 
1211 	p = str;
1212 
1213 	if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1214 		goto finished;
1215 
1216 	/*
1217 	 * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1218 	 * rather than an XMLDecl, so we have done what we came to do and found no
1219 	 * XMLDecl.
1220 	 *
1221 	 * We need an input length value for xmlGetUTF8Char, but there's no need
1222 	 * to count the whole document size, so use strnlen not strlen.
1223 	 */
1224 	utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1225 	utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1226 	if (PG_XMLISNAMECHAR(utf8char))
1227 		goto finished;
1228 
1229 	p += 5;
1230 
1231 	/* version */
1232 	CHECK_XML_SPACE(p);
1233 	SKIP_XML_SPACE(p);
1234 	if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1235 		return XML_ERR_VERSION_MISSING;
1236 	p += 7;
1237 	SKIP_XML_SPACE(p);
1238 	if (*p != '=')
1239 		return XML_ERR_VERSION_MISSING;
1240 	p += 1;
1241 	SKIP_XML_SPACE(p);
1242 
1243 	if (*p == '\'' || *p == '"')
1244 	{
1245 		const xmlChar *q;
1246 
1247 		q = xmlStrchr(p + 1, *p);
1248 		if (!q)
1249 			return XML_ERR_VERSION_MISSING;
1250 
1251 		if (version)
1252 			*version = xml_pnstrdup(p + 1, q - p - 1);
1253 		p = q + 1;
1254 	}
1255 	else
1256 		return XML_ERR_VERSION_MISSING;
1257 
1258 	/* encoding */
1259 	save_p = p;
1260 	SKIP_XML_SPACE(p);
1261 	if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1262 	{
1263 		CHECK_XML_SPACE(save_p);
1264 		p += 8;
1265 		SKIP_XML_SPACE(p);
1266 		if (*p != '=')
1267 			return XML_ERR_MISSING_ENCODING;
1268 		p += 1;
1269 		SKIP_XML_SPACE(p);
1270 
1271 		if (*p == '\'' || *p == '"')
1272 		{
1273 			const xmlChar *q;
1274 
1275 			q = xmlStrchr(p + 1, *p);
1276 			if (!q)
1277 				return XML_ERR_MISSING_ENCODING;
1278 
1279 			if (encoding)
1280 				*encoding = xml_pnstrdup(p + 1, q - p - 1);
1281 			p = q + 1;
1282 		}
1283 		else
1284 			return XML_ERR_MISSING_ENCODING;
1285 	}
1286 	else
1287 	{
1288 		p = save_p;
1289 	}
1290 
1291 	/* standalone */
1292 	save_p = p;
1293 	SKIP_XML_SPACE(p);
1294 	if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1295 	{
1296 		CHECK_XML_SPACE(save_p);
1297 		p += 10;
1298 		SKIP_XML_SPACE(p);
1299 		if (*p != '=')
1300 			return XML_ERR_STANDALONE_VALUE;
1301 		p += 1;
1302 		SKIP_XML_SPACE(p);
1303 		if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1304 			xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1305 		{
1306 			if (standalone)
1307 				*standalone = 1;
1308 			p += 5;
1309 		}
1310 		else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1311 				 xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1312 		{
1313 			if (standalone)
1314 				*standalone = 0;
1315 			p += 4;
1316 		}
1317 		else
1318 			return XML_ERR_STANDALONE_VALUE;
1319 	}
1320 	else
1321 	{
1322 		p = save_p;
1323 	}
1324 
1325 	SKIP_XML_SPACE(p);
1326 	if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1327 		return XML_ERR_XMLDECL_NOT_FINISHED;
1328 	p += 2;
1329 
1330 finished:
1331 	len = p - str;
1332 
1333 	for (p = str; p < str + len; p++)
1334 		if (*p > 127)
1335 			return XML_ERR_INVALID_CHAR;
1336 
1337 	if (lenp)
1338 		*lenp = len;
1339 
1340 	return XML_ERR_OK;
1341 }
1342 
1343 
1344 /*
1345  * Write an XML declaration.  On output, we adjust the XML declaration
1346  * as follows.  (These rules are the moral equivalent of the clause
1347  * "Serialization of an XML value" in the SQL standard.)
1348  *
1349  * We try to avoid generating an XML declaration if possible.  This is
1350  * so that you don't get trivial things like xml '<foo/>' resulting in
1351  * '<?xml version="1.0"?><foo/>', which would surely be annoying.  We
1352  * must provide a declaration if the standalone property is specified
1353  * or if we include an encoding declaration.  If we have a
1354  * declaration, we must specify a version (XML requires this).
1355  * Otherwise we only make a declaration if the version is not "1.0",
1356  * which is the default version specified in SQL:2003.
1357  */
1358 static bool
print_xml_decl(StringInfo buf,const xmlChar * version,pg_enc encoding,int standalone)1359 print_xml_decl(StringInfo buf, const xmlChar *version,
1360 			   pg_enc encoding, int standalone)
1361 {
1362 	if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1363 		|| (encoding && encoding != PG_UTF8)
1364 		|| standalone != -1)
1365 	{
1366 		appendStringInfoString(buf, "<?xml");
1367 
1368 		if (version)
1369 			appendStringInfo(buf, " version=\"%s\"", version);
1370 		else
1371 			appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1372 
1373 		if (encoding && encoding != PG_UTF8)
1374 		{
1375 			/*
1376 			 * XXX might be useful to convert this to IANA names (ISO-8859-1
1377 			 * instead of LATIN1 etc.); needs field experience
1378 			 */
1379 			appendStringInfo(buf, " encoding=\"%s\"",
1380 							 pg_encoding_to_char(encoding));
1381 		}
1382 
1383 		if (standalone == 1)
1384 			appendStringInfoString(buf, " standalone=\"yes\"");
1385 		else if (standalone == 0)
1386 			appendStringInfoString(buf, " standalone=\"no\"");
1387 		appendStringInfoString(buf, "?>");
1388 
1389 		return true;
1390 	}
1391 	else
1392 		return false;
1393 }
1394 
1395 /*
1396  * Test whether an input that is to be parsed as CONTENT contains a DTD.
1397  *
1398  * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1399  * satisfied by a document with a DTD, which is a bit of a wart, as it means
1400  * the CONTENT type is not a proper superset of DOCUMENT.  SQL/XML:2006 and
1401  * later fix that, by redefining content with reference to the "more
1402  * permissive" Document Node of the XQuery/XPath Data Model, such that any
1403  * DOCUMENT value is indeed also a CONTENT value.  That definition is more
1404  * useful, as CONTENT becomes usable for parsing input of unknown form (think
1405  * pg_restore).
1406  *
1407  * As used below in parse_xml when parsing for CONTENT, libxml does not give
1408  * us the 2006+ behavior, but only the 2003; it will choke if the input has
1409  * a DTD.  But we can provide the 2006+ definition of CONTENT easily enough,
1410  * by detecting this case first and simply doing the parse as DOCUMENT.
1411  *
1412  * A DTD can be found arbitrarily far in, but that would be a contrived case;
1413  * it will ordinarily start within a few dozen characters.  The only things
1414  * that can precede it are an XMLDecl (here, the caller will have called
1415  * parse_xml_decl already), whitespace, comments, and processing instructions.
1416  * This function need only return true if it sees a valid sequence of such
1417  * things leading to <!DOCTYPE.  It can simply return false in any other
1418  * cases, including malformed input; that will mean the input gets parsed as
1419  * CONTENT as originally planned, with libxml reporting any errors.
1420  *
1421  * This is only to be called from xml_parse, when pg_xml_init has already
1422  * been called.  The input is already in UTF8 encoding.
1423  */
1424 static bool
xml_doctype_in_content(const xmlChar * str)1425 xml_doctype_in_content(const xmlChar *str)
1426 {
1427 	const xmlChar *p = str;
1428 
1429 	for (;;)
1430 	{
1431 		const xmlChar *e;
1432 
1433 		SKIP_XML_SPACE(p);
1434 		if (*p != '<')
1435 			return false;
1436 		p++;
1437 
1438 		if (*p == '!')
1439 		{
1440 			p++;
1441 
1442 			/* if we see <!DOCTYPE, we can return true */
1443 			if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1444 				return true;
1445 
1446 			/* otherwise, if it's not a comment, fail */
1447 			if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1448 				return false;
1449 			/* find end of comment: find -- and a > must follow */
1450 			p = xmlStrstr(p + 2, (xmlChar *) "--");
1451 			if (!p || p[2] != '>')
1452 				return false;
1453 			/* advance over comment, and keep scanning */
1454 			p += 3;
1455 			continue;
1456 		}
1457 
1458 		/* otherwise, if it's not a PI <?target something?>, fail */
1459 		if (*p != '?')
1460 			return false;
1461 		p++;
1462 
1463 		/* find end of PI (the string ?> is forbidden within a PI) */
1464 		e = xmlStrstr(p, (xmlChar *) "?>");
1465 		if (!e)
1466 			return false;
1467 
1468 		/* advance over PI, keep scanning */
1469 		p = e + 2;
1470 	}
1471 }
1472 
1473 
1474 /*
1475  * Convert a C string to XML internal representation
1476  *
1477  * Note: it is caller's responsibility to xmlFreeDoc() the result,
1478  * else a permanent memory leak will ensue!
1479  *
1480  * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1481  * yet do not use SAX - see xmlreader.c)
1482  */
1483 static xmlDocPtr
xml_parse(text * data,XmlOptionType xmloption_arg,bool preserve_whitespace,int encoding)1484 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1485 		  int encoding)
1486 {
1487 	int32		len;
1488 	xmlChar    *string;
1489 	xmlChar    *utf8string;
1490 	PgXmlErrorContext *xmlerrcxt;
1491 	volatile xmlParserCtxtPtr ctxt = NULL;
1492 	volatile xmlDocPtr doc = NULL;
1493 
1494 	len = VARSIZE_ANY_EXHDR(data);	/* will be useful later */
1495 	string = xml_text2xmlChar(data);
1496 
1497 	utf8string = pg_do_encoding_conversion(string,
1498 										   len,
1499 										   encoding,
1500 										   PG_UTF8);
1501 
1502 	/* Start up libxml and its parser */
1503 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1504 
1505 	/* Use a TRY block to ensure we clean up correctly */
1506 	PG_TRY();
1507 	{
1508 		bool		parse_as_document = false;
1509 		int			res_code;
1510 		size_t		count = 0;
1511 		xmlChar    *version = NULL;
1512 		int			standalone = 0;
1513 
1514 		xmlInitParser();
1515 
1516 		ctxt = xmlNewParserCtxt();
1517 		if (ctxt == NULL || xmlerrcxt->err_occurred)
1518 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1519 						"could not allocate parser context");
1520 
1521 		/* Decide whether to parse as document or content */
1522 		if (xmloption_arg == XMLOPTION_DOCUMENT)
1523 			parse_as_document = true;
1524 		else
1525 		{
1526 			/* Parse and skip over the XML declaration, if any */
1527 			res_code = parse_xml_decl(utf8string,
1528 									  &count, &version, NULL, &standalone);
1529 			if (res_code != 0)
1530 				xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1531 									"invalid XML content: invalid XML declaration",
1532 									res_code);
1533 
1534 			/* Is there a DOCTYPE element? */
1535 			if (xml_doctype_in_content(utf8string + count))
1536 				parse_as_document = true;
1537 		}
1538 
1539 		if (parse_as_document)
1540 		{
1541 			/*
1542 			 * Note, that here we try to apply DTD defaults
1543 			 * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1544 			 * 'Default values defined by internal DTD are applied'. As for
1545 			 * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1546 			 * 10.16.7.e)
1547 			 */
1548 			doc = xmlCtxtReadDoc(ctxt, utf8string,
1549 								 NULL,
1550 								 "UTF-8",
1551 								 XML_PARSE_NOENT | XML_PARSE_DTDATTR
1552 								 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1553 			if (doc == NULL || xmlerrcxt->err_occurred)
1554 			{
1555 				/* Use original option to decide which error code to throw */
1556 				if (xmloption_arg == XMLOPTION_DOCUMENT)
1557 					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1558 								"invalid XML document");
1559 				else
1560 					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1561 								"invalid XML content");
1562 			}
1563 		}
1564 		else
1565 		{
1566 			doc = xmlNewDoc(version);
1567 			Assert(doc->encoding == NULL);
1568 			doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1569 			doc->standalone = standalone;
1570 
1571 			/* allow empty content */
1572 			if (*(utf8string + count))
1573 			{
1574 				res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1575 													   utf8string + count, NULL);
1576 				if (res_code != 0 || xmlerrcxt->err_occurred)
1577 					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1578 								"invalid XML content");
1579 			}
1580 		}
1581 	}
1582 	PG_CATCH();
1583 	{
1584 		if (doc != NULL)
1585 			xmlFreeDoc(doc);
1586 		if (ctxt != NULL)
1587 			xmlFreeParserCtxt(ctxt);
1588 
1589 		pg_xml_done(xmlerrcxt, true);
1590 
1591 		PG_RE_THROW();
1592 	}
1593 	PG_END_TRY();
1594 
1595 	xmlFreeParserCtxt(ctxt);
1596 
1597 	pg_xml_done(xmlerrcxt, false);
1598 
1599 	return doc;
1600 }
1601 
1602 
1603 /*
1604  * xmlChar<->text conversions
1605  */
1606 static xmlChar *
xml_text2xmlChar(text * in)1607 xml_text2xmlChar(text *in)
1608 {
1609 	return (xmlChar *) text_to_cstring(in);
1610 }
1611 
1612 
1613 #ifdef USE_LIBXMLCONTEXT
1614 
1615 /*
1616  * Manage the special context used for all libxml allocations (but only
1617  * in special debug builds; see notes at top of file)
1618  */
1619 static void
xml_memory_init(void)1620 xml_memory_init(void)
1621 {
1622 	/* Create memory context if not there already */
1623 	if (LibxmlContext == NULL)
1624 		LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1625 											  "Libxml context",
1626 											  ALLOCSET_DEFAULT_SIZES);
1627 
1628 	/* Re-establish the callbacks even if already set */
1629 	xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1630 }
1631 
1632 /*
1633  * Wrappers for memory management functions
1634  */
1635 static void *
xml_palloc(size_t size)1636 xml_palloc(size_t size)
1637 {
1638 	return MemoryContextAlloc(LibxmlContext, size);
1639 }
1640 
1641 
1642 static void *
xml_repalloc(void * ptr,size_t size)1643 xml_repalloc(void *ptr, size_t size)
1644 {
1645 	return repalloc(ptr, size);
1646 }
1647 
1648 
static void
xml_pfree(void *ptr)
{
	/* Some parts of libxml rely on xmlFree(NULL) being harmless */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
1656 
1657 
1658 static char *
xml_pstrdup(const char * string)1659 xml_pstrdup(const char *string)
1660 {
1661 	return MemoryContextStrdup(LibxmlContext, string);
1662 }
1663 #endif							/* USE_LIBXMLCONTEXT */
1664 
1665 
1666 /*
1667  * xmlPgEntityLoader --- entity loader callback function
1668  *
1669  * Silently prevent any external entity URL from being loaded.  We don't want
1670  * to throw an error, so instead make the entity appear to expand to an empty
1671  * string.
1672  *
1673  * We would prefer to allow loading entities that exist in the system's
1674  * global XML catalog; but the available libxml2 APIs make that a complex
1675  * and fragile task.  For now, just shut down all external access.
1676  */
1677 static xmlParserInputPtr
xmlPgEntityLoader(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)1678 xmlPgEntityLoader(const char *URL, const char *ID,
1679 				  xmlParserCtxtPtr ctxt)
1680 {
1681 	return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1682 }
1683 
1684 
1685 /*
1686  * xml_ereport --- report an XML-related error
1687  *
1688  * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1689  * standard.  This function adds libxml's native error message, if any, as
1690  * detail.
1691  *
1692  * This is exported for modules that want to share the core libxml error
1693  * handler.  Note that pg_xml_init() *must* have been called previously.
1694  */
1695 void
xml_ereport(PgXmlErrorContext * errcxt,int level,int sqlcode,const char * msg)1696 xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1697 {
1698 	char	   *detail;
1699 
1700 	/* Defend against someone passing us a bogus context struct */
1701 	if (errcxt->magic != ERRCXT_MAGIC)
1702 		elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1703 
1704 	/* Flag that the current libxml error has been reported */
1705 	errcxt->err_occurred = false;
1706 
1707 	/* Include detail only if we have some text from libxml */
1708 	if (errcxt->err_buf.len > 0)
1709 		detail = errcxt->err_buf.data;
1710 	else
1711 		detail = NULL;
1712 
1713 	ereport(level,
1714 			(errcode(sqlcode),
1715 			 errmsg_internal("%s", msg),
1716 			 detail ? errdetail_internal("%s", detail) : 0));
1717 }
1718 
1719 
1720 /*
1721  * Error handler for libxml errors and warnings
1722  */
1723 static void
xml_errorHandler(void * data,xmlErrorPtr error)1724 xml_errorHandler(void *data, xmlErrorPtr error)
1725 {
1726 	PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1727 	xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1728 	xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1729 	xmlNodePtr	node = error->node;
1730 	const xmlChar *name = (node != NULL &&
1731 						   node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1732 	int			domain = error->domain;
1733 	int			level = error->level;
1734 	StringInfo	errorBuf;
1735 
1736 	/*
1737 	 * Defend against someone passing us a bogus context struct.
1738 	 *
1739 	 * We force a backend exit if this check fails because longjmp'ing out of
1740 	 * libxml would likely render it unsafe to use further.
1741 	 */
1742 	if (xmlerrcxt->magic != ERRCXT_MAGIC)
1743 		elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1744 
1745 	/*----------
1746 	 * Older libxml versions report some errors differently.
1747 	 * First, some errors were previously reported as coming from the parser
1748 	 * domain but are now reported as coming from the namespace domain.
1749 	 * Second, some warnings were upgraded to errors.
1750 	 * We attempt to compensate for that here.
1751 	 *----------
1752 	 */
1753 	switch (error->code)
1754 	{
1755 		case XML_WAR_NS_URI:
1756 			level = XML_ERR_ERROR;
1757 			domain = XML_FROM_NAMESPACE;
1758 			break;
1759 
1760 		case XML_ERR_NS_DECL_ERROR:
1761 		case XML_WAR_NS_URI_RELATIVE:
1762 		case XML_WAR_NS_COLUMN:
1763 		case XML_NS_ERR_XML_NAMESPACE:
1764 		case XML_NS_ERR_UNDEFINED_NAMESPACE:
1765 		case XML_NS_ERR_QNAME:
1766 		case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1767 		case XML_NS_ERR_EMPTY:
1768 			domain = XML_FROM_NAMESPACE;
1769 			break;
1770 	}
1771 
1772 	/* Decide whether to act on the error or not */
1773 	switch (domain)
1774 	{
1775 		case XML_FROM_PARSER:
1776 		case XML_FROM_NONE:
1777 		case XML_FROM_MEMORY:
1778 		case XML_FROM_IO:
1779 
1780 			/*
1781 			 * Suppress warnings about undeclared entities.  We need to do
1782 			 * this to avoid problems due to not loading DTD definitions.
1783 			 */
1784 			if (error->code == XML_WAR_UNDECLARED_ENTITY)
1785 				return;
1786 
1787 			/* Otherwise, accept error regardless of the parsing purpose */
1788 			break;
1789 
1790 		default:
1791 			/* Ignore error if only doing well-formedness check */
1792 			if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1793 				return;
1794 			break;
1795 	}
1796 
1797 	/* Prepare error message in errorBuf */
1798 	errorBuf = makeStringInfo();
1799 
1800 	if (error->line > 0)
1801 		appendStringInfo(errorBuf, "line %d: ", error->line);
1802 	if (name != NULL)
1803 		appendStringInfo(errorBuf, "element %s: ", name);
1804 	if (error->message != NULL)
1805 		appendStringInfoString(errorBuf, error->message);
1806 	else
1807 		appendStringInfoString(errorBuf, "(no message provided)");
1808 
1809 	/*
1810 	 * Append context information to errorBuf.
1811 	 *
1812 	 * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1813 	 * write the context.  Since we don't want to duplicate libxml
1814 	 * functionality here, we set up a generic error handler temporarily.
1815 	 *
1816 	 * We use appendStringInfo() directly as libxml's generic error handler.
1817 	 * This should work because it has essentially the same signature as
1818 	 * libxml expects, namely (void *ptr, const char *msg, ...).
1819 	 */
1820 	if (input != NULL)
1821 	{
1822 		xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1823 		void	   *errCtxSaved = xmlGenericErrorContext;
1824 
1825 		xmlSetGenericErrorFunc((void *) errorBuf,
1826 							   (xmlGenericErrorFunc) appendStringInfo);
1827 
1828 		/* Add context information to errorBuf */
1829 		appendStringInfoLineSeparator(errorBuf);
1830 
1831 		xmlParserPrintFileContext(input);
1832 
1833 		/* Restore generic error func */
1834 		xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1835 	}
1836 
1837 	/* Get rid of any trailing newlines in errorBuf */
1838 	chopStringInfoNewlines(errorBuf);
1839 
1840 	/*
1841 	 * Legacy error handling mode.  err_occurred is never set, we just add the
1842 	 * message to err_buf.  This mode exists because the xml2 contrib module
1843 	 * uses our error-handling infrastructure, but we don't want to change its
1844 	 * behaviour since it's deprecated anyway.  This is also why we don't
1845 	 * distinguish between notices, warnings and errors here --- the old-style
1846 	 * generic error handler wouldn't have done that either.
1847 	 */
1848 	if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1849 	{
1850 		appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1851 		appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1852 
1853 		pfree(errorBuf->data);
1854 		pfree(errorBuf);
1855 		return;
1856 	}
1857 
1858 	/*
1859 	 * We don't want to ereport() here because that'd probably leave libxml in
1860 	 * an inconsistent state.  Instead, we remember the error and ereport()
1861 	 * from xml_ereport().
1862 	 *
1863 	 * Warnings and notices can be reported immediately since they won't cause
1864 	 * a longjmp() out of libxml.
1865 	 */
1866 	if (level >= XML_ERR_ERROR)
1867 	{
1868 		appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1869 		appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1870 
1871 		xmlerrcxt->err_occurred = true;
1872 	}
1873 	else if (level >= XML_ERR_WARNING)
1874 	{
1875 		ereport(WARNING,
1876 				(errmsg_internal("%s", errorBuf->data)));
1877 	}
1878 	else
1879 	{
1880 		ereport(NOTICE,
1881 				(errmsg_internal("%s", errorBuf->data)));
1882 	}
1883 
1884 	pfree(errorBuf->data);
1885 	pfree(errorBuf);
1886 }
1887 
1888 
1889 /*
1890  * Wrapper for "ereport" function for XML-related errors.  The "msg"
1891  * is the SQL-level message; some can be adopted from the SQL/XML
1892  * standard.  This function uses "code" to create a textual detail
1893  * message.  At the moment, we only need to cover those codes that we
1894  * may raise in this file.
1895  */
1896 static void
xml_ereport_by_code(int level,int sqlcode,const char * msg,int code)1897 xml_ereport_by_code(int level, int sqlcode,
1898 					const char *msg, int code)
1899 {
1900 	const char *det;
1901 
1902 	switch (code)
1903 	{
1904 		case XML_ERR_INVALID_CHAR:
1905 			det = gettext_noop("Invalid character value.");
1906 			break;
1907 		case XML_ERR_SPACE_REQUIRED:
1908 			det = gettext_noop("Space required.");
1909 			break;
1910 		case XML_ERR_STANDALONE_VALUE:
1911 			det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1912 			break;
1913 		case XML_ERR_VERSION_MISSING:
1914 			det = gettext_noop("Malformed declaration: missing version.");
1915 			break;
1916 		case XML_ERR_MISSING_ENCODING:
1917 			det = gettext_noop("Missing encoding in text declaration.");
1918 			break;
1919 		case XML_ERR_XMLDECL_NOT_FINISHED:
1920 			det = gettext_noop("Parsing XML declaration: '?>' expected.");
1921 			break;
1922 		default:
1923 			det = gettext_noop("Unrecognized libxml error code: %d.");
1924 			break;
1925 	}
1926 
1927 	ereport(level,
1928 			(errcode(sqlcode),
1929 			 errmsg_internal("%s", msg),
1930 			 errdetail(det, code)));
1931 }
1932 
1933 
1934 /*
1935  * Remove all trailing newlines from a StringInfo string
1936  */
1937 static void
chopStringInfoNewlines(StringInfo str)1938 chopStringInfoNewlines(StringInfo str)
1939 {
1940 	while (str->len > 0 && str->data[str->len - 1] == '\n')
1941 		str->data[--str->len] = '\0';
1942 }
1943 
1944 
1945 /*
1946  * Append a newline after removing any existing trailing newlines
1947  */
1948 static void
appendStringInfoLineSeparator(StringInfo str)1949 appendStringInfoLineSeparator(StringInfo str)
1950 {
1951 	chopStringInfoNewlines(str);
1952 	if (str->len > 0)
1953 		appendStringInfoChar(str, '\n');
1954 }
1955 
1956 
1957 /*
1958  * Convert one char in the current server encoding to a Unicode codepoint.
1959  */
1960 static pg_wchar
sqlchar_to_unicode(const char * s)1961 sqlchar_to_unicode(const char *s)
1962 {
1963 	char	   *utf8string;
1964 	pg_wchar	ret[2];			/* need space for trailing zero */
1965 
1966 	/* note we're not assuming s is null-terminated */
1967 	utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
1968 
1969 	pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
1970 								  pg_encoding_mblen(PG_UTF8, utf8string));
1971 
1972 	if (utf8string != s)
1973 		pfree(utf8string);
1974 
1975 	return ret[0];
1976 }
1977 
1978 
1979 static bool
is_valid_xml_namefirst(pg_wchar c)1980 is_valid_xml_namefirst(pg_wchar c)
1981 {
1982 	/* (Letter | '_' | ':') */
1983 	return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1984 			|| c == '_' || c == ':');
1985 }
1986 
1987 
1988 static bool
is_valid_xml_namechar(pg_wchar c)1989 is_valid_xml_namechar(pg_wchar c)
1990 {
1991 	/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1992 	return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1993 			|| xmlIsDigitQ(c)
1994 			|| c == '.' || c == '-' || c == '_' || c == ':'
1995 			|| xmlIsCombiningQ(c)
1996 			|| xmlIsExtenderQ(c));
1997 }
1998 #endif							/* USE_LIBXML */
1999 
2000 
/*
 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
 *
 * Characters that need escaping are written as _xHHHH_ sequences.  With
 * fully_escaped, every ':' and a leading "xml" (in any case) are escaped as
 * well; with escape_period, so is '.'.  The result is a newly built string.
 *
 * In a build without libxml this just invokes NO_XML_SUPPORT().
 */
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
							   bool escape_period)
{
#ifdef USE_LIBXML
	StringInfoData out;
	const char *cp;

	/*
	 * SQL/XML doesn't make use of this case anywhere, so it's probably a
	 * mistake.
	 */
	Assert(fully_escaped || !escape_period);

	initStringInfo(&out);

	cp = ident;
	while (*cp)
	{
		bool		at_start = (cp == ident);

		if (*cp == ':' && (at_start || fully_escaped))
		{
			appendStringInfoString(&out, "_x003A_");
		}
		else if (*cp == '_' && *(cp + 1) == 'x')
		{
			/* an underscore that would otherwise look like an escape */
			appendStringInfoString(&out, "_x005F_");
		}
		else if (fully_escaped && at_start &&
				 pg_strncasecmp(cp, "xml", 3) == 0)
		{
			/* escape only the first letter, preserving its case */
			appendStringInfoString(&out,
								   (*cp == 'x') ? "_x0078_" : "_x0058_");
		}
		else if (escape_period && *cp == '.')
		{
			appendStringInfoString(&out, "_x002E_");
		}
		else
		{
			pg_wchar	u = sqlchar_to_unicode(cp);
			bool		valid;

			/* the first character is held to the stricter rule */
			valid = at_start
				? is_valid_xml_namefirst(u)
				: is_valid_xml_namechar(u);

			if (valid)
				appendBinaryStringInfo(&out, cp, pg_mblen(cp));
			else
				appendStringInfo(&out, "_x%04X_", (unsigned int) u);
		}

		cp += pg_mblen(cp);
	}

	return out.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif							/* not USE_LIBXML */
}
2055 
2056 
2057 /*
2058  * Map a Unicode codepoint into the current server encoding.
2059  */
2060 static char *
unicode_to_sqlchar(pg_wchar c)2061 unicode_to_sqlchar(pg_wchar c)
2062 {
2063 	char		utf8string[8];	/* need room for trailing zero */
2064 	char	   *result;
2065 
2066 	memset(utf8string, 0, sizeof(utf8string));
2067 	unicode_to_utf8(c, (unsigned char *) utf8string);
2068 
2069 	result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
2070 	/* if pg_any_to_server didn't strdup, we must */
2071 	if (result == utf8string)
2072 		result = pstrdup(result);
2073 	return result;
2074 }
2075 
2076 
2077 /*
2078  * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2079  */
2080 char *
map_xml_name_to_sql_identifier(const char * name)2081 map_xml_name_to_sql_identifier(const char *name)
2082 {
2083 	StringInfoData buf;
2084 	const char *p;
2085 
2086 	initStringInfo(&buf);
2087 
2088 	for (p = name; *p; p += pg_mblen(p))
2089 	{
2090 		if (*p == '_' && *(p + 1) == 'x'
2091 			&& isxdigit((unsigned char) *(p + 2))
2092 			&& isxdigit((unsigned char) *(p + 3))
2093 			&& isxdigit((unsigned char) *(p + 4))
2094 			&& isxdigit((unsigned char) *(p + 5))
2095 			&& *(p + 6) == '_')
2096 		{
2097 			unsigned int u;
2098 
2099 			sscanf(p + 2, "%X", &u);
2100 			appendStringInfoString(&buf, unicode_to_sqlchar(u));
2101 			p += 6;
2102 		}
2103 		else
2104 			appendBinaryStringInfo(&buf, p, pg_mblen(p));
2105 	}
2106 
2107 	return buf.data;
2108 }
2109 
2110 /*
2111  * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2112  *
2113  * When xml_escape_strings is true, then certain characters in string
2114  * values are replaced by entity references (&lt; etc.), as specified
2115  * in SQL/XML:2008 section 9.8 GR 9) a) iii).   This is normally what is
2116  * wanted.  The false case is mainly useful when the resulting value
2117  * is used with xmlTextWriterWriteAttribute() to write out an
2118  * attribute, because that function does the escaping itself.
2119  */
2120 char *
map_sql_value_to_xml_value(Datum value,Oid type,bool xml_escape_strings)2121 map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2122 {
2123 	if (type_is_array_domain(type))
2124 	{
2125 		ArrayType  *array;
2126 		Oid			elmtype;
2127 		int16		elmlen;
2128 		bool		elmbyval;
2129 		char		elmalign;
2130 		int			num_elems;
2131 		Datum	   *elem_values;
2132 		bool	   *elem_nulls;
2133 		StringInfoData buf;
2134 		int			i;
2135 
2136 		array = DatumGetArrayTypeP(value);
2137 		elmtype = ARR_ELEMTYPE(array);
2138 		get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2139 
2140 		deconstruct_array(array, elmtype,
2141 						  elmlen, elmbyval, elmalign,
2142 						  &elem_values, &elem_nulls,
2143 						  &num_elems);
2144 
2145 		initStringInfo(&buf);
2146 
2147 		for (i = 0; i < num_elems; i++)
2148 		{
2149 			if (elem_nulls[i])
2150 				continue;
2151 			appendStringInfoString(&buf, "<element>");
2152 			appendStringInfoString(&buf,
2153 								   map_sql_value_to_xml_value(elem_values[i],
2154 															  elmtype, true));
2155 			appendStringInfoString(&buf, "</element>");
2156 		}
2157 
2158 		pfree(elem_values);
2159 		pfree(elem_nulls);
2160 
2161 		return buf.data;
2162 	}
2163 	else
2164 	{
2165 		Oid			typeOut;
2166 		bool		isvarlena;
2167 		char	   *str;
2168 
2169 		/*
2170 		 * Flatten domains; the special-case treatments below should apply to,
2171 		 * eg, domains over boolean not just boolean.
2172 		 */
2173 		type = getBaseType(type);
2174 
2175 		/*
2176 		 * Special XSD formatting for some data types
2177 		 */
2178 		switch (type)
2179 		{
2180 			case BOOLOID:
2181 				if (DatumGetBool(value))
2182 					return "true";
2183 				else
2184 					return "false";
2185 
2186 			case DATEOID:
2187 				{
2188 					DateADT		date;
2189 					struct pg_tm tm;
2190 					char		buf[MAXDATELEN + 1];
2191 
2192 					date = DatumGetDateADT(value);
2193 					/* XSD doesn't support infinite values */
2194 					if (DATE_NOT_FINITE(date))
2195 						ereport(ERROR,
2196 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2197 								 errmsg("date out of range"),
2198 								 errdetail("XML does not support infinite date values.")));
2199 					j2date(date + POSTGRES_EPOCH_JDATE,
2200 						   &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2201 					EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2202 
2203 					return pstrdup(buf);
2204 				}
2205 
2206 			case TIMESTAMPOID:
2207 				{
2208 					Timestamp	timestamp;
2209 					struct pg_tm tm;
2210 					fsec_t		fsec;
2211 					char		buf[MAXDATELEN + 1];
2212 
2213 					timestamp = DatumGetTimestamp(value);
2214 
2215 					/* XSD doesn't support infinite values */
2216 					if (TIMESTAMP_NOT_FINITE(timestamp))
2217 						ereport(ERROR,
2218 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2219 								 errmsg("timestamp out of range"),
2220 								 errdetail("XML does not support infinite timestamp values.")));
2221 					else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2222 						EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2223 					else
2224 						ereport(ERROR,
2225 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2226 								 errmsg("timestamp out of range")));
2227 
2228 					return pstrdup(buf);
2229 				}
2230 
2231 			case TIMESTAMPTZOID:
2232 				{
2233 					TimestampTz timestamp;
2234 					struct pg_tm tm;
2235 					int			tz;
2236 					fsec_t		fsec;
2237 					const char *tzn = NULL;
2238 					char		buf[MAXDATELEN + 1];
2239 
2240 					timestamp = DatumGetTimestamp(value);
2241 
2242 					/* XSD doesn't support infinite values */
2243 					if (TIMESTAMP_NOT_FINITE(timestamp))
2244 						ereport(ERROR,
2245 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2246 								 errmsg("timestamp out of range"),
2247 								 errdetail("XML does not support infinite timestamp values.")));
2248 					else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2249 						EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2250 					else
2251 						ereport(ERROR,
2252 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2253 								 errmsg("timestamp out of range")));
2254 
2255 					return pstrdup(buf);
2256 				}
2257 
2258 #ifdef USE_LIBXML
2259 			case BYTEAOID:
2260 				{
2261 					bytea	   *bstr = DatumGetByteaPP(value);
2262 					PgXmlErrorContext *xmlerrcxt;
2263 					volatile xmlBufferPtr buf = NULL;
2264 					volatile xmlTextWriterPtr writer = NULL;
2265 					char	   *result;
2266 
2267 					xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2268 
2269 					PG_TRY();
2270 					{
2271 						buf = xmlBufferCreate();
2272 						if (buf == NULL || xmlerrcxt->err_occurred)
2273 							xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2274 										"could not allocate xmlBuffer");
2275 						writer = xmlNewTextWriterMemory(buf, 0);
2276 						if (writer == NULL || xmlerrcxt->err_occurred)
2277 							xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2278 										"could not allocate xmlTextWriter");
2279 
2280 						if (xmlbinary == XMLBINARY_BASE64)
2281 							xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2282 													 0, VARSIZE_ANY_EXHDR(bstr));
2283 						else
2284 							xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2285 													 0, VARSIZE_ANY_EXHDR(bstr));
2286 
2287 						/* we MUST do this now to flush data out to the buffer */
2288 						xmlFreeTextWriter(writer);
2289 						writer = NULL;
2290 
2291 						result = pstrdup((const char *) xmlBufferContent(buf));
2292 					}
2293 					PG_CATCH();
2294 					{
2295 						if (writer)
2296 							xmlFreeTextWriter(writer);
2297 						if (buf)
2298 							xmlBufferFree(buf);
2299 
2300 						pg_xml_done(xmlerrcxt, true);
2301 
2302 						PG_RE_THROW();
2303 					}
2304 					PG_END_TRY();
2305 
2306 					xmlBufferFree(buf);
2307 
2308 					pg_xml_done(xmlerrcxt, false);
2309 
2310 					return result;
2311 				}
2312 #endif							/* USE_LIBXML */
2313 
2314 		}
2315 
2316 		/*
2317 		 * otherwise, just use the type's native text representation
2318 		 */
2319 		getTypeOutputInfo(type, &typeOut, &isvarlena);
2320 		str = OidOutputFunctionCall(typeOut, value);
2321 
2322 		/* ... exactly as-is for XML, and when escaping is not wanted */
2323 		if (type == XMLOID || !xml_escape_strings)
2324 			return str;
2325 
2326 		/* otherwise, translate special characters as needed */
2327 		return escape_xml(str);
2328 	}
2329 }
2330 
2331 
2332 /*
2333  * Escape characters in text that have special meanings in XML.
2334  *
2335  * Returns a palloc'd string.
2336  *
2337  * NB: this is intentionally not dependent on libxml.
2338  */
2339 char *
escape_xml(const char * str)2340 escape_xml(const char *str)
2341 {
2342 	StringInfoData buf;
2343 	const char *p;
2344 
2345 	initStringInfo(&buf);
2346 	for (p = str; *p; p++)
2347 	{
2348 		switch (*p)
2349 		{
2350 			case '&':
2351 				appendStringInfoString(&buf, "&amp;");
2352 				break;
2353 			case '<':
2354 				appendStringInfoString(&buf, "&lt;");
2355 				break;
2356 			case '>':
2357 				appendStringInfoString(&buf, "&gt;");
2358 				break;
2359 			case '\r':
2360 				appendStringInfoString(&buf, "&#x0d;");
2361 				break;
2362 			default:
2363 				appendStringInfoCharMacro(&buf, *p);
2364 				break;
2365 		}
2366 	}
2367 	return buf.data;
2368 }
2369 
2370 
/*
 * Duplicate the null-terminated string "s" into memory obtained from
 * SPI_palloc(), and return the copy.
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes;
	char	   *copy;

	nbytes = strlen(s) + 1;		/* include the terminating null */
	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2380 
2381 
2382 /*
2383  * SQL to XML mapping functions
2384  *
2385  * What follows below was at one point intentionally organized so that
2386  * you can read along in the SQL/XML standard. The functions are
2387  * mostly split up the way the clauses lay out in the standards
2388  * document, and the identifiers are also aligned with the standard
2389  * text.  Unfortunately, SQL/XML:2006 reordered the clauses
2390  * differently than SQL/XML:2003, so the order below doesn't make much
2391  * sense anymore.
2392  *
2393  * There are many things going on there:
2394  *
2395  * There are two kinds of mappings: Mapping SQL data (table contents)
2396  * to XML documents, and mapping SQL structure (the "schema") to XML
2397  * Schema.  And there are functions that do both at the same time.
2398  *
2399  * Then you can map a database, a schema, or a table, each in both
2400  * ways.  This breaks down recursively: Mapping a database invokes
2401  * mapping schemas, which invokes mapping tables, which invokes
2402  * mapping rows, which invokes mapping columns, although you can't
2403  * call the last two from the outside.  Because of this, there are a
2404  * number of xyz_internal() functions which are to be called both from
2405  * the function manager wrapper and from some upper layer in a
2406  * recursive call.
2407  *
2408  * See the documentation about what the common function arguments
2409  * nulls, tableforest, and targetns mean.
2410  *
2411  * Some style guidelines for XML output: Use double quotes for quoting
2412  * XML attributes.  Indent XML elements by two spaces, but remember
2413  * that a lot of code is called recursively at different levels, so
2414  * it's better not to indent rather than create output that indents
2415  * and outdents weirdly.  Add newlines to make the output look nice.
2416  */
2417 
2418 
2419 /*
2420  * Visibility of objects for XML mappings; see SQL/XML:2008 section
2421  * 4.10.8.
2422  */
2423 
2424 /*
2425  * Given a query, which must return type oid as first column, produce
2426  * a list of Oids with the query results.
2427  */
2428 static List *
query_to_oid_list(const char * query)2429 query_to_oid_list(const char *query)
2430 {
2431 	uint64		i;
2432 	List	   *list = NIL;
2433 
2434 	SPI_execute(query, true, 0);
2435 
2436 	for (i = 0; i < SPI_processed; i++)
2437 	{
2438 		Datum		oid;
2439 		bool		isnull;
2440 
2441 		oid = SPI_getbinval(SPI_tuptable->vals[i],
2442 							SPI_tuptable->tupdesc,
2443 							1,
2444 							&isnull);
2445 		if (!isnull)
2446 			list = lappend_oid(list, DatumGetObjectId(oid));
2447 	}
2448 
2449 	return list;
2450 }
2451 
2452 
2453 static List *
schema_get_xml_visible_tables(Oid nspid)2454 schema_get_xml_visible_tables(Oid nspid)
2455 {
2456 	StringInfoData query;
2457 
2458 	initStringInfo(&query);
2459 	appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2460 					 " WHERE relnamespace = %u AND relkind IN ("
2461 					 CppAsString2(RELKIND_RELATION) ","
2462 					 CppAsString2(RELKIND_MATVIEW) ","
2463 					 CppAsString2(RELKIND_VIEW) ")"
2464 					 " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2465 					 " ORDER BY relname;", nspid);
2466 
2467 	return query_to_oid_list(query.data);
2468 }
2469 
2470 
/*
 * Including the system schemas is probably not useful for a database
 * mapping, so this condition matches the schemas to leave out: those
 * whose name starts with "pg_", and information_schema.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_' OR nspname = 'information_schema')"

/*
 * Query selecting the OIDs of the schemas that take part in a database
 * mapping.  It carries no ORDER BY and no terminating semicolon, so it
 * can be extended or embedded as a subquery.
 */
#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace" \
	" WHERE pg_catalog.has_schema_privilege (oid, 'USAGE')" \
	" AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2478 
2479 
2480 static List *
database_get_xml_visible_schemas(void)2481 database_get_xml_visible_schemas(void)
2482 {
2483 	return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2484 }
2485 
2486 
2487 static List *
database_get_xml_visible_tables(void)2488 database_get_xml_visible_tables(void)
2489 {
2490 	/* At the moment there is no order required here. */
2491 	return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2492 							 " WHERE relkind IN ("
2493 							 CppAsString2(RELKIND_RELATION) ","
2494 							 CppAsString2(RELKIND_MATVIEW) ","
2495 							 CppAsString2(RELKIND_VIEW) ")"
2496 							 " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2497 							 " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2498 }
2499 
2500 
2501 /*
2502  * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2503  * section 9.11.
2504  */
2505 
2506 static StringInfo
table_to_xml_internal(Oid relid,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2507 table_to_xml_internal(Oid relid,
2508 					  const char *xmlschema, bool nulls, bool tableforest,
2509 					  const char *targetns, bool top_level)
2510 {
2511 	StringInfoData query;
2512 
2513 	initStringInfo(&query);
2514 	appendStringInfo(&query, "SELECT * FROM %s",
2515 					 DatumGetCString(DirectFunctionCall1(regclassout,
2516 														 ObjectIdGetDatum(relid))));
2517 	return query_to_xml_internal(query.data, get_rel_name(relid),
2518 								 xmlschema, nulls, tableforest,
2519 								 targetns, top_level);
2520 }
2521 
2522 
2523 Datum
table_to_xml(PG_FUNCTION_ARGS)2524 table_to_xml(PG_FUNCTION_ARGS)
2525 {
2526 	Oid			relid = PG_GETARG_OID(0);
2527 	bool		nulls = PG_GETARG_BOOL(1);
2528 	bool		tableforest = PG_GETARG_BOOL(2);
2529 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2530 
2531 	PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2532 																nulls, tableforest,
2533 																targetns, true)));
2534 }
2535 
2536 
2537 Datum
query_to_xml(PG_FUNCTION_ARGS)2538 query_to_xml(PG_FUNCTION_ARGS)
2539 {
2540 	char	   *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2541 	bool		nulls = PG_GETARG_BOOL(1);
2542 	bool		tableforest = PG_GETARG_BOOL(2);
2543 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2544 
2545 	PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2546 																NULL, nulls, tableforest,
2547 																targetns, true)));
2548 }
2549 
2550 
2551 Datum
cursor_to_xml(PG_FUNCTION_ARGS)2552 cursor_to_xml(PG_FUNCTION_ARGS)
2553 {
2554 	char	   *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2555 	int32		count = PG_GETARG_INT32(1);
2556 	bool		nulls = PG_GETARG_BOOL(2);
2557 	bool		tableforest = PG_GETARG_BOOL(3);
2558 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2559 
2560 	StringInfoData result;
2561 	Portal		portal;
2562 	uint64		i;
2563 
2564 	initStringInfo(&result);
2565 
2566 	if (!tableforest)
2567 	{
2568 		xmldata_root_element_start(&result, "table", NULL, targetns, true);
2569 		appendStringInfoChar(&result, '\n');
2570 	}
2571 
2572 	SPI_connect();
2573 	portal = SPI_cursor_find(name);
2574 	if (portal == NULL)
2575 		ereport(ERROR,
2576 				(errcode(ERRCODE_UNDEFINED_CURSOR),
2577 				 errmsg("cursor \"%s\" does not exist", name)));
2578 
2579 	SPI_cursor_fetch(portal, true, count);
2580 	for (i = 0; i < SPI_processed; i++)
2581 		SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2582 								  tableforest, targetns, true);
2583 
2584 	SPI_finish();
2585 
2586 	if (!tableforest)
2587 		xmldata_root_element_end(&result, "table");
2588 
2589 	PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2590 }
2591 
2592 
2593 /*
2594  * Write the start tag of the root element of a data mapping.
2595  *
2596  * top_level means that this is the very top level of the eventual
2597  * output.  For example, when the user calls table_to_xml, then a call
2598  * with a table name to this function is the top level.  When the user
2599  * calls database_to_xml, then a call with a schema name to this
2600  * function is not the top level.  If top_level is false, then the XML
2601  * namespace declarations are omitted, because they supposedly already
2602  * appeared earlier in the output.  Repeating them is not wrong, but
2603  * it looks ugly.
2604  */
2605 static void
xmldata_root_element_start(StringInfo result,const char * eltname,const char * xmlschema,const char * targetns,bool top_level)2606 xmldata_root_element_start(StringInfo result, const char *eltname,
2607 						   const char *xmlschema, const char *targetns,
2608 						   bool top_level)
2609 {
2610 	/* This isn't really wrong but currently makes no sense. */
2611 	Assert(top_level || !xmlschema);
2612 
2613 	appendStringInfo(result, "<%s", eltname);
2614 	if (top_level)
2615 	{
2616 		appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2617 		if (strlen(targetns) > 0)
2618 			appendStringInfo(result, " xmlns=\"%s\"", targetns);
2619 	}
2620 	if (xmlschema)
2621 	{
2622 		/* FIXME: better targets */
2623 		if (strlen(targetns) > 0)
2624 			appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2625 		else
2626 			appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2627 	}
2628 	appendStringInfoString(result, ">\n");
2629 }
2630 
2631 
2632 static void
xmldata_root_element_end(StringInfo result,const char * eltname)2633 xmldata_root_element_end(StringInfo result, const char *eltname)
2634 {
2635 	appendStringInfo(result, "</%s>\n", eltname);
2636 }
2637 
2638 
2639 static StringInfo
query_to_xml_internal(const char * query,char * tablename,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2640 query_to_xml_internal(const char *query, char *tablename,
2641 					  const char *xmlschema, bool nulls, bool tableforest,
2642 					  const char *targetns, bool top_level)
2643 {
2644 	StringInfo	result;
2645 	char	   *xmltn;
2646 	uint64		i;
2647 
2648 	if (tablename)
2649 		xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2650 	else
2651 		xmltn = "table";
2652 
2653 	result = makeStringInfo();
2654 
2655 	SPI_connect();
2656 	if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2657 		ereport(ERROR,
2658 				(errcode(ERRCODE_DATA_EXCEPTION),
2659 				 errmsg("invalid query")));
2660 
2661 	if (!tableforest)
2662 	{
2663 		xmldata_root_element_start(result, xmltn, xmlschema,
2664 								   targetns, top_level);
2665 		appendStringInfoChar(result, '\n');
2666 	}
2667 
2668 	if (xmlschema)
2669 		appendStringInfo(result, "%s\n\n", xmlschema);
2670 
2671 	for (i = 0; i < SPI_processed; i++)
2672 		SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2673 								  tableforest, targetns, top_level);
2674 
2675 	if (!tableforest)
2676 		xmldata_root_element_end(result, xmltn);
2677 
2678 	SPI_finish();
2679 
2680 	return result;
2681 }
2682 
2683 
2684 Datum
table_to_xmlschema(PG_FUNCTION_ARGS)2685 table_to_xmlschema(PG_FUNCTION_ARGS)
2686 {
2687 	Oid			relid = PG_GETARG_OID(0);
2688 	bool		nulls = PG_GETARG_BOOL(1);
2689 	bool		tableforest = PG_GETARG_BOOL(2);
2690 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2691 	const char *result;
2692 	Relation	rel;
2693 
2694 	rel = heap_open(relid, AccessShareLock);
2695 	result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2696 										tableforest, targetns);
2697 	heap_close(rel, NoLock);
2698 
2699 	PG_RETURN_XML_P(cstring_to_xmltype(result));
2700 }
2701 
2702 
2703 Datum
query_to_xmlschema(PG_FUNCTION_ARGS)2704 query_to_xmlschema(PG_FUNCTION_ARGS)
2705 {
2706 	char	   *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2707 	bool		nulls = PG_GETARG_BOOL(1);
2708 	bool		tableforest = PG_GETARG_BOOL(2);
2709 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2710 	const char *result;
2711 	SPIPlanPtr	plan;
2712 	Portal		portal;
2713 
2714 	SPI_connect();
2715 
2716 	if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2717 		elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2718 
2719 	if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2720 		elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2721 
2722 	result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2723 													InvalidOid, nulls,
2724 													tableforest, targetns));
2725 	SPI_cursor_close(portal);
2726 	SPI_finish();
2727 
2728 	PG_RETURN_XML_P(cstring_to_xmltype(result));
2729 }
2730 
2731 
2732 Datum
cursor_to_xmlschema(PG_FUNCTION_ARGS)2733 cursor_to_xmlschema(PG_FUNCTION_ARGS)
2734 {
2735 	char	   *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2736 	bool		nulls = PG_GETARG_BOOL(1);
2737 	bool		tableforest = PG_GETARG_BOOL(2);
2738 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2739 	const char *xmlschema;
2740 	Portal		portal;
2741 
2742 	SPI_connect();
2743 	portal = SPI_cursor_find(name);
2744 	if (portal == NULL)
2745 		ereport(ERROR,
2746 				(errcode(ERRCODE_UNDEFINED_CURSOR),
2747 				 errmsg("cursor \"%s\" does not exist", name)));
2748 
2749 	xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2750 													   InvalidOid, nulls,
2751 													   tableforest, targetns));
2752 	SPI_finish();
2753 
2754 	PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2755 }
2756 
2757 
2758 Datum
table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2759 table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2760 {
2761 	Oid			relid = PG_GETARG_OID(0);
2762 	bool		nulls = PG_GETARG_BOOL(1);
2763 	bool		tableforest = PG_GETARG_BOOL(2);
2764 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2765 	Relation	rel;
2766 	const char *xmlschema;
2767 
2768 	rel = heap_open(relid, AccessShareLock);
2769 	xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2770 										   tableforest, targetns);
2771 	heap_close(rel, NoLock);
2772 
2773 	PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2774 																xmlschema, nulls, tableforest,
2775 																targetns, true)));
2776 }
2777 
2778 
2779 Datum
query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2780 query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2781 {
2782 	char	   *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2783 	bool		nulls = PG_GETARG_BOOL(1);
2784 	bool		tableforest = PG_GETARG_BOOL(2);
2785 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2786 
2787 	const char *xmlschema;
2788 	SPIPlanPtr	plan;
2789 	Portal		portal;
2790 
2791 	SPI_connect();
2792 
2793 	if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2794 		elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2795 
2796 	if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2797 		elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2798 
2799 	xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2800 													   InvalidOid, nulls, tableforest, targetns));
2801 	SPI_cursor_close(portal);
2802 	SPI_finish();
2803 
2804 	PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2805 																xmlschema, nulls, tableforest,
2806 																targetns, true)));
2807 }
2808 
2809 
2810 /*
2811  * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2812  * sections 9.13, 9.14.
2813  */
2814 
2815 static StringInfo
schema_to_xml_internal(Oid nspid,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2816 schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2817 					   bool tableforest, const char *targetns, bool top_level)
2818 {
2819 	StringInfo	result;
2820 	char	   *xmlsn;
2821 	List	   *relid_list;
2822 	ListCell   *cell;
2823 
2824 	xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2825 										   true, false);
2826 	result = makeStringInfo();
2827 
2828 	xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2829 	appendStringInfoChar(result, '\n');
2830 
2831 	if (xmlschema)
2832 		appendStringInfo(result, "%s\n\n", xmlschema);
2833 
2834 	SPI_connect();
2835 
2836 	relid_list = schema_get_xml_visible_tables(nspid);
2837 
2838 	foreach(cell, relid_list)
2839 	{
2840 		Oid			relid = lfirst_oid(cell);
2841 		StringInfo	subres;
2842 
2843 		subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2844 									   targetns, false);
2845 
2846 		appendStringInfoString(result, subres->data);
2847 		appendStringInfoChar(result, '\n');
2848 	}
2849 
2850 	SPI_finish();
2851 
2852 	xmldata_root_element_end(result, xmlsn);
2853 
2854 	return result;
2855 }
2856 
2857 
2858 Datum
schema_to_xml(PG_FUNCTION_ARGS)2859 schema_to_xml(PG_FUNCTION_ARGS)
2860 {
2861 	Name		name = PG_GETARG_NAME(0);
2862 	bool		nulls = PG_GETARG_BOOL(1);
2863 	bool		tableforest = PG_GETARG_BOOL(2);
2864 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2865 
2866 	char	   *schemaname;
2867 	Oid			nspid;
2868 
2869 	schemaname = NameStr(*name);
2870 	nspid = LookupExplicitNamespace(schemaname, false);
2871 
2872 	PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2873 																 nulls, tableforest, targetns, true)));
2874 }
2875 
2876 
2877 /*
2878  * Write the start element of the root element of an XML Schema mapping.
2879  */
2880 static void
xsd_schema_element_start(StringInfo result,const char * targetns)2881 xsd_schema_element_start(StringInfo result, const char *targetns)
2882 {
2883 	appendStringInfoString(result,
2884 						   "<xsd:schema\n"
2885 						   "    xmlns:xsd=\"" NAMESPACE_XSD "\"");
2886 	if (strlen(targetns) > 0)
2887 		appendStringInfo(result,
2888 						 "\n"
2889 						 "    targetNamespace=\"%s\"\n"
2890 						 "    elementFormDefault=\"qualified\"",
2891 						 targetns);
2892 	appendStringInfoString(result,
2893 						   ">\n\n");
2894 }
2895 
2896 
2897 static void
xsd_schema_element_end(StringInfo result)2898 xsd_schema_element_end(StringInfo result)
2899 {
2900 	appendStringInfoString(result, "</xsd:schema>");
2901 }
2902 
2903 
2904 static StringInfo
schema_to_xmlschema_internal(const char * schemaname,bool nulls,bool tableforest,const char * targetns)2905 schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2906 							 bool tableforest, const char *targetns)
2907 {
2908 	Oid			nspid;
2909 	List	   *relid_list;
2910 	List	   *tupdesc_list;
2911 	ListCell   *cell;
2912 	StringInfo	result;
2913 
2914 	result = makeStringInfo();
2915 
2916 	nspid = LookupExplicitNamespace(schemaname, false);
2917 
2918 	xsd_schema_element_start(result, targetns);
2919 
2920 	SPI_connect();
2921 
2922 	relid_list = schema_get_xml_visible_tables(nspid);
2923 
2924 	tupdesc_list = NIL;
2925 	foreach(cell, relid_list)
2926 	{
2927 		Relation	rel;
2928 
2929 		rel = heap_open(lfirst_oid(cell), AccessShareLock);
2930 		tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2931 		heap_close(rel, NoLock);
2932 	}
2933 
2934 	appendStringInfoString(result,
2935 						   map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2936 
2937 	appendStringInfoString(result,
2938 						   map_sql_schema_to_xmlschema_types(nspid, relid_list,
2939 															 nulls, tableforest, targetns));
2940 
2941 	xsd_schema_element_end(result);
2942 
2943 	SPI_finish();
2944 
2945 	return result;
2946 }
2947 
2948 
2949 Datum
schema_to_xmlschema(PG_FUNCTION_ARGS)2950 schema_to_xmlschema(PG_FUNCTION_ARGS)
2951 {
2952 	Name		name = PG_GETARG_NAME(0);
2953 	bool		nulls = PG_GETARG_BOOL(1);
2954 	bool		tableforest = PG_GETARG_BOOL(2);
2955 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2956 
2957 	PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2958 																	   nulls, tableforest, targetns)));
2959 }
2960 
2961 
2962 Datum
schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2963 schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2964 {
2965 	Name		name = PG_GETARG_NAME(0);
2966 	bool		nulls = PG_GETARG_BOOL(1);
2967 	bool		tableforest = PG_GETARG_BOOL(2);
2968 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2969 	char	   *schemaname;
2970 	Oid			nspid;
2971 	StringInfo	xmlschema;
2972 
2973 	schemaname = NameStr(*name);
2974 	nspid = LookupExplicitNamespace(schemaname, false);
2975 
2976 	xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
2977 											 tableforest, targetns);
2978 
2979 	PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
2980 																 xmlschema->data, nulls,
2981 																 tableforest, targetns, true)));
2982 }
2983 
2984 
2985 /*
2986  * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
2987  * sections 9.16, 9.17.
2988  */
2989 
2990 static StringInfo
database_to_xml_internal(const char * xmlschema,bool nulls,bool tableforest,const char * targetns)2991 database_to_xml_internal(const char *xmlschema, bool nulls,
2992 						 bool tableforest, const char *targetns)
2993 {
2994 	StringInfo	result;
2995 	List	   *nspid_list;
2996 	ListCell   *cell;
2997 	char	   *xmlcn;
2998 
2999 	xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3000 										   true, false);
3001 	result = makeStringInfo();
3002 
3003 	xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3004 	appendStringInfoChar(result, '\n');
3005 
3006 	if (xmlschema)
3007 		appendStringInfo(result, "%s\n\n", xmlschema);
3008 
3009 	SPI_connect();
3010 
3011 	nspid_list = database_get_xml_visible_schemas();
3012 
3013 	foreach(cell, nspid_list)
3014 	{
3015 		Oid			nspid = lfirst_oid(cell);
3016 		StringInfo	subres;
3017 
3018 		subres = schema_to_xml_internal(nspid, NULL, nulls,
3019 										tableforest, targetns, false);
3020 
3021 		appendStringInfoString(result, subres->data);
3022 		appendStringInfoChar(result, '\n');
3023 	}
3024 
3025 	SPI_finish();
3026 
3027 	xmldata_root_element_end(result, xmlcn);
3028 
3029 	return result;
3030 }
3031 
3032 
3033 Datum
database_to_xml(PG_FUNCTION_ARGS)3034 database_to_xml(PG_FUNCTION_ARGS)
3035 {
3036 	bool		nulls = PG_GETARG_BOOL(0);
3037 	bool		tableforest = PG_GETARG_BOOL(1);
3038 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3039 
3040 	PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3041 																   tableforest, targetns)));
3042 }
3043 
3044 
3045 static StringInfo
database_to_xmlschema_internal(bool nulls,bool tableforest,const char * targetns)3046 database_to_xmlschema_internal(bool nulls, bool tableforest,
3047 							   const char *targetns)
3048 {
3049 	List	   *relid_list;
3050 	List	   *nspid_list;
3051 	List	   *tupdesc_list;
3052 	ListCell   *cell;
3053 	StringInfo	result;
3054 
3055 	result = makeStringInfo();
3056 
3057 	xsd_schema_element_start(result, targetns);
3058 
3059 	SPI_connect();
3060 
3061 	relid_list = database_get_xml_visible_tables();
3062 	nspid_list = database_get_xml_visible_schemas();
3063 
3064 	tupdesc_list = NIL;
3065 	foreach(cell, relid_list)
3066 	{
3067 		Relation	rel;
3068 
3069 		rel = heap_open(lfirst_oid(cell), AccessShareLock);
3070 		tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3071 		heap_close(rel, NoLock);
3072 	}
3073 
3074 	appendStringInfoString(result,
3075 						   map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3076 
3077 	appendStringInfoString(result,
3078 						   map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3079 
3080 	xsd_schema_element_end(result);
3081 
3082 	SPI_finish();
3083 
3084 	return result;
3085 }
3086 
3087 
3088 Datum
database_to_xmlschema(PG_FUNCTION_ARGS)3089 database_to_xmlschema(PG_FUNCTION_ARGS)
3090 {
3091 	bool		nulls = PG_GETARG_BOOL(0);
3092 	bool		tableforest = PG_GETARG_BOOL(1);
3093 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3094 
3095 	PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3096 																		 tableforest, targetns)));
3097 }
3098 
3099 
3100 Datum
database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)3101 database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3102 {
3103 	bool		nulls = PG_GETARG_BOOL(0);
3104 	bool		tableforest = PG_GETARG_BOOL(1);
3105 	const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3106 	StringInfo	xmlschema;
3107 
3108 	xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3109 
3110 	PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3111 																   nulls, tableforest, targetns)));
3112 }
3113 
3114 
3115 /*
3116  * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3117  * 9.2.
3118  */
3119 static char *
map_multipart_sql_identifier_to_xml_name(const char * a,const char * b,const char * c,const char * d)3120 map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3121 {
3122 	StringInfoData result;
3123 
3124 	initStringInfo(&result);
3125 
3126 	if (a)
3127 		appendStringInfoString(&result,
3128 							   map_sql_identifier_to_xml_name(a, true, true));
3129 	if (b)
3130 		appendStringInfo(&result, ".%s",
3131 						 map_sql_identifier_to_xml_name(b, true, true));
3132 	if (c)
3133 		appendStringInfo(&result, ".%s",
3134 						 map_sql_identifier_to_xml_name(c, true, true));
3135 	if (d)
3136 		appendStringInfo(&result, ".%s",
3137 						 map_sql_identifier_to_xml_name(d, true, true));
3138 
3139 	return result.data;
3140 }
3141 
3142 
3143 /*
3144  * Map an SQL table to an XML Schema document; see SQL/XML:2008
3145  * section 9.11.
3146  *
3147  * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3148  * 9.9.
3149  */
3150 static const char *
map_sql_table_to_xmlschema(TupleDesc tupdesc,Oid relid,bool nulls,bool tableforest,const char * targetns)3151 map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3152 						   bool tableforest, const char *targetns)
3153 {
3154 	int			i;
3155 	char	   *xmltn;
3156 	char	   *tabletypename;
3157 	char	   *rowtypename;
3158 	StringInfoData result;
3159 
3160 	initStringInfo(&result);
3161 
3162 	if (OidIsValid(relid))
3163 	{
3164 		HeapTuple	tuple;
3165 		Form_pg_class reltuple;
3166 
3167 		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3168 		if (!HeapTupleIsValid(tuple))
3169 			elog(ERROR, "cache lookup failed for relation %u", relid);
3170 		reltuple = (Form_pg_class) GETSTRUCT(tuple);
3171 
3172 		xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3173 											   true, false);
3174 
3175 		tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3176 																 get_database_name(MyDatabaseId),
3177 																 get_namespace_name(reltuple->relnamespace),
3178 																 NameStr(reltuple->relname));
3179 
3180 		rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3181 															   get_database_name(MyDatabaseId),
3182 															   get_namespace_name(reltuple->relnamespace),
3183 															   NameStr(reltuple->relname));
3184 
3185 		ReleaseSysCache(tuple);
3186 	}
3187 	else
3188 	{
3189 		if (tableforest)
3190 			xmltn = "row";
3191 		else
3192 			xmltn = "table";
3193 
3194 		tabletypename = "TableType";
3195 		rowtypename = "RowType";
3196 	}
3197 
3198 	xsd_schema_element_start(&result, targetns);
3199 
3200 	appendStringInfoString(&result,
3201 						   map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3202 
3203 	appendStringInfo(&result,
3204 					 "<xsd:complexType name=\"%s\">\n"
3205 					 "  <xsd:sequence>\n",
3206 					 rowtypename);
3207 
3208 	for (i = 0; i < tupdesc->natts; i++)
3209 	{
3210 		Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3211 
3212 		if (att->attisdropped)
3213 			continue;
3214 		appendStringInfo(&result,
3215 						 "    <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3216 						 map_sql_identifier_to_xml_name(NameStr(att->attname),
3217 														true, false),
3218 						 map_sql_type_to_xml_name(att->atttypid, -1),
3219 						 nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3220 	}
3221 
3222 	appendStringInfoString(&result,
3223 						   "  </xsd:sequence>\n"
3224 						   "</xsd:complexType>\n\n");
3225 
3226 	if (!tableforest)
3227 	{
3228 		appendStringInfo(&result,
3229 						 "<xsd:complexType name=\"%s\">\n"
3230 						 "  <xsd:sequence>\n"
3231 						 "    <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3232 						 "  </xsd:sequence>\n"
3233 						 "</xsd:complexType>\n\n",
3234 						 tabletypename, rowtypename);
3235 
3236 		appendStringInfo(&result,
3237 						 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3238 						 xmltn, tabletypename);
3239 	}
3240 	else
3241 		appendStringInfo(&result,
3242 						 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3243 						 xmltn, rowtypename);
3244 
3245 	xsd_schema_element_end(&result);
3246 
3247 	return result.data;
3248 }
3249 
3250 
3251 /*
3252  * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3253  * section 9.12.
3254  */
3255 static const char *
map_sql_schema_to_xmlschema_types(Oid nspid,List * relid_list,bool nulls,bool tableforest,const char * targetns)3256 map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3257 								  bool tableforest, const char *targetns)
3258 {
3259 	char	   *dbname;
3260 	char	   *nspname;
3261 	char	   *xmlsn;
3262 	char	   *schematypename;
3263 	StringInfoData result;
3264 	ListCell   *cell;
3265 
3266 	dbname = get_database_name(MyDatabaseId);
3267 	nspname = get_namespace_name(nspid);
3268 
3269 	initStringInfo(&result);
3270 
3271 	xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3272 
3273 	schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3274 															  dbname,
3275 															  nspname,
3276 															  NULL);
3277 
3278 	appendStringInfo(&result,
3279 					 "<xsd:complexType name=\"%s\">\n", schematypename);
3280 	if (!tableforest)
3281 		appendStringInfoString(&result,
3282 							   "  <xsd:all>\n");
3283 	else
3284 		appendStringInfoString(&result,
3285 							   "  <xsd:sequence>\n");
3286 
3287 	foreach(cell, relid_list)
3288 	{
3289 		Oid			relid = lfirst_oid(cell);
3290 		char	   *relname = get_rel_name(relid);
3291 		char	   *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3292 		char	   *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3293 																			 dbname,
3294 																			 nspname,
3295 																			 relname);
3296 
3297 		if (!tableforest)
3298 			appendStringInfo(&result,
3299 							 "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3300 							 xmltn, tabletypename);
3301 		else
3302 			appendStringInfo(&result,
3303 							 "    <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3304 							 xmltn, tabletypename);
3305 	}
3306 
3307 	if (!tableforest)
3308 		appendStringInfoString(&result,
3309 							   "  </xsd:all>\n");
3310 	else
3311 		appendStringInfoString(&result,
3312 							   "  </xsd:sequence>\n");
3313 	appendStringInfoString(&result,
3314 						   "</xsd:complexType>\n\n");
3315 
3316 	appendStringInfo(&result,
3317 					 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3318 					 xmlsn, schematypename);
3319 
3320 	return result.data;
3321 }
3322 
3323 
3324 /*
3325  * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3326  * section 9.15.
3327  */
3328 static const char *
map_sql_catalog_to_xmlschema_types(List * nspid_list,bool nulls,bool tableforest,const char * targetns)3329 map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3330 								   bool tableforest, const char *targetns)
3331 {
3332 	char	   *dbname;
3333 	char	   *xmlcn;
3334 	char	   *catalogtypename;
3335 	StringInfoData result;
3336 	ListCell   *cell;
3337 
3338 	dbname = get_database_name(MyDatabaseId);
3339 
3340 	initStringInfo(&result);
3341 
3342 	xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3343 
3344 	catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3345 															   dbname,
3346 															   NULL,
3347 															   NULL);
3348 
3349 	appendStringInfo(&result,
3350 					 "<xsd:complexType name=\"%s\">\n", catalogtypename);
3351 	appendStringInfoString(&result,
3352 						   "  <xsd:all>\n");
3353 
3354 	foreach(cell, nspid_list)
3355 	{
3356 		Oid			nspid = lfirst_oid(cell);
3357 		char	   *nspname = get_namespace_name(nspid);
3358 		char	   *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3359 		char	   *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3360 																			  dbname,
3361 																			  nspname,
3362 																			  NULL);
3363 
3364 		appendStringInfo(&result,
3365 						 "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3366 						 xmlsn, schematypename);
3367 	}
3368 
3369 	appendStringInfoString(&result,
3370 						   "  </xsd:all>\n");
3371 	appendStringInfoString(&result,
3372 						   "</xsd:complexType>\n\n");
3373 
3374 	appendStringInfo(&result,
3375 					 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3376 					 xmlcn, catalogtypename);
3377 
3378 	return result.data;
3379 }
3380 
3381 
3382 /*
3383  * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3384  */
3385 static const char *
map_sql_type_to_xml_name(Oid typeoid,int typmod)3386 map_sql_type_to_xml_name(Oid typeoid, int typmod)
3387 {
3388 	StringInfoData result;
3389 
3390 	initStringInfo(&result);
3391 
3392 	switch (typeoid)
3393 	{
3394 		case BPCHAROID:
3395 			if (typmod == -1)
3396 				appendStringInfoString(&result, "CHAR");
3397 			else
3398 				appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3399 			break;
3400 		case VARCHAROID:
3401 			if (typmod == -1)
3402 				appendStringInfoString(&result, "VARCHAR");
3403 			else
3404 				appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3405 			break;
3406 		case NUMERICOID:
3407 			if (typmod == -1)
3408 				appendStringInfoString(&result, "NUMERIC");
3409 			else
3410 				appendStringInfo(&result, "NUMERIC_%d_%d",
3411 								 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3412 								 (typmod - VARHDRSZ) & 0xffff);
3413 			break;
3414 		case INT4OID:
3415 			appendStringInfoString(&result, "INTEGER");
3416 			break;
3417 		case INT2OID:
3418 			appendStringInfoString(&result, "SMALLINT");
3419 			break;
3420 		case INT8OID:
3421 			appendStringInfoString(&result, "BIGINT");
3422 			break;
3423 		case FLOAT4OID:
3424 			appendStringInfoString(&result, "REAL");
3425 			break;
3426 		case FLOAT8OID:
3427 			appendStringInfoString(&result, "DOUBLE");
3428 			break;
3429 		case BOOLOID:
3430 			appendStringInfoString(&result, "BOOLEAN");
3431 			break;
3432 		case TIMEOID:
3433 			if (typmod == -1)
3434 				appendStringInfoString(&result, "TIME");
3435 			else
3436 				appendStringInfo(&result, "TIME_%d", typmod);
3437 			break;
3438 		case TIMETZOID:
3439 			if (typmod == -1)
3440 				appendStringInfoString(&result, "TIME_WTZ");
3441 			else
3442 				appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3443 			break;
3444 		case TIMESTAMPOID:
3445 			if (typmod == -1)
3446 				appendStringInfoString(&result, "TIMESTAMP");
3447 			else
3448 				appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3449 			break;
3450 		case TIMESTAMPTZOID:
3451 			if (typmod == -1)
3452 				appendStringInfoString(&result, "TIMESTAMP_WTZ");
3453 			else
3454 				appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3455 			break;
3456 		case DATEOID:
3457 			appendStringInfoString(&result, "DATE");
3458 			break;
3459 		case XMLOID:
3460 			appendStringInfoString(&result, "XML");
3461 			break;
3462 		default:
3463 			{
3464 				HeapTuple	tuple;
3465 				Form_pg_type typtuple;
3466 
3467 				tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3468 				if (!HeapTupleIsValid(tuple))
3469 					elog(ERROR, "cache lookup failed for type %u", typeoid);
3470 				typtuple = (Form_pg_type) GETSTRUCT(tuple);
3471 
3472 				appendStringInfoString(&result,
3473 									   map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3474 																				get_database_name(MyDatabaseId),
3475 																				get_namespace_name(typtuple->typnamespace),
3476 																				NameStr(typtuple->typname)));
3477 
3478 				ReleaseSysCache(tuple);
3479 			}
3480 	}
3481 
3482 	return result.data;
3483 }
3484 
3485 
3486 /*
3487  * Map a collection of SQL data types to XML Schema data types; see
3488  * SQL/XML:2008 section 9.7.
3489  */
3490 static const char *
map_sql_typecoll_to_xmlschema_types(List * tupdesc_list)3491 map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3492 {
3493 	List	   *uniquetypes = NIL;
3494 	int			i;
3495 	StringInfoData result;
3496 	ListCell   *cell0;
3497 
3498 	/* extract all column types used in the set of TupleDescs */
3499 	foreach(cell0, tupdesc_list)
3500 	{
3501 		TupleDesc	tupdesc = (TupleDesc) lfirst(cell0);
3502 
3503 		for (i = 0; i < tupdesc->natts; i++)
3504 		{
3505 			Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3506 
3507 			if (att->attisdropped)
3508 				continue;
3509 			uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3510 		}
3511 	}
3512 
3513 	/* add base types of domains */
3514 	foreach(cell0, uniquetypes)
3515 	{
3516 		Oid			typid = lfirst_oid(cell0);
3517 		Oid			basetypid = getBaseType(typid);
3518 
3519 		if (basetypid != typid)
3520 			uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3521 	}
3522 
3523 	/* Convert to textual form */
3524 	initStringInfo(&result);
3525 
3526 	foreach(cell0, uniquetypes)
3527 	{
3528 		appendStringInfo(&result, "%s\n",
3529 						 map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3530 														-1));
3531 	}
3532 
3533 	return result.data;
3534 }
3535 
3536 
3537 /*
3538  * Map an SQL data type to a named XML Schema data type; see
3539  * SQL/XML:2008 sections 9.5 and 9.6.
3540  *
3541  * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3542  * a name attribute, which this function does.  The name-less version
3543  * 9.5 doesn't appear to be required anywhere.)
3544  */
3545 static const char *
map_sql_type_to_xmlschema_type(Oid typeoid,int typmod)3546 map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3547 {
3548 	StringInfoData result;
3549 	const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3550 
3551 	initStringInfo(&result);
3552 
3553 	if (typeoid == XMLOID)
3554 	{
3555 		appendStringInfoString(&result,
3556 							   "<xsd:complexType mixed=\"true\">\n"
3557 							   "  <xsd:sequence>\n"
3558 							   "    <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3559 							   "  </xsd:sequence>\n"
3560 							   "</xsd:complexType>\n");
3561 	}
3562 	else
3563 	{
3564 		appendStringInfo(&result,
3565 						 "<xsd:simpleType name=\"%s\">\n", typename);
3566 
3567 		switch (typeoid)
3568 		{
3569 			case BPCHAROID:
3570 			case VARCHAROID:
3571 			case TEXTOID:
3572 				appendStringInfoString(&result,
3573 									   "  <xsd:restriction base=\"xsd:string\">\n");
3574 				if (typmod != -1)
3575 					appendStringInfo(&result,
3576 									 "    <xsd:maxLength value=\"%d\"/>\n",
3577 									 typmod - VARHDRSZ);
3578 				appendStringInfoString(&result, "  </xsd:restriction>\n");
3579 				break;
3580 
3581 			case BYTEAOID:
3582 				appendStringInfo(&result,
3583 								 "  <xsd:restriction base=\"xsd:%s\">\n"
3584 								 "  </xsd:restriction>\n",
3585 								 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3586 				break;
3587 
3588 			case NUMERICOID:
3589 				if (typmod != -1)
3590 					appendStringInfo(&result,
3591 									 "  <xsd:restriction base=\"xsd:decimal\">\n"
3592 									 "    <xsd:totalDigits value=\"%d\"/>\n"
3593 									 "    <xsd:fractionDigits value=\"%d\"/>\n"
3594 									 "  </xsd:restriction>\n",
3595 									 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3596 									 (typmod - VARHDRSZ) & 0xffff);
3597 				break;
3598 
3599 			case INT2OID:
3600 				appendStringInfo(&result,
3601 								 "  <xsd:restriction base=\"xsd:short\">\n"
3602 								 "    <xsd:maxInclusive value=\"%d\"/>\n"
3603 								 "    <xsd:minInclusive value=\"%d\"/>\n"
3604 								 "  </xsd:restriction>\n",
3605 								 SHRT_MAX, SHRT_MIN);
3606 				break;
3607 
3608 			case INT4OID:
3609 				appendStringInfo(&result,
3610 								 "  <xsd:restriction base=\"xsd:int\">\n"
3611 								 "    <xsd:maxInclusive value=\"%d\"/>\n"
3612 								 "    <xsd:minInclusive value=\"%d\"/>\n"
3613 								 "  </xsd:restriction>\n",
3614 								 INT_MAX, INT_MIN);
3615 				break;
3616 
3617 			case INT8OID:
3618 				appendStringInfo(&result,
3619 								 "  <xsd:restriction base=\"xsd:long\">\n"
3620 								 "    <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3621 								 "    <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3622 								 "  </xsd:restriction>\n",
3623 								 (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1,
3624 								 (((uint64) 1) << (sizeof(int64) * 8 - 1)));
3625 				break;
3626 
3627 			case FLOAT4OID:
3628 				appendStringInfoString(&result,
3629 									   "  <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3630 				break;
3631 
3632 			case FLOAT8OID:
3633 				appendStringInfoString(&result,
3634 									   "  <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3635 				break;
3636 
3637 			case BOOLOID:
3638 				appendStringInfoString(&result,
3639 									   "  <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3640 				break;
3641 
3642 			case TIMEOID:
3643 			case TIMETZOID:
3644 				{
3645 					const char *tz = (typeoid == TIMETZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3646 
3647 					if (typmod == -1)
3648 						appendStringInfo(&result,
3649 										 "  <xsd:restriction base=\"xsd:time\">\n"
3650 										 "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3651 										 "  </xsd:restriction>\n", tz);
3652 					else if (typmod == 0)
3653 						appendStringInfo(&result,
3654 										 "  <xsd:restriction base=\"xsd:time\">\n"
3655 										 "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3656 										 "  </xsd:restriction>\n", tz);
3657 					else
3658 						appendStringInfo(&result,
3659 										 "  <xsd:restriction base=\"xsd:time\">\n"
3660 										 "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3661 										 "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3662 					break;
3663 				}
3664 
3665 			case TIMESTAMPOID:
3666 			case TIMESTAMPTZOID:
3667 				{
3668 					const char *tz = (typeoid == TIMESTAMPTZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3669 
3670 					if (typmod == -1)
3671 						appendStringInfo(&result,
3672 										 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3673 										 "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3674 										 "  </xsd:restriction>\n", tz);
3675 					else if (typmod == 0)
3676 						appendStringInfo(&result,
3677 										 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3678 										 "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3679 										 "  </xsd:restriction>\n", tz);
3680 					else
3681 						appendStringInfo(&result,
3682 										 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3683 										 "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3684 										 "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3685 					break;
3686 				}
3687 
3688 			case DATEOID:
3689 				appendStringInfoString(&result,
3690 									   "  <xsd:restriction base=\"xsd:date\">\n"
3691 									   "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3692 									   "  </xsd:restriction>\n");
3693 				break;
3694 
3695 			default:
3696 				if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3697 				{
3698 					Oid			base_typeoid;
3699 					int32		base_typmod = -1;
3700 
3701 					base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3702 
3703 					appendStringInfo(&result,
3704 									 "  <xsd:restriction base=\"%s\"/>\n",
3705 									 map_sql_type_to_xml_name(base_typeoid, base_typmod));
3706 				}
3707 				break;
3708 		}
3709 		appendStringInfoString(&result, "</xsd:simpleType>\n");
3710 	}
3711 
3712 	return result.data;
3713 }
3714 
3715 
3716 /*
3717  * Map an SQL row to an XML element, taking the row from the active
3718  * SPI cursor.  See also SQL/XML:2008 section 9.10.
3719  */
3720 static void
SPI_sql_row_to_xmlelement(uint64 rownum,StringInfo result,char * tablename,bool nulls,bool tableforest,const char * targetns,bool top_level)3721 SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3722 						  bool nulls, bool tableforest,
3723 						  const char *targetns, bool top_level)
3724 {
3725 	int			i;
3726 	char	   *xmltn;
3727 
3728 	if (tablename)
3729 		xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3730 	else
3731 	{
3732 		if (tableforest)
3733 			xmltn = "row";
3734 		else
3735 			xmltn = "table";
3736 	}
3737 
3738 	if (tableforest)
3739 		xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3740 	else
3741 		appendStringInfoString(result, "<row>\n");
3742 
3743 	for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3744 	{
3745 		char	   *colname;
3746 		Datum		colval;
3747 		bool		isnull;
3748 
3749 		colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3750 												 true, false);
3751 		colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3752 							   SPI_tuptable->tupdesc,
3753 							   i,
3754 							   &isnull);
3755 		if (isnull)
3756 		{
3757 			if (nulls)
3758 				appendStringInfo(result, "  <%s xsi:nil=\"true\"/>\n", colname);
3759 		}
3760 		else
3761 			appendStringInfo(result, "  <%s>%s</%s>\n",
3762 							 colname,
3763 							 map_sql_value_to_xml_value(colval,
3764 														SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3765 							 colname);
3766 	}
3767 
3768 	if (tableforest)
3769 	{
3770 		xmldata_root_element_end(result, xmltn);
3771 		appendStringInfoChar(result, '\n');
3772 	}
3773 	else
3774 		appendStringInfoString(result, "</row>\n\n");
3775 }
3776 
3777 
3778 /*
3779  * XPath related functions
3780  */
3781 
3782 #ifdef USE_LIBXML
3783 
3784 /*
3785  * Convert XML node to text (dump subtree in case of element,
3786  * return value otherwise)
3787  */
3788 static text *
xml_xmlnodetoxmltype(xmlNodePtr cur,PgXmlErrorContext * xmlerrcxt)3789 xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
3790 {
3791 	xmltype    *result;
3792 
3793 	if (cur->type == XML_ELEMENT_NODE)
3794 	{
3795 		xmlBufferPtr buf;
3796 		xmlNodePtr	cur_copy;
3797 
3798 		buf = xmlBufferCreate();
3799 
3800 		/*
3801 		 * The result of xmlNodeDump() won't contain namespace definitions
3802 		 * from parent nodes, but xmlCopyNode() duplicates a node along with
3803 		 * its required namespace definitions.
3804 		 */
3805 		cur_copy = xmlCopyNode(cur, 1);
3806 
3807 		if (cur_copy == NULL)
3808 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3809 						"could not copy node");
3810 
3811 		PG_TRY();
3812 		{
3813 			xmlNodeDump(buf, NULL, cur_copy, 0, 1);
3814 			result = xmlBuffer_to_xmltype(buf);
3815 		}
3816 		PG_CATCH();
3817 		{
3818 			xmlFreeNode(cur_copy);
3819 			xmlBufferFree(buf);
3820 			PG_RE_THROW();
3821 		}
3822 		PG_END_TRY();
3823 		xmlFreeNode(cur_copy);
3824 		xmlBufferFree(buf);
3825 	}
3826 	else
3827 	{
3828 		xmlChar    *str;
3829 
3830 		str = xmlXPathCastNodeToString(cur);
3831 		PG_TRY();
3832 		{
3833 			/* Here we rely on XML having the same representation as TEXT */
3834 			char	   *escaped = escape_xml((char *) str);
3835 
3836 			result = (xmltype *) cstring_to_text(escaped);
3837 			pfree(escaped);
3838 		}
3839 		PG_CATCH();
3840 		{
3841 			xmlFree(str);
3842 			PG_RE_THROW();
3843 		}
3844 		PG_END_TRY();
3845 		xmlFree(str);
3846 	}
3847 
3848 	return result;
3849 }
3850 
3851 /*
3852  * Convert an XML XPath object (the result of evaluating an XPath expression)
3853  * to an array of xml values, which are appended to astate.  The function
3854  * result value is the number of elements in the array.
3855  *
3856  * If "astate" is NULL then we don't generate the array value, but we still
3857  * return the number of elements it would have had.
3858  *
3859  * Nodesets are converted to an array containing the nodes' textual
3860  * representations.  Primitive values (float, double, string) are converted
3861  * to a single-element array containing the value's string representation.
3862  */
3863 static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,ArrayBuildState * astate,PgXmlErrorContext * xmlerrcxt)3864 xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3865 					   ArrayBuildState *astate,
3866 					   PgXmlErrorContext *xmlerrcxt)
3867 {
3868 	int			result = 0;
3869 	Datum		datum;
3870 	Oid			datumtype;
3871 	char	   *result_str;
3872 
3873 	switch (xpathobj->type)
3874 	{
3875 		case XPATH_NODESET:
3876 			if (xpathobj->nodesetval != NULL)
3877 			{
3878 				result = xpathobj->nodesetval->nodeNr;
3879 				if (astate != NULL)
3880 				{
3881 					int			i;
3882 
3883 					for (i = 0; i < result; i++)
3884 					{
3885 						datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
3886 																	 xmlerrcxt));
3887 						(void) accumArrayResult(astate, datum, false,
3888 												XMLOID, CurrentMemoryContext);
3889 					}
3890 				}
3891 			}
3892 			return result;
3893 
3894 		case XPATH_BOOLEAN:
3895 			if (astate == NULL)
3896 				return 1;
3897 			datum = BoolGetDatum(xpathobj->boolval);
3898 			datumtype = BOOLOID;
3899 			break;
3900 
3901 		case XPATH_NUMBER:
3902 			if (astate == NULL)
3903 				return 1;
3904 			datum = Float8GetDatum(xpathobj->floatval);
3905 			datumtype = FLOAT8OID;
3906 			break;
3907 
3908 		case XPATH_STRING:
3909 			if (astate == NULL)
3910 				return 1;
3911 			datum = CStringGetDatum((char *) xpathobj->stringval);
3912 			datumtype = CSTRINGOID;
3913 			break;
3914 
3915 		default:
3916 			elog(ERROR, "xpath expression result type %d is unsupported",
3917 				 xpathobj->type);
3918 			return 0;			/* keep compiler quiet */
3919 	}
3920 
3921 	/* Common code for scalar-value cases */
3922 	result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3923 	datum = PointerGetDatum(cstring_to_xmltype(result_str));
3924 	(void) accumArrayResult(astate, datum, false,
3925 							XMLOID, CurrentMemoryContext);
3926 	return 1;
3927 }
3928 
3929 
3930 /*
3931  * Common code for xpath() and xmlexists()
3932  *
3933  * Evaluate XPath expression and return number of nodes in res_items
3934  * and array of XML values in astate.  Either of those pointers can be
3935  * NULL if the corresponding result isn't wanted.
3936  *
3937  * It is up to the user to ensure that the XML passed is in fact
3938  * an XML document - XPath doesn't work easily on fragments without
3939  * a context node being known.
3940  */
3941 static void
xpath_internal(text * xpath_expr_text,xmltype * data,ArrayType * namespaces,int * res_nitems,ArrayBuildState * astate)3942 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3943 			   int *res_nitems, ArrayBuildState *astate)
3944 {
3945 	PgXmlErrorContext *xmlerrcxt;
3946 	volatile xmlParserCtxtPtr ctxt = NULL;
3947 	volatile xmlDocPtr doc = NULL;
3948 	volatile xmlXPathContextPtr xpathctx = NULL;
3949 	volatile xmlXPathCompExprPtr xpathcomp = NULL;
3950 	volatile xmlXPathObjectPtr xpathobj = NULL;
3951 	char	   *datastr;
3952 	int32		len;
3953 	int32		xpath_len;
3954 	xmlChar    *string;
3955 	xmlChar    *xpath_expr;
3956 	size_t		xmldecl_len = 0;
3957 	int			i;
3958 	int			ndim;
3959 	Datum	   *ns_names_uris;
3960 	bool	   *ns_names_uris_nulls;
3961 	int			ns_count;
3962 
3963 	/*
3964 	 * Namespace mappings are passed as text[].  If an empty array is passed
3965 	 * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
3966 	 * Else, a 2-dimensional array with length of the second axis being equal
3967 	 * to 2 should be passed, i.e., every subarray contains 2 elements, the
3968 	 * first element defining the name, the second one the URI.  Example:
3969 	 * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
3970 	 * 'http://example2.com']].
3971 	 */
3972 	ndim = namespaces ? ARR_NDIM(namespaces) : 0;
3973 	if (ndim != 0)
3974 	{
3975 		int		   *dims;
3976 
3977 		dims = ARR_DIMS(namespaces);
3978 
3979 		if (ndim != 2 || dims[1] != 2)
3980 			ereport(ERROR,
3981 					(errcode(ERRCODE_DATA_EXCEPTION),
3982 					 errmsg("invalid array for XML namespace mapping"),
3983 					 errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
3984 
3985 		Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
3986 
3987 		deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
3988 						  &ns_names_uris, &ns_names_uris_nulls,
3989 						  &ns_count);
3990 
3991 		Assert((ns_count % 2) == 0);	/* checked above */
3992 		ns_count /= 2;			/* count pairs only */
3993 	}
3994 	else
3995 	{
3996 		ns_names_uris = NULL;
3997 		ns_names_uris_nulls = NULL;
3998 		ns_count = 0;
3999 	}
4000 
4001 	datastr = VARDATA(data);
4002 	len = VARSIZE(data) - VARHDRSZ;
4003 	xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4004 	if (xpath_len == 0)
4005 		ereport(ERROR,
4006 				(errcode(ERRCODE_DATA_EXCEPTION),
4007 				 errmsg("empty XPath expression")));
4008 
4009 	string = pg_xmlCharStrndup(datastr, len);
4010 	xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4011 
4012 	/*
4013 	 * In a UTF8 database, skip any xml declaration, which might assert
4014 	 * another encoding.  Ignore parse_xml_decl() failure, letting
4015 	 * xmlCtxtReadMemory() report parse errors.  Documentation disclaims
4016 	 * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4017 	 * those scenarios bug-compatible with historical behavior.
4018 	 */
4019 	if (GetDatabaseEncoding() == PG_UTF8)
4020 		parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4021 
4022 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4023 
4024 	PG_TRY();
4025 	{
4026 		xmlInitParser();
4027 
4028 		/*
4029 		 * redundant XML parsing (two parsings for the same value during one
4030 		 * command execution are possible)
4031 		 */
4032 		ctxt = xmlNewParserCtxt();
4033 		if (ctxt == NULL || xmlerrcxt->err_occurred)
4034 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4035 						"could not allocate parser context");
4036 		doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4037 								len - xmldecl_len, NULL, NULL, 0);
4038 		if (doc == NULL || xmlerrcxt->err_occurred)
4039 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4040 						"could not parse XML document");
4041 		xpathctx = xmlXPathNewContext(doc);
4042 		if (xpathctx == NULL || xmlerrcxt->err_occurred)
4043 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4044 						"could not allocate XPath context");
4045 		xpathctx->node = (xmlNodePtr) doc;
4046 
4047 		/* register namespaces, if any */
4048 		if (ns_count > 0)
4049 		{
4050 			for (i = 0; i < ns_count; i++)
4051 			{
4052 				char	   *ns_name;
4053 				char	   *ns_uri;
4054 
4055 				if (ns_names_uris_nulls[i * 2] ||
4056 					ns_names_uris_nulls[i * 2 + 1])
4057 					ereport(ERROR,
4058 							(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4059 							 errmsg("neither namespace name nor URI may be null")));
4060 				ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4061 				ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4062 				if (xmlXPathRegisterNs(xpathctx,
4063 									   (xmlChar *) ns_name,
4064 									   (xmlChar *) ns_uri) != 0)
4065 					ereport(ERROR,	/* is this an internal error??? */
4066 							(errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4067 									ns_name, ns_uri)));
4068 			}
4069 		}
4070 
4071 		xpathcomp = xmlXPathCompile(xpath_expr);
4072 		if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4073 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4074 						"invalid XPath expression");
4075 
4076 		/*
4077 		 * Version 2.6.27 introduces a function named
4078 		 * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4079 		 * but we can derive the existence by whether any nodes are returned,
4080 		 * thereby preventing a library version upgrade and keeping the code
4081 		 * the same.
4082 		 */
4083 		xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4084 		if (xpathobj == NULL || xmlerrcxt->err_occurred)
4085 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4086 						"could not create XPath object");
4087 
4088 		/*
4089 		 * Extract the results as requested.
4090 		 */
4091 		if (res_nitems != NULL)
4092 			*res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4093 		else
4094 			(void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4095 	}
4096 	PG_CATCH();
4097 	{
4098 		if (xpathobj)
4099 			xmlXPathFreeObject(xpathobj);
4100 		if (xpathcomp)
4101 			xmlXPathFreeCompExpr(xpathcomp);
4102 		if (xpathctx)
4103 			xmlXPathFreeContext(xpathctx);
4104 		if (doc)
4105 			xmlFreeDoc(doc);
4106 		if (ctxt)
4107 			xmlFreeParserCtxt(ctxt);
4108 
4109 		pg_xml_done(xmlerrcxt, true);
4110 
4111 		PG_RE_THROW();
4112 	}
4113 	PG_END_TRY();
4114 
4115 	xmlXPathFreeObject(xpathobj);
4116 	xmlXPathFreeCompExpr(xpathcomp);
4117 	xmlXPathFreeContext(xpathctx);
4118 	xmlFreeDoc(doc);
4119 	xmlFreeParserCtxt(ctxt);
4120 
4121 	pg_xml_done(xmlerrcxt, false);
4122 }
4123 #endif							/* USE_LIBXML */
4124 
4125 /*
4126  * Evaluate XPath expression and return array of XML values.
4127  *
4128  * As we have no support of XQuery sequences yet, this function seems
4129  * to be the most useful one (array of XML functions plays a role of
4130  * some kind of substitution for XQuery sequences).
4131  */
4132 Datum
xpath(PG_FUNCTION_ARGS)4133 xpath(PG_FUNCTION_ARGS)
4134 {
4135 #ifdef USE_LIBXML
4136 	text	   *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4137 	xmltype    *data = PG_GETARG_XML_P(1);
4138 	ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4139 	ArrayBuildState *astate;
4140 
4141 	astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4142 	xpath_internal(xpath_expr_text, data, namespaces,
4143 				   NULL, astate);
4144 	PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
4145 #else
4146 	NO_XML_SUPPORT();
4147 	return 0;
4148 #endif
4149 }
4150 
4151 /*
4152  * Determines if the node specified by the supplied XPath exists
4153  * in a given XML document, returning a boolean.
4154  */
4155 Datum
xmlexists(PG_FUNCTION_ARGS)4156 xmlexists(PG_FUNCTION_ARGS)
4157 {
4158 #ifdef USE_LIBXML
4159 	text	   *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4160 	xmltype    *data = PG_GETARG_XML_P(1);
4161 	int			res_nitems;
4162 
4163 	xpath_internal(xpath_expr_text, data, NULL,
4164 				   &res_nitems, NULL);
4165 
4166 	PG_RETURN_BOOL(res_nitems > 0);
4167 #else
4168 	NO_XML_SUPPORT();
4169 	return 0;
4170 #endif
4171 }
4172 
4173 /*
4174  * Determines if the node specified by the supplied XPath exists
4175  * in a given XML document, returning a boolean. Differs from
4176  * xmlexists as it supports namespaces and is not defined in SQL/XML.
4177  */
4178 Datum
xpath_exists(PG_FUNCTION_ARGS)4179 xpath_exists(PG_FUNCTION_ARGS)
4180 {
4181 #ifdef USE_LIBXML
4182 	text	   *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4183 	xmltype    *data = PG_GETARG_XML_P(1);
4184 	ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4185 	int			res_nitems;
4186 
4187 	xpath_internal(xpath_expr_text, data, namespaces,
4188 				   &res_nitems, NULL);
4189 
4190 	PG_RETURN_BOOL(res_nitems > 0);
4191 #else
4192 	NO_XML_SUPPORT();
4193 	return 0;
4194 #endif
4195 }
4196 
4197 /*
4198  * Functions for checking well-formed-ness
4199  */
4200 
4201 #ifdef USE_LIBXML
4202 static bool
wellformed_xml(text * data,XmlOptionType xmloption_arg)4203 wellformed_xml(text *data, XmlOptionType xmloption_arg)
4204 {
4205 	bool		result;
4206 	volatile xmlDocPtr doc = NULL;
4207 
4208 	/* We want to catch any exceptions and return false */
4209 	PG_TRY();
4210 	{
4211 		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4212 		result = true;
4213 	}
4214 	PG_CATCH();
4215 	{
4216 		FlushErrorState();
4217 		result = false;
4218 	}
4219 	PG_END_TRY();
4220 
4221 	if (doc)
4222 		xmlFreeDoc(doc);
4223 
4224 	return result;
4225 }
4226 #endif
4227 
4228 Datum
xml_is_well_formed(PG_FUNCTION_ARGS)4229 xml_is_well_formed(PG_FUNCTION_ARGS)
4230 {
4231 #ifdef USE_LIBXML
4232 	text	   *data = PG_GETARG_TEXT_PP(0);
4233 
4234 	PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4235 #else
4236 	NO_XML_SUPPORT();
4237 	return 0;
4238 #endif							/* not USE_LIBXML */
4239 }
4240 
4241 Datum
xml_is_well_formed_document(PG_FUNCTION_ARGS)4242 xml_is_well_formed_document(PG_FUNCTION_ARGS)
4243 {
4244 #ifdef USE_LIBXML
4245 	text	   *data = PG_GETARG_TEXT_PP(0);
4246 
4247 	PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4248 #else
4249 	NO_XML_SUPPORT();
4250 	return 0;
4251 #endif							/* not USE_LIBXML */
4252 }
4253 
4254 Datum
xml_is_well_formed_content(PG_FUNCTION_ARGS)4255 xml_is_well_formed_content(PG_FUNCTION_ARGS)
4256 {
4257 #ifdef USE_LIBXML
4258 	text	   *data = PG_GETARG_TEXT_PP(0);
4259 
4260 	PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4261 #else
4262 	NO_XML_SUPPORT();
4263 	return 0;
4264 #endif							/* not USE_LIBXML */
4265 }
4266 
4267 /*
4268  * support functions for XMLTABLE
4269  *
4270  */
4271 #ifdef USE_LIBXML
4272 
4273 /*
4274  * Returns private data from executor state. Ensure validity by check with
4275  * MAGIC number.
4276  */
4277 static inline XmlTableBuilderData *
GetXmlTableBuilderPrivateData(TableFuncScanState * state,const char * fname)4278 GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4279 {
4280 	XmlTableBuilderData *result;
4281 
4282 	if (!IsA(state, TableFuncScanState))
4283 		elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4284 	result = (XmlTableBuilderData *) state->opaque;
4285 	if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4286 		elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4287 
4288 	return result;
4289 }
4290 #endif
4291 
4292 /*
4293  * XmlTableInitOpaque
4294  *		Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4295  *		the XML parser.
4296  *
4297  * Note: Because we call pg_xml_init() here and pg_xml_done() in
4298  * XmlTableDestroyOpaque, it is critical for robustness that no other
4299  * executor nodes run until this node is processed to completion.  Caller
4300  * must execute this to completion (probably filling a tuplestore to exhaust
4301  * this node in a single pass) instead of using row-per-call mode.
4302  */
4303 static void
XmlTableInitOpaque(TableFuncScanState * state,int natts)4304 XmlTableInitOpaque(TableFuncScanState *state, int natts)
4305 {
4306 #ifdef USE_LIBXML
4307 	volatile xmlParserCtxtPtr ctxt = NULL;
4308 	XmlTableBuilderData *xtCxt;
4309 	PgXmlErrorContext *xmlerrcxt;
4310 
4311 	xtCxt = palloc0(sizeof(XmlTableBuilderData));
4312 	xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4313 	xtCxt->natts = natts;
4314 	xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4315 
4316 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4317 
4318 	PG_TRY();
4319 	{
4320 		xmlInitParser();
4321 
4322 		ctxt = xmlNewParserCtxt();
4323 		if (ctxt == NULL || xmlerrcxt->err_occurred)
4324 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4325 						"could not allocate parser context");
4326 	}
4327 	PG_CATCH();
4328 	{
4329 		if (ctxt != NULL)
4330 			xmlFreeParserCtxt(ctxt);
4331 
4332 		pg_xml_done(xmlerrcxt, true);
4333 
4334 		PG_RE_THROW();
4335 	}
4336 	PG_END_TRY();
4337 
4338 	xtCxt->xmlerrcxt = xmlerrcxt;
4339 	xtCxt->ctxt = ctxt;
4340 
4341 	state->opaque = xtCxt;
4342 #else
4343 	NO_XML_SUPPORT();
4344 #endif							/* not USE_LIBXML */
4345 }
4346 
4347 /*
4348  * XmlTableSetDocument
4349  *		Install the input document
4350  */
4351 static void
XmlTableSetDocument(TableFuncScanState * state,Datum value)4352 XmlTableSetDocument(TableFuncScanState *state, Datum value)
4353 {
4354 #ifdef USE_LIBXML
4355 	XmlTableBuilderData *xtCxt;
4356 	xmltype    *xmlval = DatumGetXmlP(value);
4357 	char	   *str;
4358 	xmlChar    *xstr;
4359 	int			length;
4360 	volatile xmlDocPtr doc = NULL;
4361 	volatile xmlXPathContextPtr xpathcxt = NULL;
4362 
4363 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4364 
4365 	/*
4366 	 * Use out function for casting to string (remove encoding property). See
4367 	 * comment in xml_out.
4368 	 */
4369 	str = xml_out_internal(xmlval, 0);
4370 
4371 	length = strlen(str);
4372 	xstr = pg_xmlCharStrndup(str, length);
4373 
4374 	PG_TRY();
4375 	{
4376 		doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4377 		if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4378 			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4379 						"could not parse XML document");
4380 		xpathcxt = xmlXPathNewContext(doc);
4381 		if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4382 			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4383 						"could not allocate XPath context");
4384 		xpathcxt->node = (xmlNodePtr) doc;
4385 	}
4386 	PG_CATCH();
4387 	{
4388 		if (xpathcxt != NULL)
4389 			xmlXPathFreeContext(xpathcxt);
4390 		if (doc != NULL)
4391 			xmlFreeDoc(doc);
4392 
4393 		PG_RE_THROW();
4394 	}
4395 	PG_END_TRY();
4396 
4397 	xtCxt->doc = doc;
4398 	xtCxt->xpathcxt = xpathcxt;
4399 #else
4400 	NO_XML_SUPPORT();
4401 #endif							/* not USE_LIBXML */
4402 }
4403 
4404 /*
4405  * XmlTableSetNamespace
4406  *		Add a namespace declaration
4407  */
4408 static void
XmlTableSetNamespace(TableFuncScanState * state,const char * name,const char * uri)4409 XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4410 {
4411 #ifdef USE_LIBXML
4412 	XmlTableBuilderData *xtCxt;
4413 
4414 	if (name == NULL)
4415 		ereport(ERROR,
4416 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4417 				 errmsg("DEFAULT namespace is not supported")));
4418 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4419 
4420 	if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4421 						   pg_xmlCharStrndup(name, strlen(name)),
4422 						   pg_xmlCharStrndup(uri, strlen(uri))))
4423 		xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4424 					"could not set XML namespace");
4425 #else
4426 	NO_XML_SUPPORT();
4427 #endif							/* not USE_LIBXML */
4428 }
4429 
4430 /*
4431  * XmlTableSetRowFilter
4432  *		Install the row-filter Xpath expression.
4433  */
4434 static void
XmlTableSetRowFilter(TableFuncScanState * state,const char * path)4435 XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4436 {
4437 #ifdef USE_LIBXML
4438 	XmlTableBuilderData *xtCxt;
4439 	xmlChar    *xstr;
4440 
4441 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4442 
4443 	if (*path == '\0')
4444 		ereport(ERROR,
4445 				(errcode(ERRCODE_DATA_EXCEPTION),
4446 				 errmsg("row path filter must not be empty string")));
4447 
4448 	xstr = pg_xmlCharStrndup(path, strlen(path));
4449 
4450 	xtCxt->xpathcomp = xmlXPathCompile(xstr);
4451 	if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4452 		xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4453 					"invalid XPath expression");
4454 #else
4455 	NO_XML_SUPPORT();
4456 #endif							/* not USE_LIBXML */
4457 }
4458 
4459 /*
4460  * XmlTableSetColumnFilter
4461  *		Install the column-filter Xpath expression, for the given column.
4462  */
4463 static void
XmlTableSetColumnFilter(TableFuncScanState * state,const char * path,int colnum)4464 XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4465 {
4466 #ifdef USE_LIBXML
4467 	XmlTableBuilderData *xtCxt;
4468 	xmlChar    *xstr;
4469 
4470 	AssertArg(PointerIsValid(path));
4471 
4472 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4473 
4474 	if (*path == '\0')
4475 		ereport(ERROR,
4476 				(errcode(ERRCODE_DATA_EXCEPTION),
4477 				 errmsg("column path filter must not be empty string")));
4478 
4479 	xstr = pg_xmlCharStrndup(path, strlen(path));
4480 
4481 	xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4482 	if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4483 		xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4484 					"invalid XPath expression");
4485 #else
4486 	NO_XML_SUPPORT();
4487 #endif							/* not USE_LIBXML */
4488 }
4489 
4490 /*
4491  * XmlTableFetchRow
4492  *		Prepare the next "current" tuple for upcoming GetValue calls.
4493  *		Returns false if the row-filter expression returned no more rows.
4494  */
4495 static bool
XmlTableFetchRow(TableFuncScanState * state)4496 XmlTableFetchRow(TableFuncScanState *state)
4497 {
4498 #ifdef USE_LIBXML
4499 	XmlTableBuilderData *xtCxt;
4500 
4501 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4502 
4503 	/*
4504 	 * XmlTable returns table - set of composite values. The error context, is
4505 	 * used for producement more values, between two calls, there can be
4506 	 * created and used another libxml2 error context. It is libxml2 global
4507 	 * value, so it should be refreshed any time before any libxml2 usage,
4508 	 * that is finished by returning some value.
4509 	 */
4510 	xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4511 
4512 	if (xtCxt->xpathobj == NULL)
4513 	{
4514 		xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4515 		if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4516 			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4517 						"could not create XPath object");
4518 
4519 		xtCxt->row_count = 0;
4520 	}
4521 
4522 	if (xtCxt->xpathobj->type == XPATH_NODESET)
4523 	{
4524 		if (xtCxt->xpathobj->nodesetval != NULL)
4525 		{
4526 			if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4527 				return true;
4528 		}
4529 	}
4530 
4531 	return false;
4532 #else
4533 	NO_XML_SUPPORT();
4534 	return false;
4535 #endif							/* not USE_LIBXML */
4536 }
4537 
/*
 * XmlTableGetValue
 *		Return the value for column number 'colnum' for the current row.
 *
 * The column's compiled XPath expression is evaluated with the current row
 * node as context node, the outcome is converted to a C string, and that
 * string is passed to the column's input function together with 'typmod'.
 * When the expression yields an empty node set, *isnull is set to true and
 * a zero Datum is returned.  'typid' is only compared against XMLOID to
 * choose between the XML and non-XML conversions.
 *
 * NOTE(review): this comment used to say that column -1 returns a
 * representation of the whole row, but no such special case is visible in
 * this function: colnum is used directly as an index into xpathscomp[],
 * in_functions[] and typioparams[].  Confirm before relying on it.
 *
 * This leaks memory, so be sure to reset often the context in which it's
 * called.
 */
static Datum
XmlTableGetValue(TableFuncScanState *state, int colnum,
				 Oid typid, int32 typmod, bool *isnull)
{
#ifdef USE_LIBXML
	XmlTableBuilderData *xtCxt;
	Datum		result = (Datum) 0;
	xmlNodePtr	cur;
	char	   *cstr = NULL;
	volatile xmlXPathObjectPtr xpathobj = NULL;

	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");

	/* The row filter must already have produced a node set */
	Assert(xtCxt->xpathobj &&
		   xtCxt->xpathobj->type == XPATH_NODESET &&
		   xtCxt->xpathobj->nodesetval != NULL);

	/* Propagate context related error context to libxml2 */
	xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);

	*isnull = false;

	/* The current row's node is the one at index row_count - 1 */
	cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];

	Assert(xtCxt->xpathscomp[colnum] != NULL);

	PG_TRY();
	{
		/* Set current node as entry point for XPath evaluation */
		xtCxt->xpathcxt->node = cur;

		/* Evaluate column path */
		xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
		if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
						"could not create XPath object");

		/*
		 * There are four possible cases, depending on the number of nodes
		 * returned by the XPath expression and the type of the target column:
		 * a) XPath returns no nodes.  b) One node is returned, and column is
		 * of type XML.  c) One node, column type other than XML.  d) Multiple
		 * nodes are returned.
		 */
		if (xpathobj->type == XPATH_NODESET)
		{
			int			count = 0;

			if (xpathobj->nodesetval != NULL)
				count = xpathobj->nodesetval->nodeNr;

			if (xpathobj->nodesetval == NULL || count == 0)
			{
				/* case a): nothing selected, the column value is NULL */
				*isnull = true;
			}
			else if (count == 1 && typid == XMLOID)
			{
				text	   *textstr;

				/* simple case, result is one value */
				textstr = xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[0],
											   xtCxt->xmlerrcxt);
				cstr = text_to_cstring(textstr);
			}
			else if (count == 1)
			{
				xmlChar    *str;
				xmlNodePtr	node;

				node = xpathobj->nodesetval->nodeTab[0];
				if (node->type == XML_NAMESPACE_DECL)
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("XMLTABLE cannot cast a namespace node to a non-XML result type")));

				/*
				 * Most nodes (elements and even attributes) store their data
				 * in children nodes. If they don't have children nodes, it
				 * means that they are empty (e.g. <element/>). Text nodes and
				 * CDATA sections are an exception: they don't have children
				 * but have content in the Text/CDATA node itself.
				 */
				if (node->type != XML_CDATA_SECTION_NODE &&
					node->type != XML_TEXT_NODE)
					node = node->xmlChildrenNode;

				str = xmlNodeListGetString(xtCxt->doc, node, 1);
				if (str != NULL)
				{
					/*
					 * Copy the string with pstrdup, making sure the libxml2
					 * string is released with xmlFree whether or not the
					 * copy throws an error.
					 */
					PG_TRY();
					{
						cstr = pstrdup((char *) str);
					}
					PG_CATCH();
					{
						xmlFree(str);
						PG_RE_THROW();
					}
					PG_END_TRY();
					xmlFree(str);
				}
				else
				{
					/* Ensure mapping of empty tags to PostgreSQL values. */
					cstr = "";
				}
			}
			else
			{
				StringInfoData str;
				int			i;

				Assert(count > 1);

				/*
				 * When evaluating the XPath expression returns multiple
				 * nodes, the result is the concatenation of them all. The
				 * target type must be XML.
				 */
				if (typid != XMLOID)
					ereport(ERROR,
							(errcode(ERRCODE_CARDINALITY_VIOLATION),
							 errmsg("more than one value returned by column XPath expression")));

				/* Concatenate serialized values */
				initStringInfo(&str);
				for (i = 0; i < count; i++)
				{
					appendStringInfoText(&str,
										 xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
															  xtCxt->xmlerrcxt));
				}
				cstr = str.data;
			}
		}
		else if (xpathobj->type == XPATH_STRING)
		{
			/*
			 * cstr points at xpathobj's stringval here; xpathobj is not
			 * freed until after the input function call below.
			 */
			cstr = (char *) xpathobj->stringval;
		}
		else
			elog(ERROR, "unexpected XPath object type %u", xpathobj->type);

		/*
		 * By here, either cstr contains the result value, or the isnull flag
		 * has been set.
		 */
		Assert(cstr || *isnull);

		/* Convert the string to the column's type via its input function */
		if (!*isnull)
			result = InputFunctionCall(&state->in_functions[colnum],
									   cstr,
									   state->typioparams[colnum],
									   typmod);
	}
	PG_CATCH();
	{
		/* Release the per-call XPath result before passing the error on */
		if (xpathobj != NULL)
			xmlXPathFreeObject(xpathobj);
		PG_RE_THROW();
	}
	PG_END_TRY();

	xmlXPathFreeObject(xpathobj);

	return result;
#else
	NO_XML_SUPPORT();
	return 0;
#endif							/* not USE_LIBXML */
}
4716 
4717 /*
4718  * XmlTableDestroyOpaque
4719  *		Release all libxml2 resources
4720  */
4721 static void
XmlTableDestroyOpaque(TableFuncScanState * state)4722 XmlTableDestroyOpaque(TableFuncScanState *state)
4723 {
4724 #ifdef USE_LIBXML
4725 	XmlTableBuilderData *xtCxt;
4726 
4727 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4728 
4729 	/* Propagate context related error context to libxml2 */
4730 	xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4731 
4732 	if (xtCxt->xpathscomp != NULL)
4733 	{
4734 		int			i;
4735 
4736 		for (i = 0; i < xtCxt->natts; i++)
4737 			if (xtCxt->xpathscomp[i] != NULL)
4738 				xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
4739 	}
4740 
4741 	if (xtCxt->xpathobj != NULL)
4742 		xmlXPathFreeObject(xtCxt->xpathobj);
4743 	if (xtCxt->xpathcomp != NULL)
4744 		xmlXPathFreeCompExpr(xtCxt->xpathcomp);
4745 	if (xtCxt->xpathcxt != NULL)
4746 		xmlXPathFreeContext(xtCxt->xpathcxt);
4747 	if (xtCxt->doc != NULL)
4748 		xmlFreeDoc(xtCxt->doc);
4749 	if (xtCxt->ctxt != NULL)
4750 		xmlFreeParserCtxt(xtCxt->ctxt);
4751 
4752 	pg_xml_done(xtCxt->xmlerrcxt, true);
4753 
4754 	/* not valid anymore */
4755 	xtCxt->magic = 0;
4756 	state->opaque = NULL;
4757 
4758 #else
4759 	NO_XML_SUPPORT();
4760 #endif							/* not USE_LIBXML */
4761 }
4762