1 /*-------------------------------------------------------------------------
2 *
3 * xml.c
4 * XML data type support.
5 *
6 *
7 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/backend/utils/adt/xml.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15 /*
16 * Generally, XML type support is only available when libxml use was
17 * configured during the build. But even if that is not done, the
18 * type and all the functions are available, but most of them will
19 * fail. For one thing, this avoids having to manage variant catalog
20 * installations. But it also has nice effects such as that you can
21 * dump a database containing XML type data even if the server is not
22 * linked with libxml. Thus, make sure xml_out() works even if nothing
23 * else does.
24 */
25
26 /*
27 * Notes on memory management:
28 *
29 * Sometimes libxml allocates global structures in the hope that it can reuse
30 * them later on. This makes it impractical to change the xmlMemSetup
31 * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 * allocated with malloc() or vice versa. Since libxml might be used by
33 * loadable modules, eg libperl, our only safe choices are to change the
34 * functions at postmaster/backend launch or not at all. Since we'd rather
35 * not activate libxml in sessions that might never use it, the latter choice
36 * is the preferred one. However, for debugging purposes it can be awfully
37 * handy to constrain libxml's allocations to be done in a specific palloc
38 * context, where they're easy to track. Therefore there is code here that
39 * can be enabled in debug builds to redirect libxml's allocations into a
40 * special context LibxmlContext. It's not recommended to turn this on in
41 * a production build because of the possibility of bad interactions with
42 * external modules.
43 */
44 /* #define USE_LIBXMLCONTEXT */
45
46 #include "postgres.h"
47
48 #ifdef USE_LIBXML
49 #include <libxml/chvalid.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/tree.h>
53 #include <libxml/uri.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/xmlversion.h>
56 #include <libxml/xmlwriter.h>
57 #include <libxml/xpath.h>
58 #include <libxml/xpathInternals.h>
59
60 /*
61 * We used to check for xmlStructuredErrorContext via a configure test; but
62 * that doesn't work on Windows, so instead use this grottier method of
63 * testing the library version number.
64 */
65 #if LIBXML_VERSION >= 20704
66 #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
67 #endif
68 #endif /* USE_LIBXML */
69
70 #include "access/htup_details.h"
71 #include "catalog/namespace.h"
72 #include "catalog/pg_class.h"
73 #include "catalog/pg_type.h"
74 #include "commands/dbcommands.h"
75 #include "executor/spi.h"
76 #include "executor/tablefunc.h"
77 #include "fmgr.h"
78 #include "lib/stringinfo.h"
79 #include "libpq/pqformat.h"
80 #include "mb/pg_wchar.h"
81 #include "miscadmin.h"
82 #include "nodes/execnodes.h"
83 #include "nodes/nodeFuncs.h"
84 #include "utils/array.h"
85 #include "utils/builtins.h"
86 #include "utils/date.h"
87 #include "utils/datetime.h"
88 #include "utils/lsyscache.h"
89 #include "utils/memutils.h"
90 #include "utils/rel.h"
91 #include "utils/syscache.h"
92 #include "utils/xml.h"
93
94
/*
 * GUC variables
 *
 * xmloption supplies the default parse mode that xml_in(), xml_recv() and
 * texttoxml() in this file hand to the parser.
 */
int			xmlbinary;
int			xmloption;
98
99 #ifdef USE_LIBXML
100
/* random number to identify PgXmlErrorContext */
#define ERRCXT_MAGIC	68275028

/*
 * State for one activation of this module's libxml error handling.
 *
 * An instance is allocated and filled in by pg_xml_init(), registered with
 * libxml as the structured-error context, and released by pg_xml_done(),
 * which also puts back the handlers remembered in the saved_* fields.
 */
struct PgXmlErrorContext
{
	/* set to ERRCXT_MAGIC by pg_xml_init, zeroed again by pg_xml_done */
	int			magic;
	/* strictness argument passed to pg_xml_init */
	PgXmlStrictness strictness;
	/* current error status and accumulated message, if any */
	bool		err_occurred;
	StringInfoData err_buf;
	/* previous libxml error handling state (saved by pg_xml_init) */
	xmlStructuredErrorFunc saved_errfunc;
	void	   *saved_errcxt;
	/* previous libxml entity handler (saved by pg_xml_init) */
	xmlExternalEntityLoader saved_entityfunc;
};
118
/*
 * Forward declarations of helpers that exist only in libxml-enabled builds.
 */

/* entity loader and structured error handler installed by pg_xml_init() */
static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
				  xmlParserCtxtPtr ctxt);
static void xml_errorHandler(void *data, xmlErrorPtr error);
/* reports a libxml result code; used by xml_out_internal() at WARNING level */
static void xml_ereport_by_code(int level, int sqlcode,
					const char *msg, int errcode);
static void chopStringInfoNewlines(StringInfo str);
static void appendStringInfoLineSeparator(StringInfo str);

#ifdef USE_LIBXMLCONTEXT

/*
 * Debug-only allocation hooks; see the memory management notes at the top of
 * the file.  xml_memory_init() is invoked from pg_xml_init_library().
 */
static MemoryContext LibxmlContext = NULL;

static void xml_memory_init(void);
static void *xml_palloc(size_t size);
static void *xml_repalloc(void *ptr, size_t size);
static void xml_pfree(void *ptr);
static char *xml_pstrdup(const char *string);
#endif							/* USE_LIBXMLCONTEXT */

/* parsing and conversion helpers */
static xmlChar *xml_text2xmlChar(text *in);
static int parse_xml_decl(const xmlChar *str, size_t *lenp,
			   xmlChar **version, xmlChar **encoding, int *standalone);
static bool print_xml_decl(StringInfo buf, const xmlChar *version,
			   pg_enc encoding, int standalone);
static bool xml_doctype_in_content(const xmlChar *str);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
		  bool preserve_whitespace, int encoding);
static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
					   ArrayBuildState *astate,
					   PgXmlErrorContext *xmlerrcxt);
static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
151 #endif /* USE_LIBXML */
152
/*
 * Forward declarations of the helpers behind the SQL-to-XML mapping code.
 * These sit outside the USE_LIBXML conditional, so they are declared in
 * every build.
 */
static void xmldata_root_element_start(StringInfo result, const char *eltname,
						   const char *xmlschema, const char *targetns,
						   bool top_level);
static void xmldata_root_element_end(StringInfo result, const char *eltname);
static StringInfo query_to_xml_internal(const char *query, char *tablename,
					  const char *xmlschema, bool nulls, bool tableforest,
					  const char *targetns, bool top_level);
static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
						   bool nulls, bool tableforest, const char *targetns);
static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
								  List *relid_list, bool nulls,
								  bool tableforest, const char *targetns);
static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
								   bool nulls, bool tableforest,
								   const char *targetns);
static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
						  char *tablename, bool nulls, bool tableforest,
						  const char *targetns, bool top_level);
174
/* XMLTABLE support */
#ifdef USE_LIBXML
/* random number to identify XmlTableContext */
#define XMLTABLE_CONTEXT_MAGIC	46922182

/*
 * Working state for one XMLTABLE evaluation (libxml builds only).
 *
 * NOTE(review): the per-field remarks below are inferred from the field
 * names and types; the code that uses them is outside this excerpt.
 */
typedef struct XmlTableBuilderData
{
	int			magic;			/* presumably XMLTABLE_CONTEXT_MAGIC */
	int			natts;			/* presumably the number of output columns */
	long int	row_count;
	PgXmlErrorContext *xmlerrcxt;
	xmlParserCtxtPtr ctxt;
	xmlDocPtr	doc;
	xmlXPathContextPtr xpathcxt;
	xmlXPathCompExprPtr xpathcomp;
	xmlXPathObjectPtr xpathobj;
	xmlXPathCompExprPtr *xpathscomp;	/* presumably one entry per column */
} XmlTableBuilderData;
#endif
193
/*
 * Callbacks implementing XMLTABLE.  They are declared unconditionally so
 * that the routine table below can be built whether or not libxml is
 * available.
 */
static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
					 const char *uri);
static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
						const char *path, int colnum);
static bool XmlTableFetchRow(struct TableFuncScanState *state);
static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
				 Oid typid, int32 typmod, bool *isnull);
static void XmlTableDestroyOpaque(struct TableFuncScanState *state);

/*
 * Exported table of the XMLTABLE callbacks.  The initializer is positional,
 * so the order of the entries must match the member order of
 * TableFuncRoutine.
 */
const TableFuncRoutine XmlTableRoutine =
{
	XmlTableInitOpaque,
	XmlTableSetDocument,
	XmlTableSetNamespace,
	XmlTableSetRowFilter,
	XmlTableSetColumnFilter,
	XmlTableFetchRow,
	XmlTableGetValue,
	XmlTableDestroyOpaque
};
217
/*
 * Raise the standard "feature not supported" ERROR.  The functions in this
 * file invoke it in their non-USE_LIBXML branches; each call is followed by
 * a dummy return statement in the caller.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support."), \
			 errhint("You need to rebuild PostgreSQL using --with-libxml.")))
224
225
/*
 * XML namespace URIs, from SQL/XML:2008 section 4.9: XML Schema, XML Schema
 * instance, and the SQL/XML namespace.
 */
#define NAMESPACE_XSD		"http://www.w3.org/2001/XMLSchema"
#define NAMESPACE_XSI		"http://www.w3.org/2001/XMLSchema-instance"
#define NAMESPACE_SQLXML	"http://standards.iso.org/iso/9075/2003/sqlxml"
230
231
232 #ifdef USE_LIBXML
233
234 static int
xmlChar_to_encoding(const xmlChar * encoding_name)235 xmlChar_to_encoding(const xmlChar *encoding_name)
236 {
237 int encoding = pg_char_to_encoding((const char *) encoding_name);
238
239 if (encoding < 0)
240 ereport(ERROR,
241 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
242 errmsg("invalid encoding name \"%s\"",
243 (const char *) encoding_name)));
244 return encoding;
245 }
246 #endif
247
248
249 /*
250 * xml_in uses a plain C string to VARDATA conversion, so for the time being
251 * we use the conversion function for the text datatype.
252 *
253 * This is only acceptable so long as xmltype and text use the same
254 * representation.
255 */
256 Datum
xml_in(PG_FUNCTION_ARGS)257 xml_in(PG_FUNCTION_ARGS)
258 {
259 #ifdef USE_LIBXML
260 char *s = PG_GETARG_CSTRING(0);
261 xmltype *vardata;
262 xmlDocPtr doc;
263
264 vardata = (xmltype *) cstring_to_text(s);
265
266 /*
267 * Parse the data to check if it is well-formed XML data. Assume that
268 * ERROR occurred if parsing failed.
269 */
270 doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
271 xmlFreeDoc(doc);
272
273 PG_RETURN_XML_P(vardata);
274 #else
275 NO_XML_SUPPORT();
276 return 0;
277 #endif
278 }
279
280
/*
 * XML version string treated as the default.
 * NOTE(review): its users are outside this excerpt -- presumably the XML
 * declaration parsing/printing code; confirm there.
 */
#define PG_XML_DEFAULT_VERSION	"1.0"
282
283
284 /*
285 * xml_out_internal uses a plain VARDATA to C string conversion, so for the
286 * time being we use the conversion function for the text datatype.
287 *
288 * This is only acceptable so long as xmltype and text use the same
289 * representation.
290 */
291 static char *
xml_out_internal(xmltype * x,pg_enc target_encoding)292 xml_out_internal(xmltype *x, pg_enc target_encoding)
293 {
294 char *str = text_to_cstring((text *) x);
295
296 #ifdef USE_LIBXML
297 size_t len = strlen(str);
298 xmlChar *version;
299 int standalone;
300 int res_code;
301
302 if ((res_code = parse_xml_decl((xmlChar *) str,
303 &len, &version, NULL, &standalone)) == 0)
304 {
305 StringInfoData buf;
306
307 initStringInfo(&buf);
308
309 if (!print_xml_decl(&buf, version, target_encoding, standalone))
310 {
311 /*
312 * If we are not going to produce an XML declaration, eat a single
313 * newline in the original string to prevent empty first lines in
314 * the output.
315 */
316 if (*(str + len) == '\n')
317 len += 1;
318 }
319 appendStringInfoString(&buf, str + len);
320
321 pfree(str);
322
323 return buf.data;
324 }
325
326 xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
327 "could not parse XML declaration in stored value",
328 res_code);
329 #endif
330 return str;
331 }
332
333
334 Datum
xml_out(PG_FUNCTION_ARGS)335 xml_out(PG_FUNCTION_ARGS)
336 {
337 xmltype *x = PG_GETARG_XML_P(0);
338
339 /*
340 * xml_out removes the encoding property in all cases. This is because we
341 * cannot control from here whether the datum will be converted to a
342 * different client encoding, so we'd do more harm than good by including
343 * it.
344 */
345 PG_RETURN_CSTRING(xml_out_internal(x, 0));
346 }
347
348
349 Datum
xml_recv(PG_FUNCTION_ARGS)350 xml_recv(PG_FUNCTION_ARGS)
351 {
352 #ifdef USE_LIBXML
353 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
354 xmltype *result;
355 char *str;
356 char *newstr;
357 int nbytes;
358 xmlDocPtr doc;
359 xmlChar *encodingStr = NULL;
360 int encoding;
361
362 /*
363 * Read the data in raw format. We don't know yet what the encoding is, as
364 * that information is embedded in the xml declaration; so we have to
365 * parse that before converting to server encoding.
366 */
367 nbytes = buf->len - buf->cursor;
368 str = (char *) pq_getmsgbytes(buf, nbytes);
369
370 /*
371 * We need a null-terminated string to pass to parse_xml_decl(). Rather
372 * than make a separate copy, make the temporary result one byte bigger
373 * than it needs to be.
374 */
375 result = palloc(nbytes + 1 + VARHDRSZ);
376 SET_VARSIZE(result, nbytes + VARHDRSZ);
377 memcpy(VARDATA(result), str, nbytes);
378 str = VARDATA(result);
379 str[nbytes] = '\0';
380
381 parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
382
383 /*
384 * If encoding wasn't explicitly specified in the XML header, treat it as
385 * UTF-8, as that's the default in XML. This is different from xml_in(),
386 * where the input has to go through the normal client to server encoding
387 * conversion.
388 */
389 encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
390
391 /*
392 * Parse the data to check if it is well-formed XML data. Assume that
393 * xml_parse will throw ERROR if not.
394 */
395 doc = xml_parse(result, xmloption, true, encoding);
396 xmlFreeDoc(doc);
397
398 /* Now that we know what we're dealing with, convert to server encoding */
399 newstr = pg_any_to_server(str, nbytes, encoding);
400
401 if (newstr != str)
402 {
403 pfree(result);
404 result = (xmltype *) cstring_to_text(newstr);
405 pfree(newstr);
406 }
407
408 PG_RETURN_XML_P(result);
409 #else
410 NO_XML_SUPPORT();
411 return 0;
412 #endif
413 }
414
415
416 Datum
xml_send(PG_FUNCTION_ARGS)417 xml_send(PG_FUNCTION_ARGS)
418 {
419 xmltype *x = PG_GETARG_XML_P(0);
420 char *outval;
421 StringInfoData buf;
422
423 /*
424 * xml_out_internal doesn't convert the encoding, it just prints the right
425 * declaration. pq_sendtext will do the conversion.
426 */
427 outval = xml_out_internal(x, pg_get_client_encoding());
428
429 pq_begintypsend(&buf);
430 pq_sendtext(&buf, outval, strlen(outval));
431 pfree(outval);
432 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
433 }
434
435
436 #ifdef USE_LIBXML
437 static void
appendStringInfoText(StringInfo str,const text * t)438 appendStringInfoText(StringInfo str, const text *t)
439 {
440 appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
441 }
442 #endif
443
444
445 static xmltype *
stringinfo_to_xmltype(StringInfo buf)446 stringinfo_to_xmltype(StringInfo buf)
447 {
448 return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
449 }
450
451
452 static xmltype *
cstring_to_xmltype(const char * string)453 cstring_to_xmltype(const char *string)
454 {
455 return (xmltype *) cstring_to_text(string);
456 }
457
458
459 #ifdef USE_LIBXML
460 static xmltype *
xmlBuffer_to_xmltype(xmlBufferPtr buf)461 xmlBuffer_to_xmltype(xmlBufferPtr buf)
462 {
463 return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
464 xmlBufferLength(buf));
465 }
466 #endif
467
468
469 Datum
xmlcomment(PG_FUNCTION_ARGS)470 xmlcomment(PG_FUNCTION_ARGS)
471 {
472 #ifdef USE_LIBXML
473 text *arg = PG_GETARG_TEXT_PP(0);
474 char *argdata = VARDATA_ANY(arg);
475 int len = VARSIZE_ANY_EXHDR(arg);
476 StringInfoData buf;
477 int i;
478
479 /* check for "--" in string or "-" at the end */
480 for (i = 1; i < len; i++)
481 {
482 if (argdata[i] == '-' && argdata[i - 1] == '-')
483 ereport(ERROR,
484 (errcode(ERRCODE_INVALID_XML_COMMENT),
485 errmsg("invalid XML comment")));
486 }
487 if (len > 0 && argdata[len - 1] == '-')
488 ereport(ERROR,
489 (errcode(ERRCODE_INVALID_XML_COMMENT),
490 errmsg("invalid XML comment")));
491
492 initStringInfo(&buf);
493 appendStringInfoString(&buf, "<!--");
494 appendStringInfoText(&buf, arg);
495 appendStringInfoString(&buf, "-->");
496
497 PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
498 #else
499 NO_XML_SUPPORT();
500 return 0;
501 #endif
502 }
503
504
505
506 /*
507 * TODO: xmlconcat needs to merge the notations and unparsed entities
508 * of the argument values. Not very important in practice, though.
509 */
510 xmltype *
xmlconcat(List * args)511 xmlconcat(List *args)
512 {
513 #ifdef USE_LIBXML
514 int global_standalone = 1;
515 xmlChar *global_version = NULL;
516 bool global_version_no_value = false;
517 StringInfoData buf;
518 ListCell *v;
519
520 initStringInfo(&buf);
521 foreach(v, args)
522 {
523 xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
524 size_t len;
525 xmlChar *version;
526 int standalone;
527 char *str;
528
529 len = VARSIZE(x) - VARHDRSZ;
530 str = text_to_cstring((text *) x);
531
532 parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
533
534 if (standalone == 0 && global_standalone == 1)
535 global_standalone = 0;
536 if (standalone < 0)
537 global_standalone = -1;
538
539 if (!version)
540 global_version_no_value = true;
541 else if (!global_version)
542 global_version = version;
543 else if (xmlStrcmp(version, global_version) != 0)
544 global_version_no_value = true;
545
546 appendStringInfoString(&buf, str + len);
547 pfree(str);
548 }
549
550 if (!global_version_no_value || global_standalone >= 0)
551 {
552 StringInfoData buf2;
553
554 initStringInfo(&buf2);
555
556 print_xml_decl(&buf2,
557 (!global_version_no_value) ? global_version : NULL,
558 0,
559 global_standalone);
560
561 appendStringInfoString(&buf2, buf.data);
562 buf = buf2;
563 }
564
565 return stringinfo_to_xmltype(&buf);
566 #else
567 NO_XML_SUPPORT();
568 return NULL;
569 #endif
570 }
571
572
573 /*
574 * XMLAGG support
575 */
576 Datum
xmlconcat2(PG_FUNCTION_ARGS)577 xmlconcat2(PG_FUNCTION_ARGS)
578 {
579 if (PG_ARGISNULL(0))
580 {
581 if (PG_ARGISNULL(1))
582 PG_RETURN_NULL();
583 else
584 PG_RETURN_XML_P(PG_GETARG_XML_P(1));
585 }
586 else if (PG_ARGISNULL(1))
587 PG_RETURN_XML_P(PG_GETARG_XML_P(0));
588 else
589 PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
590 PG_GETARG_XML_P(1))));
591 }
592
593
594 Datum
texttoxml(PG_FUNCTION_ARGS)595 texttoxml(PG_FUNCTION_ARGS)
596 {
597 text *data = PG_GETARG_TEXT_PP(0);
598
599 PG_RETURN_XML_P(xmlparse(data, xmloption, true));
600 }
601
602
603 Datum
xmltotext(PG_FUNCTION_ARGS)604 xmltotext(PG_FUNCTION_ARGS)
605 {
606 xmltype *data = PG_GETARG_XML_P(0);
607
608 /* It's actually binary compatible. */
609 PG_RETURN_TEXT_P((text *) data);
610 }
611
612
613 text *
xmltotext_with_xmloption(xmltype * data,XmlOptionType xmloption_arg)614 xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
615 {
616 if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
617 ereport(ERROR,
618 (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
619 errmsg("not an XML document")));
620
621 /* It's actually binary compatible, save for the above check. */
622 return (text *) data;
623 }
624
625
626 xmltype *
xmlelement(XmlExpr * xexpr,Datum * named_argvalue,bool * named_argnull,Datum * argvalue,bool * argnull)627 xmlelement(XmlExpr *xexpr,
628 Datum *named_argvalue, bool *named_argnull,
629 Datum *argvalue, bool *argnull)
630 {
631 #ifdef USE_LIBXML
632 xmltype *result;
633 List *named_arg_strings;
634 List *arg_strings;
635 int i;
636 ListCell *arg;
637 ListCell *narg;
638 PgXmlErrorContext *xmlerrcxt;
639 volatile xmlBufferPtr buf = NULL;
640 volatile xmlTextWriterPtr writer = NULL;
641
642 /*
643 * All arguments are already evaluated, and their values are passed in the
644 * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
645 * issues if one of the arguments involves a call to some other function
646 * or subsystem that wants to use libxml on its own terms. We examine the
647 * original XmlExpr to identify the numbers and types of the arguments.
648 */
649 named_arg_strings = NIL;
650 i = 0;
651 foreach(arg, xexpr->named_args)
652 {
653 Expr *e = (Expr *) lfirst(arg);
654 char *str;
655
656 if (named_argnull[i])
657 str = NULL;
658 else
659 str = map_sql_value_to_xml_value(named_argvalue[i],
660 exprType((Node *) e),
661 false);
662 named_arg_strings = lappend(named_arg_strings, str);
663 i++;
664 }
665
666 arg_strings = NIL;
667 i = 0;
668 foreach(arg, xexpr->args)
669 {
670 Expr *e = (Expr *) lfirst(arg);
671 char *str;
672
673 /* here we can just forget NULL elements immediately */
674 if (!argnull[i])
675 {
676 str = map_sql_value_to_xml_value(argvalue[i],
677 exprType((Node *) e),
678 true);
679 arg_strings = lappend(arg_strings, str);
680 }
681 i++;
682 }
683
684 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
685
686 PG_TRY();
687 {
688 buf = xmlBufferCreate();
689 if (buf == NULL || xmlerrcxt->err_occurred)
690 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
691 "could not allocate xmlBuffer");
692 writer = xmlNewTextWriterMemory(buf, 0);
693 if (writer == NULL || xmlerrcxt->err_occurred)
694 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
695 "could not allocate xmlTextWriter");
696
697 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
698
699 forboth(arg, named_arg_strings, narg, xexpr->arg_names)
700 {
701 char *str = (char *) lfirst(arg);
702 char *argname = strVal(lfirst(narg));
703
704 if (str)
705 xmlTextWriterWriteAttribute(writer,
706 (xmlChar *) argname,
707 (xmlChar *) str);
708 }
709
710 foreach(arg, arg_strings)
711 {
712 char *str = (char *) lfirst(arg);
713
714 xmlTextWriterWriteRaw(writer, (xmlChar *) str);
715 }
716
717 xmlTextWriterEndElement(writer);
718
719 /* we MUST do this now to flush data out to the buffer ... */
720 xmlFreeTextWriter(writer);
721 writer = NULL;
722
723 result = xmlBuffer_to_xmltype(buf);
724 }
725 PG_CATCH();
726 {
727 if (writer)
728 xmlFreeTextWriter(writer);
729 if (buf)
730 xmlBufferFree(buf);
731
732 pg_xml_done(xmlerrcxt, true);
733
734 PG_RE_THROW();
735 }
736 PG_END_TRY();
737
738 xmlBufferFree(buf);
739
740 pg_xml_done(xmlerrcxt, false);
741
742 return result;
743 #else
744 NO_XML_SUPPORT();
745 return NULL;
746 #endif
747 }
748
749
750 xmltype *
xmlparse(text * data,XmlOptionType xmloption_arg,bool preserve_whitespace)751 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
752 {
753 #ifdef USE_LIBXML
754 xmlDocPtr doc;
755
756 doc = xml_parse(data, xmloption_arg, preserve_whitespace,
757 GetDatabaseEncoding());
758 xmlFreeDoc(doc);
759
760 return (xmltype *) data;
761 #else
762 NO_XML_SUPPORT();
763 return NULL;
764 #endif
765 }
766
767
768 xmltype *
xmlpi(const char * target,text * arg,bool arg_is_null,bool * result_is_null)769 xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
770 {
771 #ifdef USE_LIBXML
772 xmltype *result;
773 StringInfoData buf;
774
775 if (pg_strcasecmp(target, "xml") == 0)
776 ereport(ERROR,
777 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
778 errmsg("invalid XML processing instruction"),
779 errdetail("XML processing instruction target name cannot be \"%s\".", target)));
780
781 /*
782 * Following the SQL standard, the null check comes after the syntax check
783 * above.
784 */
785 *result_is_null = arg_is_null;
786 if (*result_is_null)
787 return NULL;
788
789 initStringInfo(&buf);
790
791 appendStringInfo(&buf, "<?%s", target);
792
793 if (arg != NULL)
794 {
795 char *string;
796
797 string = text_to_cstring(arg);
798 if (strstr(string, "?>") != NULL)
799 ereport(ERROR,
800 (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
801 errmsg("invalid XML processing instruction"),
802 errdetail("XML processing instruction cannot contain \"?>\".")));
803
804 appendStringInfoChar(&buf, ' ');
805 appendStringInfoString(&buf, string + strspn(string, " "));
806 pfree(string);
807 }
808 appendStringInfoString(&buf, "?>");
809
810 result = stringinfo_to_xmltype(&buf);
811 pfree(buf.data);
812 return result;
813 #else
814 NO_XML_SUPPORT();
815 return NULL;
816 #endif
817 }
818
819
820 xmltype *
xmlroot(xmltype * data,text * version,int standalone)821 xmlroot(xmltype *data, text *version, int standalone)
822 {
823 #ifdef USE_LIBXML
824 char *str;
825 size_t len;
826 xmlChar *orig_version;
827 int orig_standalone;
828 StringInfoData buf;
829
830 len = VARSIZE(data) - VARHDRSZ;
831 str = text_to_cstring((text *) data);
832
833 parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
834
835 if (version)
836 orig_version = xml_text2xmlChar(version);
837 else
838 orig_version = NULL;
839
840 switch (standalone)
841 {
842 case XML_STANDALONE_YES:
843 orig_standalone = 1;
844 break;
845 case XML_STANDALONE_NO:
846 orig_standalone = 0;
847 break;
848 case XML_STANDALONE_NO_VALUE:
849 orig_standalone = -1;
850 break;
851 case XML_STANDALONE_OMITTED:
852 /* leave original value */
853 break;
854 }
855
856 initStringInfo(&buf);
857 print_xml_decl(&buf, orig_version, 0, orig_standalone);
858 appendStringInfoString(&buf, str + len);
859
860 return stringinfo_to_xmltype(&buf);
861 #else
862 NO_XML_SUPPORT();
863 return NULL;
864 #endif
865 }
866
867
868 /*
869 * Validate document (given as string) against DTD (given as external link)
870 *
871 * This has been removed because it is a security hole: unprivileged users
872 * should not be able to use Postgres to fetch arbitrary external files,
873 * which unfortunately is exactly what libxml is willing to do with the DTD
874 * parameter.
875 */
876 Datum
xmlvalidate(PG_FUNCTION_ARGS)877 xmlvalidate(PG_FUNCTION_ARGS)
878 {
879 ereport(ERROR,
880 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
881 errmsg("xmlvalidate is not implemented")));
882 return 0;
883 }
884
885
886 bool
xml_is_document(xmltype * arg)887 xml_is_document(xmltype *arg)
888 {
889 #ifdef USE_LIBXML
890 bool result;
891 volatile xmlDocPtr doc = NULL;
892 MemoryContext ccxt = CurrentMemoryContext;
893
894 /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
895 PG_TRY();
896 {
897 doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
898 GetDatabaseEncoding());
899 result = true;
900 }
901 PG_CATCH();
902 {
903 ErrorData *errdata;
904 MemoryContext ecxt;
905
906 ecxt = MemoryContextSwitchTo(ccxt);
907 errdata = CopyErrorData();
908 if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
909 {
910 FlushErrorState();
911 result = false;
912 }
913 else
914 {
915 MemoryContextSwitchTo(ecxt);
916 PG_RE_THROW();
917 }
918 }
919 PG_END_TRY();
920
921 if (doc)
922 xmlFreeDoc(doc);
923
924 return result;
925 #else /* not USE_LIBXML */
926 NO_XML_SUPPORT();
927 return false;
928 #endif /* not USE_LIBXML */
929 }
930
931
932 #ifdef USE_LIBXML
933
934 /*
935 * pg_xml_init_library --- set up for use of libxml
936 *
937 * This should be called by each function that is about to use libxml
938 * facilities but doesn't require error handling. It initializes libxml
939 * and verifies compatibility with the loaded libxml version. These are
940 * once-per-session activities.
941 *
942 * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
943 * check)
944 */
945 void
pg_xml_init_library(void)946 pg_xml_init_library(void)
947 {
948 static bool first_time = true;
949
950 if (first_time)
951 {
952 /* Stuff we need do only once per session */
953
954 /*
955 * Currently, we have no pure UTF-8 support for internals -- check if
956 * we can work.
957 */
958 if (sizeof(char) != sizeof(xmlChar))
959 ereport(ERROR,
960 (errmsg("could not initialize XML library"),
961 errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
962 (int) sizeof(char), (int) sizeof(xmlChar))));
963
964 #ifdef USE_LIBXMLCONTEXT
965 /* Set up libxml's memory allocation our way */
966 xml_memory_init();
967 #endif
968
969 /* Check library compatibility */
970 LIBXML_TEST_VERSION;
971
972 first_time = false;
973 }
974 }
975
976 /*
977 * pg_xml_init --- set up for use of libxml and register an error handler
978 *
979 * This should be called by each function that is about to use libxml
980 * facilities and requires error handling. It initializes libxml with
981 * pg_xml_init_library() and establishes our libxml error handler.
982 *
983 * strictness determines which errors are reported and which are ignored.
984 *
985 * Calls to this function MUST be followed by a PG_TRY block that guarantees
986 * that pg_xml_done() is called during either normal or error exit.
987 *
988 * This is exported for use by contrib/xml2, as well as other code that might
989 * wish to share use of this module's libxml error handler.
990 */
991 PgXmlErrorContext *
pg_xml_init(PgXmlStrictness strictness)992 pg_xml_init(PgXmlStrictness strictness)
993 {
994 PgXmlErrorContext *errcxt;
995 void *new_errcxt;
996
997 /* Do one-time setup if needed */
998 pg_xml_init_library();
999
1000 /* Create error handling context structure */
1001 errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1002 errcxt->magic = ERRCXT_MAGIC;
1003 errcxt->strictness = strictness;
1004 errcxt->err_occurred = false;
1005 initStringInfo(&errcxt->err_buf);
1006
1007 /*
1008 * Save original error handler and install ours. libxml originally didn't
1009 * distinguish between the contexts for generic and for structured error
1010 * handlers. If we're using an old libxml version, we must thus save the
1011 * generic error context, even though we're using a structured error
1012 * handler.
1013 */
1014 errcxt->saved_errfunc = xmlStructuredError;
1015
1016 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1017 errcxt->saved_errcxt = xmlStructuredErrorContext;
1018 #else
1019 errcxt->saved_errcxt = xmlGenericErrorContext;
1020 #endif
1021
1022 xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1023
1024 /*
1025 * Verify that xmlSetStructuredErrorFunc set the context variable we
1026 * expected it to. If not, the error context pointer we just saved is not
1027 * the correct thing to restore, and since that leaves us without a way to
1028 * restore the context in pg_xml_done, we must fail.
1029 *
1030 * The only known situation in which this test fails is if we compile with
1031 * headers from a libxml2 that doesn't track the structured error context
1032 * separately (< 2.7.4), but at runtime use a version that does, or vice
1033 * versa. The libxml2 authors did not treat that change as constituting
1034 * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1035 * fails to protect us from this.
1036 */
1037
1038 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1039 new_errcxt = xmlStructuredErrorContext;
1040 #else
1041 new_errcxt = xmlGenericErrorContext;
1042 #endif
1043
1044 if (new_errcxt != (void *) errcxt)
1045 ereport(ERROR,
1046 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1047 errmsg("could not set up XML error handler"),
1048 errhint("This probably indicates that the version of libxml2"
1049 " being used is not compatible with the libxml2"
1050 " header files that PostgreSQL was built with.")));
1051
1052 /*
1053 * Also, install an entity loader to prevent unwanted fetches of external
1054 * files and URLs.
1055 */
1056 errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1057 xmlSetExternalEntityLoader(xmlPgEntityLoader);
1058
1059 return errcxt;
1060 }
1061
1062
1063 /*
1064 * pg_xml_done --- restore previous libxml error handling
1065 *
1066 * Resets libxml's global error-handling state to what it was before
1067 * pg_xml_init() was called.
1068 *
1069 * This routine verifies that all pending errors have been dealt with
1070 * (in assert-enabled builds, anyway).
1071 */
1072 void
pg_xml_done(PgXmlErrorContext * errcxt,bool isError)1073 pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1074 {
1075 void *cur_errcxt;
1076
1077 /* An assert seems like enough protection here */
1078 Assert(errcxt->magic == ERRCXT_MAGIC);
1079
1080 /*
1081 * In a normal exit, there should be no un-handled libxml errors. But we
1082 * shouldn't try to enforce this during error recovery, since the longjmp
1083 * could have been thrown before xml_ereport had a chance to run.
1084 */
1085 Assert(!errcxt->err_occurred || isError);
1086
1087 /*
1088 * Check that libxml's global state is correct, warn if not. This is a
1089 * real test and not an Assert because it has a higher probability of
1090 * happening.
1091 */
1092 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1093 cur_errcxt = xmlStructuredErrorContext;
1094 #else
1095 cur_errcxt = xmlGenericErrorContext;
1096 #endif
1097
1098 if (cur_errcxt != (void *) errcxt)
1099 elog(WARNING, "libxml error handling state is out of sync with xml.c");
1100
1101 /* Restore the saved handlers */
1102 xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1103 xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1104
1105 /*
1106 * Mark the struct as invalid, just in case somebody somehow manages to
1107 * call xml_errorHandler or xml_ereport with it.
1108 */
1109 errcxt->magic = 0;
1110
1111 /* Release memory */
1112 pfree(errcxt->err_buf.data);
1113 pfree(errcxt);
1114 }
1115
1116
1117 /*
1118 * pg_xml_error_occurred() --- test the error flag
1119 */
1120 bool
pg_xml_error_occurred(PgXmlErrorContext * errcxt)1121 pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1122 {
1123 return errcxt->err_occurred;
1124 }
1125
1126
1127 /*
1128 * SQL/XML allows storing "XML documents" or "XML content". "XML
1129 * documents" are specified by the XML specification and are parsed
1130 * easily by libxml. "XML content" is specified by SQL/XML as the
1131 * production "XMLDecl? content". But libxml can only parse the
1132 * "content" part, so we have to parse the XML declaration ourselves
1133 * to complete this.
1134 */
1135
/*
 * Return XML_ERR_SPACE_REQUIRED from the *calling function* unless the
 * character at p is XML whitespace.  Does not advance p.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/* Advance p past any run of XML whitespace.  p must be a modifiable lvalue. */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/*
 * Beware of multiple evaluations of argument!  The argument is parenthesized
 * at every use, so an expression argument is grouped as the caller intended.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
	 || xmlIsDigit_ch(c) \
	 || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
	 || xmlIsCombiningQ(c) \
	 || xmlIsExtender_ch(c))
1153
1154 /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1155 static xmlChar *
xml_pnstrdup(const xmlChar * str,size_t len)1156 xml_pnstrdup(const xmlChar *str, size_t len)
1157 {
1158 xmlChar *result;
1159
1160 result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1161 memcpy(result, str, len * sizeof(xmlChar));
1162 result[len] = 0;
1163 return result;
1164 }
1165
1166 /* Ditto, except input is char* */
1167 static xmlChar *
pg_xmlCharStrndup(const char * str,size_t len)1168 pg_xmlCharStrndup(const char *str, size_t len)
1169 {
1170 xmlChar *result;
1171
1172 result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1173 memcpy(result, str, len);
1174 result[len] = '\0';
1175
1176 return result;
1177 }
1178
1179 /*
1180 * str is the null-terminated input string. Remaining arguments are
1181 * output arguments; each can be NULL if value is not wanted.
1182 * version and encoding are returned as locally-palloc'd strings.
1183 * Result is 0 if OK, an error code if not.
1184 */
1185 static int
parse_xml_decl(const xmlChar * str,size_t * lenp,xmlChar ** version,xmlChar ** encoding,int * standalone)1186 parse_xml_decl(const xmlChar *str, size_t *lenp,
1187 xmlChar **version, xmlChar **encoding, int *standalone)
1188 {
1189 const xmlChar *p;
1190 const xmlChar *save_p;
1191 size_t len;
1192 int utf8char;
1193 int utf8len;
1194
1195 /*
1196 * Only initialize libxml. We don't need error handling here, but we do
1197 * need to make sure libxml is initialized before calling any of its
1198 * functions. Note that this is safe (and a no-op) if caller has already
1199 * done pg_xml_init().
1200 */
1201 pg_xml_init_library();
1202
1203 /* Initialize output arguments to "not present" */
1204 if (version)
1205 *version = NULL;
1206 if (encoding)
1207 *encoding = NULL;
1208 if (standalone)
1209 *standalone = -1;
1210
1211 p = str;
1212
1213 if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1214 goto finished;
1215
1216 /*
1217 * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1218 * rather than an XMLDecl, so we have done what we came to do and found no
1219 * XMLDecl.
1220 *
1221 * We need an input length value for xmlGetUTF8Char, but there's no need
1222 * to count the whole document size, so use strnlen not strlen.
1223 */
1224 utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1225 utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1226 if (PG_XMLISNAMECHAR(utf8char))
1227 goto finished;
1228
1229 p += 5;
1230
1231 /* version */
1232 CHECK_XML_SPACE(p);
1233 SKIP_XML_SPACE(p);
1234 if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1235 return XML_ERR_VERSION_MISSING;
1236 p += 7;
1237 SKIP_XML_SPACE(p);
1238 if (*p != '=')
1239 return XML_ERR_VERSION_MISSING;
1240 p += 1;
1241 SKIP_XML_SPACE(p);
1242
1243 if (*p == '\'' || *p == '"')
1244 {
1245 const xmlChar *q;
1246
1247 q = xmlStrchr(p + 1, *p);
1248 if (!q)
1249 return XML_ERR_VERSION_MISSING;
1250
1251 if (version)
1252 *version = xml_pnstrdup(p + 1, q - p - 1);
1253 p = q + 1;
1254 }
1255 else
1256 return XML_ERR_VERSION_MISSING;
1257
1258 /* encoding */
1259 save_p = p;
1260 SKIP_XML_SPACE(p);
1261 if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1262 {
1263 CHECK_XML_SPACE(save_p);
1264 p += 8;
1265 SKIP_XML_SPACE(p);
1266 if (*p != '=')
1267 return XML_ERR_MISSING_ENCODING;
1268 p += 1;
1269 SKIP_XML_SPACE(p);
1270
1271 if (*p == '\'' || *p == '"')
1272 {
1273 const xmlChar *q;
1274
1275 q = xmlStrchr(p + 1, *p);
1276 if (!q)
1277 return XML_ERR_MISSING_ENCODING;
1278
1279 if (encoding)
1280 *encoding = xml_pnstrdup(p + 1, q - p - 1);
1281 p = q + 1;
1282 }
1283 else
1284 return XML_ERR_MISSING_ENCODING;
1285 }
1286 else
1287 {
1288 p = save_p;
1289 }
1290
1291 /* standalone */
1292 save_p = p;
1293 SKIP_XML_SPACE(p);
1294 if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1295 {
1296 CHECK_XML_SPACE(save_p);
1297 p += 10;
1298 SKIP_XML_SPACE(p);
1299 if (*p != '=')
1300 return XML_ERR_STANDALONE_VALUE;
1301 p += 1;
1302 SKIP_XML_SPACE(p);
1303 if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1304 xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1305 {
1306 if (standalone)
1307 *standalone = 1;
1308 p += 5;
1309 }
1310 else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1311 xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1312 {
1313 if (standalone)
1314 *standalone = 0;
1315 p += 4;
1316 }
1317 else
1318 return XML_ERR_STANDALONE_VALUE;
1319 }
1320 else
1321 {
1322 p = save_p;
1323 }
1324
1325 SKIP_XML_SPACE(p);
1326 if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1327 return XML_ERR_XMLDECL_NOT_FINISHED;
1328 p += 2;
1329
1330 finished:
1331 len = p - str;
1332
1333 for (p = str; p < str + len; p++)
1334 if (*p > 127)
1335 return XML_ERR_INVALID_CHAR;
1336
1337 if (lenp)
1338 *lenp = len;
1339
1340 return XML_ERR_OK;
1341 }
1342
1343
1344 /*
1345 * Write an XML declaration. On output, we adjust the XML declaration
1346 * as follows. (These rules are the moral equivalent of the clause
1347 * "Serialization of an XML value" in the SQL standard.)
1348 *
1349 * We try to avoid generating an XML declaration if possible. This is
1350 * so that you don't get trivial things like xml '<foo/>' resulting in
1351 * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1352 * must provide a declaration if the standalone property is specified
1353 * or if we include an encoding declaration. If we have a
1354 * declaration, we must specify a version (XML requires this).
1355 * Otherwise we only make a declaration if the version is not "1.0",
1356 * which is the default version specified in SQL:2003.
1357 */
1358 static bool
print_xml_decl(StringInfo buf,const xmlChar * version,pg_enc encoding,int standalone)1359 print_xml_decl(StringInfo buf, const xmlChar *version,
1360 pg_enc encoding, int standalone)
1361 {
1362 if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1363 || (encoding && encoding != PG_UTF8)
1364 || standalone != -1)
1365 {
1366 appendStringInfoString(buf, "<?xml");
1367
1368 if (version)
1369 appendStringInfo(buf, " version=\"%s\"", version);
1370 else
1371 appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1372
1373 if (encoding && encoding != PG_UTF8)
1374 {
1375 /*
1376 * XXX might be useful to convert this to IANA names (ISO-8859-1
1377 * instead of LATIN1 etc.); needs field experience
1378 */
1379 appendStringInfo(buf, " encoding=\"%s\"",
1380 pg_encoding_to_char(encoding));
1381 }
1382
1383 if (standalone == 1)
1384 appendStringInfoString(buf, " standalone=\"yes\"");
1385 else if (standalone == 0)
1386 appendStringInfoString(buf, " standalone=\"no\"");
1387 appendStringInfoString(buf, "?>");
1388
1389 return true;
1390 }
1391 else
1392 return false;
1393 }
1394
1395 /*
1396 * Test whether an input that is to be parsed as CONTENT contains a DTD.
1397 *
1398 * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1399 * satisfied by a document with a DTD, which is a bit of a wart, as it means
1400 * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1401 * later fix that, by redefining content with reference to the "more
1402 * permissive" Document Node of the XQuery/XPath Data Model, such that any
1403 * DOCUMENT value is indeed also a CONTENT value. That definition is more
1404 * useful, as CONTENT becomes usable for parsing input of unknown form (think
1405 * pg_restore).
1406 *
1407 * As used below in parse_xml when parsing for CONTENT, libxml does not give
1408 * us the 2006+ behavior, but only the 2003; it will choke if the input has
1409 * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1410 * by detecting this case first and simply doing the parse as DOCUMENT.
1411 *
1412 * A DTD can be found arbitrarily far in, but that would be a contrived case;
1413 * it will ordinarily start within a few dozen characters. The only things
1414 * that can precede it are an XMLDecl (here, the caller will have called
1415 * parse_xml_decl already), whitespace, comments, and processing instructions.
1416 * This function need only return true if it sees a valid sequence of such
1417 * things leading to <!DOCTYPE. It can simply return false in any other
1418 * cases, including malformed input; that will mean the input gets parsed as
1419 * CONTENT as originally planned, with libxml reporting any errors.
1420 *
1421 * This is only to be called from xml_parse, when pg_xml_init has already
1422 * been called. The input is already in UTF8 encoding.
1423 */
1424 static bool
xml_doctype_in_content(const xmlChar * str)1425 xml_doctype_in_content(const xmlChar *str)
1426 {
1427 const xmlChar *p = str;
1428
1429 for (;;)
1430 {
1431 const xmlChar *e;
1432
1433 SKIP_XML_SPACE(p);
1434 if (*p != '<')
1435 return false;
1436 p++;
1437
1438 if (*p == '!')
1439 {
1440 p++;
1441
1442 /* if we see <!DOCTYPE, we can return true */
1443 if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1444 return true;
1445
1446 /* otherwise, if it's not a comment, fail */
1447 if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1448 return false;
1449 /* find end of comment: find -- and a > must follow */
1450 p = xmlStrstr(p + 2, (xmlChar *) "--");
1451 if (!p || p[2] != '>')
1452 return false;
1453 /* advance over comment, and keep scanning */
1454 p += 3;
1455 continue;
1456 }
1457
1458 /* otherwise, if it's not a PI <?target something?>, fail */
1459 if (*p != '?')
1460 return false;
1461 p++;
1462
1463 /* find end of PI (the string ?> is forbidden within a PI) */
1464 e = xmlStrstr(p, (xmlChar *) "?>");
1465 if (!e)
1466 return false;
1467
1468 /* advance over PI, keep scanning */
1469 p = e + 2;
1470 }
1471 }
1472
1473
1474 /*
1475 * Convert a C string to XML internal representation
1476 *
1477 * Note: it is caller's responsibility to xmlFreeDoc() the result,
1478 * else a permanent memory leak will ensue!
1479 *
1480 * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1481 * yet do not use SAX - see xmlreader.c)
1482 */
1483 static xmlDocPtr
xml_parse(text * data,XmlOptionType xmloption_arg,bool preserve_whitespace,int encoding)1484 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1485 int encoding)
1486 {
1487 int32 len;
1488 xmlChar *string;
1489 xmlChar *utf8string;
1490 PgXmlErrorContext *xmlerrcxt;
1491 volatile xmlParserCtxtPtr ctxt = NULL;
1492 volatile xmlDocPtr doc = NULL;
1493
1494 len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
1495 string = xml_text2xmlChar(data);
1496
1497 utf8string = pg_do_encoding_conversion(string,
1498 len,
1499 encoding,
1500 PG_UTF8);
1501
1502 /* Start up libxml and its parser */
1503 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1504
1505 /* Use a TRY block to ensure we clean up correctly */
1506 PG_TRY();
1507 {
1508 bool parse_as_document = false;
1509 int res_code;
1510 size_t count = 0;
1511 xmlChar *version = NULL;
1512 int standalone = 0;
1513
1514 xmlInitParser();
1515
1516 ctxt = xmlNewParserCtxt();
1517 if (ctxt == NULL || xmlerrcxt->err_occurred)
1518 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1519 "could not allocate parser context");
1520
1521 /* Decide whether to parse as document or content */
1522 if (xmloption_arg == XMLOPTION_DOCUMENT)
1523 parse_as_document = true;
1524 else
1525 {
1526 /* Parse and skip over the XML declaration, if any */
1527 res_code = parse_xml_decl(utf8string,
1528 &count, &version, NULL, &standalone);
1529 if (res_code != 0)
1530 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1531 "invalid XML content: invalid XML declaration",
1532 res_code);
1533
1534 /* Is there a DOCTYPE element? */
1535 if (xml_doctype_in_content(utf8string + count))
1536 parse_as_document = true;
1537 }
1538
1539 if (parse_as_document)
1540 {
1541 /*
1542 * Note, that here we try to apply DTD defaults
1543 * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1544 * 'Default values defined by internal DTD are applied'. As for
1545 * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1546 * 10.16.7.e)
1547 */
1548 doc = xmlCtxtReadDoc(ctxt, utf8string,
1549 NULL,
1550 "UTF-8",
1551 XML_PARSE_NOENT | XML_PARSE_DTDATTR
1552 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1553 if (doc == NULL || xmlerrcxt->err_occurred)
1554 {
1555 /* Use original option to decide which error code to throw */
1556 if (xmloption_arg == XMLOPTION_DOCUMENT)
1557 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1558 "invalid XML document");
1559 else
1560 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1561 "invalid XML content");
1562 }
1563 }
1564 else
1565 {
1566 doc = xmlNewDoc(version);
1567 Assert(doc->encoding == NULL);
1568 doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1569 doc->standalone = standalone;
1570
1571 /* allow empty content */
1572 if (*(utf8string + count))
1573 {
1574 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1575 utf8string + count, NULL);
1576 if (res_code != 0 || xmlerrcxt->err_occurred)
1577 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1578 "invalid XML content");
1579 }
1580 }
1581 }
1582 PG_CATCH();
1583 {
1584 if (doc != NULL)
1585 xmlFreeDoc(doc);
1586 if (ctxt != NULL)
1587 xmlFreeParserCtxt(ctxt);
1588
1589 pg_xml_done(xmlerrcxt, true);
1590
1591 PG_RE_THROW();
1592 }
1593 PG_END_TRY();
1594
1595 xmlFreeParserCtxt(ctxt);
1596
1597 pg_xml_done(xmlerrcxt, false);
1598
1599 return doc;
1600 }
1601
1602
1603 /*
1604 * xmlChar<->text conversions
1605 */
1606 static xmlChar *
xml_text2xmlChar(text * in)1607 xml_text2xmlChar(text *in)
1608 {
1609 return (xmlChar *) text_to_cstring(in);
1610 }
1611
1612
1613 #ifdef USE_LIBXMLCONTEXT
1614
1615 /*
1616 * Manage the special context used for all libxml allocations (but only
1617 * in special debug builds; see notes at top of file)
1618 */
1619 static void
xml_memory_init(void)1620 xml_memory_init(void)
1621 {
1622 /* Create memory context if not there already */
1623 if (LibxmlContext == NULL)
1624 LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1625 "Libxml context",
1626 ALLOCSET_DEFAULT_SIZES);
1627
1628 /* Re-establish the callbacks even if already set */
1629 xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1630 }
1631
1632 /*
1633 * Wrappers for memory management functions
1634 */
1635 static void *
xml_palloc(size_t size)1636 xml_palloc(size_t size)
1637 {
1638 return MemoryContextAlloc(LibxmlContext, size);
1639 }
1640
1641
/* Reallocation callback: hand the request straight to repalloc */
static void *
xml_repalloc(void *ptr, size_t size)
{
	void	   *resized = repalloc(ptr, size);

	return resized;
}
1647
1648
/* Free callback: pfree the chunk, treating a NULL pointer as a no-op */
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
1656
1657
1658 static char *
xml_pstrdup(const char * string)1659 xml_pstrdup(const char *string)
1660 {
1661 return MemoryContextStrdup(LibxmlContext, string);
1662 }
1663 #endif /* USE_LIBXMLCONTEXT */
1664
1665
1666 /*
1667 * xmlPgEntityLoader --- entity loader callback function
1668 *
1669 * Silently prevent any external entity URL from being loaded. We don't want
1670 * to throw an error, so instead make the entity appear to expand to an empty
1671 * string.
1672 *
1673 * We would prefer to allow loading entities that exist in the system's
1674 * global XML catalog; but the available libxml2 APIs make that a complex
1675 * and fragile task. For now, just shut down all external access.
1676 */
1677 static xmlParserInputPtr
xmlPgEntityLoader(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)1678 xmlPgEntityLoader(const char *URL, const char *ID,
1679 xmlParserCtxtPtr ctxt)
1680 {
1681 return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1682 }
1683
1684
1685 /*
1686 * xml_ereport --- report an XML-related error
1687 *
1688 * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1689 * standard. This function adds libxml's native error message, if any, as
1690 * detail.
1691 *
1692 * This is exported for modules that want to share the core libxml error
1693 * handler. Note that pg_xml_init() *must* have been called previously.
1694 */
1695 void
xml_ereport(PgXmlErrorContext * errcxt,int level,int sqlcode,const char * msg)1696 xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1697 {
1698 char *detail;
1699
1700 /* Defend against someone passing us a bogus context struct */
1701 if (errcxt->magic != ERRCXT_MAGIC)
1702 elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1703
1704 /* Flag that the current libxml error has been reported */
1705 errcxt->err_occurred = false;
1706
1707 /* Include detail only if we have some text from libxml */
1708 if (errcxt->err_buf.len > 0)
1709 detail = errcxt->err_buf.data;
1710 else
1711 detail = NULL;
1712
1713 ereport(level,
1714 (errcode(sqlcode),
1715 errmsg_internal("%s", msg),
1716 detail ? errdetail_internal("%s", detail) : 0));
1717 }
1718
1719
1720 /*
1721 * Error handler for libxml errors and warnings
1722 */
1723 static void
xml_errorHandler(void * data,xmlErrorPtr error)1724 xml_errorHandler(void *data, xmlErrorPtr error)
1725 {
1726 PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1727 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1728 xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1729 xmlNodePtr node = error->node;
1730 const xmlChar *name = (node != NULL &&
1731 node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1732 int domain = error->domain;
1733 int level = error->level;
1734 StringInfo errorBuf;
1735
1736 /*
1737 * Defend against someone passing us a bogus context struct.
1738 *
1739 * We force a backend exit if this check fails because longjmp'ing out of
1740 * libxml would likely render it unsafe to use further.
1741 */
1742 if (xmlerrcxt->magic != ERRCXT_MAGIC)
1743 elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1744
1745 /*----------
1746 * Older libxml versions report some errors differently.
1747 * First, some errors were previously reported as coming from the parser
1748 * domain but are now reported as coming from the namespace domain.
1749 * Second, some warnings were upgraded to errors.
1750 * We attempt to compensate for that here.
1751 *----------
1752 */
1753 switch (error->code)
1754 {
1755 case XML_WAR_NS_URI:
1756 level = XML_ERR_ERROR;
1757 domain = XML_FROM_NAMESPACE;
1758 break;
1759
1760 case XML_ERR_NS_DECL_ERROR:
1761 case XML_WAR_NS_URI_RELATIVE:
1762 case XML_WAR_NS_COLUMN:
1763 case XML_NS_ERR_XML_NAMESPACE:
1764 case XML_NS_ERR_UNDEFINED_NAMESPACE:
1765 case XML_NS_ERR_QNAME:
1766 case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1767 case XML_NS_ERR_EMPTY:
1768 domain = XML_FROM_NAMESPACE;
1769 break;
1770 }
1771
1772 /* Decide whether to act on the error or not */
1773 switch (domain)
1774 {
1775 case XML_FROM_PARSER:
1776 case XML_FROM_NONE:
1777 case XML_FROM_MEMORY:
1778 case XML_FROM_IO:
1779
1780 /*
1781 * Suppress warnings about undeclared entities. We need to do
1782 * this to avoid problems due to not loading DTD definitions.
1783 */
1784 if (error->code == XML_WAR_UNDECLARED_ENTITY)
1785 return;
1786
1787 /* Otherwise, accept error regardless of the parsing purpose */
1788 break;
1789
1790 default:
1791 /* Ignore error if only doing well-formedness check */
1792 if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1793 return;
1794 break;
1795 }
1796
1797 /* Prepare error message in errorBuf */
1798 errorBuf = makeStringInfo();
1799
1800 if (error->line > 0)
1801 appendStringInfo(errorBuf, "line %d: ", error->line);
1802 if (name != NULL)
1803 appendStringInfo(errorBuf, "element %s: ", name);
1804 if (error->message != NULL)
1805 appendStringInfoString(errorBuf, error->message);
1806 else
1807 appendStringInfoString(errorBuf, "(no message provided)");
1808
1809 /*
1810 * Append context information to errorBuf.
1811 *
1812 * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1813 * write the context. Since we don't want to duplicate libxml
1814 * functionality here, we set up a generic error handler temporarily.
1815 *
1816 * We use appendStringInfo() directly as libxml's generic error handler.
1817 * This should work because it has essentially the same signature as
1818 * libxml expects, namely (void *ptr, const char *msg, ...).
1819 */
1820 if (input != NULL)
1821 {
1822 xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1823 void *errCtxSaved = xmlGenericErrorContext;
1824
1825 xmlSetGenericErrorFunc((void *) errorBuf,
1826 (xmlGenericErrorFunc) appendStringInfo);
1827
1828 /* Add context information to errorBuf */
1829 appendStringInfoLineSeparator(errorBuf);
1830
1831 xmlParserPrintFileContext(input);
1832
1833 /* Restore generic error func */
1834 xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1835 }
1836
1837 /* Get rid of any trailing newlines in errorBuf */
1838 chopStringInfoNewlines(errorBuf);
1839
1840 /*
1841 * Legacy error handling mode. err_occurred is never set, we just add the
1842 * message to err_buf. This mode exists because the xml2 contrib module
1843 * uses our error-handling infrastructure, but we don't want to change its
1844 * behaviour since it's deprecated anyway. This is also why we don't
1845 * distinguish between notices, warnings and errors here --- the old-style
1846 * generic error handler wouldn't have done that either.
1847 */
1848 if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1849 {
1850 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1851 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1852
1853 pfree(errorBuf->data);
1854 pfree(errorBuf);
1855 return;
1856 }
1857
1858 /*
1859 * We don't want to ereport() here because that'd probably leave libxml in
1860 * an inconsistent state. Instead, we remember the error and ereport()
1861 * from xml_ereport().
1862 *
1863 * Warnings and notices can be reported immediately since they won't cause
1864 * a longjmp() out of libxml.
1865 */
1866 if (level >= XML_ERR_ERROR)
1867 {
1868 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1869 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1870
1871 xmlerrcxt->err_occurred = true;
1872 }
1873 else if (level >= XML_ERR_WARNING)
1874 {
1875 ereport(WARNING,
1876 (errmsg_internal("%s", errorBuf->data)));
1877 }
1878 else
1879 {
1880 ereport(NOTICE,
1881 (errmsg_internal("%s", errorBuf->data)));
1882 }
1883
1884 pfree(errorBuf->data);
1885 pfree(errorBuf);
1886 }
1887
1888
1889 /*
1890 * Wrapper for "ereport" function for XML-related errors. The "msg"
1891 * is the SQL-level message; some can be adopted from the SQL/XML
1892 * standard. This function uses "code" to create a textual detail
1893 * message. At the moment, we only need to cover those codes that we
1894 * may raise in this file.
1895 */
1896 static void
xml_ereport_by_code(int level,int sqlcode,const char * msg,int code)1897 xml_ereport_by_code(int level, int sqlcode,
1898 const char *msg, int code)
1899 {
1900 const char *det;
1901
1902 switch (code)
1903 {
1904 case XML_ERR_INVALID_CHAR:
1905 det = gettext_noop("Invalid character value.");
1906 break;
1907 case XML_ERR_SPACE_REQUIRED:
1908 det = gettext_noop("Space required.");
1909 break;
1910 case XML_ERR_STANDALONE_VALUE:
1911 det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1912 break;
1913 case XML_ERR_VERSION_MISSING:
1914 det = gettext_noop("Malformed declaration: missing version.");
1915 break;
1916 case XML_ERR_MISSING_ENCODING:
1917 det = gettext_noop("Missing encoding in text declaration.");
1918 break;
1919 case XML_ERR_XMLDECL_NOT_FINISHED:
1920 det = gettext_noop("Parsing XML declaration: '?>' expected.");
1921 break;
1922 default:
1923 det = gettext_noop("Unrecognized libxml error code: %d.");
1924 break;
1925 }
1926
1927 ereport(level,
1928 (errcode(sqlcode),
1929 errmsg_internal("%s", msg),
1930 errdetail(det, code)));
1931 }
1932
1933
1934 /*
1935 * Remove all trailing newlines from a StringInfo string
1936 */
1937 static void
chopStringInfoNewlines(StringInfo str)1938 chopStringInfoNewlines(StringInfo str)
1939 {
1940 while (str->len > 0 && str->data[str->len - 1] == '\n')
1941 str->data[--str->len] = '\0';
1942 }
1943
1944
1945 /*
1946 * Append a newline after removing any existing trailing newlines
1947 */
1948 static void
appendStringInfoLineSeparator(StringInfo str)1949 appendStringInfoLineSeparator(StringInfo str)
1950 {
1951 chopStringInfoNewlines(str);
1952 if (str->len > 0)
1953 appendStringInfoChar(str, '\n');
1954 }
1955
1956
1957 /*
1958 * Convert one char in the current server encoding to a Unicode codepoint.
1959 */
1960 static pg_wchar
sqlchar_to_unicode(const char * s)1961 sqlchar_to_unicode(const char *s)
1962 {
1963 char *utf8string;
1964 pg_wchar ret[2]; /* need space for trailing zero */
1965
1966 /* note we're not assuming s is null-terminated */
1967 utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
1968
1969 pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
1970 pg_encoding_mblen(PG_UTF8, utf8string));
1971
1972 if (utf8string != s)
1973 pfree(utf8string);
1974
1975 return ret[0];
1976 }
1977
1978
1979 static bool
is_valid_xml_namefirst(pg_wchar c)1980 is_valid_xml_namefirst(pg_wchar c)
1981 {
1982 /* (Letter | '_' | ':') */
1983 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1984 || c == '_' || c == ':');
1985 }
1986
1987
1988 static bool
is_valid_xml_namechar(pg_wchar c)1989 is_valid_xml_namechar(pg_wchar c)
1990 {
1991 /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1992 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1993 || xmlIsDigitQ(c)
1994 || c == '.' || c == '-' || c == '_' || c == ':'
1995 || xmlIsCombiningQ(c)
1996 || xmlIsExtenderQ(c));
1997 }
1998 #endif /* USE_LIBXML */
1999
2000
2001 /*
2002 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
2003 */
2004 char *
map_sql_identifier_to_xml_name(const char * ident,bool fully_escaped,bool escape_period)2005 map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
2006 bool escape_period)
2007 {
2008 #ifdef USE_LIBXML
2009 StringInfoData buf;
2010 const char *p;
2011
2012 /*
2013 * SQL/XML doesn't make use of this case anywhere, so it's probably a
2014 * mistake.
2015 */
2016 Assert(fully_escaped || !escape_period);
2017
2018 initStringInfo(&buf);
2019
2020 for (p = ident; *p; p += pg_mblen(p))
2021 {
2022 if (*p == ':' && (p == ident || fully_escaped))
2023 appendStringInfoString(&buf, "_x003A_");
2024 else if (*p == '_' && *(p + 1) == 'x')
2025 appendStringInfoString(&buf, "_x005F_");
2026 else if (fully_escaped && p == ident &&
2027 pg_strncasecmp(p, "xml", 3) == 0)
2028 {
2029 if (*p == 'x')
2030 appendStringInfoString(&buf, "_x0078_");
2031 else
2032 appendStringInfoString(&buf, "_x0058_");
2033 }
2034 else if (escape_period && *p == '.')
2035 appendStringInfoString(&buf, "_x002E_");
2036 else
2037 {
2038 pg_wchar u = sqlchar_to_unicode(p);
2039
2040 if ((p == ident)
2041 ? !is_valid_xml_namefirst(u)
2042 : !is_valid_xml_namechar(u))
2043 appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
2044 else
2045 appendBinaryStringInfo(&buf, p, pg_mblen(p));
2046 }
2047 }
2048
2049 return buf.data;
2050 #else /* not USE_LIBXML */
2051 NO_XML_SUPPORT();
2052 return NULL;
2053 #endif /* not USE_LIBXML */
2054 }
2055
2056
2057 /*
2058 * Map a Unicode codepoint into the current server encoding.
2059 */
2060 static char *
unicode_to_sqlchar(pg_wchar c)2061 unicode_to_sqlchar(pg_wchar c)
2062 {
2063 char utf8string[8]; /* need room for trailing zero */
2064 char *result;
2065
2066 memset(utf8string, 0, sizeof(utf8string));
2067 unicode_to_utf8(c, (unsigned char *) utf8string);
2068
2069 result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
2070 /* if pg_any_to_server didn't strdup, we must */
2071 if (result == utf8string)
2072 result = pstrdup(result);
2073 return result;
2074 }
2075
2076
2077 /*
2078 * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2079 */
2080 char *
map_xml_name_to_sql_identifier(const char * name)2081 map_xml_name_to_sql_identifier(const char *name)
2082 {
2083 StringInfoData buf;
2084 const char *p;
2085
2086 initStringInfo(&buf);
2087
2088 for (p = name; *p; p += pg_mblen(p))
2089 {
2090 if (*p == '_' && *(p + 1) == 'x'
2091 && isxdigit((unsigned char) *(p + 2))
2092 && isxdigit((unsigned char) *(p + 3))
2093 && isxdigit((unsigned char) *(p + 4))
2094 && isxdigit((unsigned char) *(p + 5))
2095 && *(p + 6) == '_')
2096 {
2097 unsigned int u;
2098
2099 sscanf(p + 2, "%X", &u);
2100 appendStringInfoString(&buf, unicode_to_sqlchar(u));
2101 p += 6;
2102 }
2103 else
2104 appendBinaryStringInfo(&buf, p, pg_mblen(p));
2105 }
2106
2107 return buf.data;
2108 }
2109
2110 /*
2111 * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2112 *
2113 * When xml_escape_strings is true, then certain characters in string
2114 * values are replaced by entity references (< etc.), as specified
2115 * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2116 * wanted. The false case is mainly useful when the resulting value
2117 * is used with xmlTextWriterWriteAttribute() to write out an
2118 * attribute, because that function does the escaping itself.
2119 */
2120 char *
map_sql_value_to_xml_value(Datum value,Oid type,bool xml_escape_strings)2121 map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2122 {
2123 if (type_is_array_domain(type))
2124 {
2125 ArrayType *array;
2126 Oid elmtype;
2127 int16 elmlen;
2128 bool elmbyval;
2129 char elmalign;
2130 int num_elems;
2131 Datum *elem_values;
2132 bool *elem_nulls;
2133 StringInfoData buf;
2134 int i;
2135
2136 array = DatumGetArrayTypeP(value);
2137 elmtype = ARR_ELEMTYPE(array);
2138 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2139
2140 deconstruct_array(array, elmtype,
2141 elmlen, elmbyval, elmalign,
2142 &elem_values, &elem_nulls,
2143 &num_elems);
2144
2145 initStringInfo(&buf);
2146
2147 for (i = 0; i < num_elems; i++)
2148 {
2149 if (elem_nulls[i])
2150 continue;
2151 appendStringInfoString(&buf, "<element>");
2152 appendStringInfoString(&buf,
2153 map_sql_value_to_xml_value(elem_values[i],
2154 elmtype, true));
2155 appendStringInfoString(&buf, "</element>");
2156 }
2157
2158 pfree(elem_values);
2159 pfree(elem_nulls);
2160
2161 return buf.data;
2162 }
2163 else
2164 {
2165 Oid typeOut;
2166 bool isvarlena;
2167 char *str;
2168
2169 /*
2170 * Flatten domains; the special-case treatments below should apply to,
2171 * eg, domains over boolean not just boolean.
2172 */
2173 type = getBaseType(type);
2174
2175 /*
2176 * Special XSD formatting for some data types
2177 */
2178 switch (type)
2179 {
2180 case BOOLOID:
2181 if (DatumGetBool(value))
2182 return "true";
2183 else
2184 return "false";
2185
2186 case DATEOID:
2187 {
2188 DateADT date;
2189 struct pg_tm tm;
2190 char buf[MAXDATELEN + 1];
2191
2192 date = DatumGetDateADT(value);
2193 /* XSD doesn't support infinite values */
2194 if (DATE_NOT_FINITE(date))
2195 ereport(ERROR,
2196 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2197 errmsg("date out of range"),
2198 errdetail("XML does not support infinite date values.")));
2199 j2date(date + POSTGRES_EPOCH_JDATE,
2200 &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2201 EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2202
2203 return pstrdup(buf);
2204 }
2205
2206 case TIMESTAMPOID:
2207 {
2208 Timestamp timestamp;
2209 struct pg_tm tm;
2210 fsec_t fsec;
2211 char buf[MAXDATELEN + 1];
2212
2213 timestamp = DatumGetTimestamp(value);
2214
2215 /* XSD doesn't support infinite values */
2216 if (TIMESTAMP_NOT_FINITE(timestamp))
2217 ereport(ERROR,
2218 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2219 errmsg("timestamp out of range"),
2220 errdetail("XML does not support infinite timestamp values.")));
2221 else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2222 EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2223 else
2224 ereport(ERROR,
2225 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2226 errmsg("timestamp out of range")));
2227
2228 return pstrdup(buf);
2229 }
2230
2231 case TIMESTAMPTZOID:
2232 {
2233 TimestampTz timestamp;
2234 struct pg_tm tm;
2235 int tz;
2236 fsec_t fsec;
2237 const char *tzn = NULL;
2238 char buf[MAXDATELEN + 1];
2239
2240 timestamp = DatumGetTimestamp(value);
2241
2242 /* XSD doesn't support infinite values */
2243 if (TIMESTAMP_NOT_FINITE(timestamp))
2244 ereport(ERROR,
2245 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2246 errmsg("timestamp out of range"),
2247 errdetail("XML does not support infinite timestamp values.")));
2248 else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2249 EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2250 else
2251 ereport(ERROR,
2252 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2253 errmsg("timestamp out of range")));
2254
2255 return pstrdup(buf);
2256 }
2257
2258 #ifdef USE_LIBXML
2259 case BYTEAOID:
2260 {
2261 bytea *bstr = DatumGetByteaPP(value);
2262 PgXmlErrorContext *xmlerrcxt;
2263 volatile xmlBufferPtr buf = NULL;
2264 volatile xmlTextWriterPtr writer = NULL;
2265 char *result;
2266
2267 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2268
2269 PG_TRY();
2270 {
2271 buf = xmlBufferCreate();
2272 if (buf == NULL || xmlerrcxt->err_occurred)
2273 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2274 "could not allocate xmlBuffer");
2275 writer = xmlNewTextWriterMemory(buf, 0);
2276 if (writer == NULL || xmlerrcxt->err_occurred)
2277 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2278 "could not allocate xmlTextWriter");
2279
2280 if (xmlbinary == XMLBINARY_BASE64)
2281 xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2282 0, VARSIZE_ANY_EXHDR(bstr));
2283 else
2284 xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2285 0, VARSIZE_ANY_EXHDR(bstr));
2286
2287 /* we MUST do this now to flush data out to the buffer */
2288 xmlFreeTextWriter(writer);
2289 writer = NULL;
2290
2291 result = pstrdup((const char *) xmlBufferContent(buf));
2292 }
2293 PG_CATCH();
2294 {
2295 if (writer)
2296 xmlFreeTextWriter(writer);
2297 if (buf)
2298 xmlBufferFree(buf);
2299
2300 pg_xml_done(xmlerrcxt, true);
2301
2302 PG_RE_THROW();
2303 }
2304 PG_END_TRY();
2305
2306 xmlBufferFree(buf);
2307
2308 pg_xml_done(xmlerrcxt, false);
2309
2310 return result;
2311 }
2312 #endif /* USE_LIBXML */
2313
2314 }
2315
2316 /*
2317 * otherwise, just use the type's native text representation
2318 */
2319 getTypeOutputInfo(type, &typeOut, &isvarlena);
2320 str = OidOutputFunctionCall(typeOut, value);
2321
2322 /* ... exactly as-is for XML, and when escaping is not wanted */
2323 if (type == XMLOID || !xml_escape_strings)
2324 return str;
2325
2326 /* otherwise, translate special characters as needed */
2327 return escape_xml(str);
2328 }
2329 }
2330
2331
2332 /*
2333 * Escape characters in text that have special meanings in XML.
2334 *
2335 * Returns a palloc'd string.
2336 *
2337 * NB: this is intentionally not dependent on libxml.
2338 */
2339 char *
escape_xml(const char * str)2340 escape_xml(const char *str)
2341 {
2342 StringInfoData buf;
2343 const char *p;
2344
2345 initStringInfo(&buf);
2346 for (p = str; *p; p++)
2347 {
2348 switch (*p)
2349 {
2350 case '&':
2351 appendStringInfoString(&buf, "&");
2352 break;
2353 case '<':
2354 appendStringInfoString(&buf, "<");
2355 break;
2356 case '>':
2357 appendStringInfoString(&buf, ">");
2358 break;
2359 case '\r':
2360 appendStringInfoString(&buf, "
");
2361 break;
2362 default:
2363 appendStringInfoCharMacro(&buf, *p);
2364 break;
2365 }
2366 }
2367 return buf.data;
2368 }
2369
2370
/*
 * Duplicate a NUL-terminated string into memory obtained from SPI_palloc().
 *
 * Callers in this file use it to copy a result before calling SPI_finish().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes = strlen(s) + 1; /* include the terminating NUL */
	char	   *copy;

	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2380
2381
2382 /*
2383 * SQL to XML mapping functions
2384 *
2385 * What follows below was at one point intentionally organized so that
2386 * you can read along in the SQL/XML standard. The functions are
2387 * mostly split up the way the clauses lay out in the standards
2388 * document, and the identifiers are also aligned with the standard
2389 * text. Unfortunately, SQL/XML:2006 reordered the clauses
2390 * differently than SQL/XML:2003, so the order below doesn't make much
2391 * sense anymore.
2392 *
2393 * There are many things going on there:
2394 *
2395 * There are two kinds of mappings: Mapping SQL data (table contents)
2396 * to XML documents, and mapping SQL structure (the "schema") to XML
2397 * Schema. And there are functions that do both at the same time.
2398 *
2399 * Then you can map a database, a schema, or a table, each in both
2400 * ways. This breaks down recursively: Mapping a database invokes
2401 * mapping schemas, which invokes mapping tables, which invokes
2402 * mapping rows, which invokes mapping columns, although you can't
2403 * call the last two from the outside. Because of this, there are a
2404 * number of xyz_internal() functions which are to be called both from
2405 * the function manager wrapper and from some upper layer in a
2406 * recursive call.
2407 *
2408 * See the documentation about what the common function arguments
2409 * nulls, tableforest, and targetns mean.
2410 *
2411 * Some style guidelines for XML output: Use double quotes for quoting
2412 * XML attributes. Indent XML elements by two spaces, but remember
2413 * that a lot of code is called recursively at different levels, so
2414 * it's better not to indent rather than create output that indents
2415 * and outdents weirdly. Add newlines to make the output look nice.
2416 */
2417
2418
2419 /*
2420 * Visibility of objects for XML mappings; see SQL/XML:2008 section
2421 * 4.10.8.
2422 */
2423
2424 /*
2425 * Given a query, which must return type oid as first column, produce
2426 * a list of Oids with the query results.
2427 */
2428 static List *
query_to_oid_list(const char * query)2429 query_to_oid_list(const char *query)
2430 {
2431 uint64 i;
2432 List *list = NIL;
2433
2434 SPI_execute(query, true, 0);
2435
2436 for (i = 0; i < SPI_processed; i++)
2437 {
2438 Datum oid;
2439 bool isnull;
2440
2441 oid = SPI_getbinval(SPI_tuptable->vals[i],
2442 SPI_tuptable->tupdesc,
2443 1,
2444 &isnull);
2445 if (!isnull)
2446 list = lappend_oid(list, DatumGetObjectId(oid));
2447 }
2448
2449 return list;
2450 }
2451
2452
2453 static List *
schema_get_xml_visible_tables(Oid nspid)2454 schema_get_xml_visible_tables(Oid nspid)
2455 {
2456 StringInfoData query;
2457
2458 initStringInfo(&query);
2459 appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2460 " WHERE relnamespace = %u AND relkind IN ("
2461 CppAsString2(RELKIND_RELATION) ","
2462 CppAsString2(RELKIND_MATVIEW) ","
2463 CppAsString2(RELKIND_VIEW) ")"
2464 " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2465 " ORDER BY relname;", nspid);
2466
2467 return query_to_oid_list(query.data);
2468 }
2469
2470
/*
 * Including the system schemas is probably not useful for a database
 * mapping.
 *
 * XML_VISIBLE_SCHEMAS_EXCLUDE is the SQL condition matching the schemas to
 * leave out; XML_VISIBLE_SCHEMAS is a query (without trailing semicolon)
 * selecting the OIDs of the remaining schemas for which
 * has_schema_privilege() reports USAGE privilege.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_' OR nspname = 'information_schema')"

#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace" \
	" WHERE pg_catalog.has_schema_privilege (oid, 'USAGE')" \
	" AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2478
2479
2480 static List *
database_get_xml_visible_schemas(void)2481 database_get_xml_visible_schemas(void)
2482 {
2483 return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2484 }
2485
2486
2487 static List *
database_get_xml_visible_tables(void)2488 database_get_xml_visible_tables(void)
2489 {
2490 /* At the moment there is no order required here. */
2491 return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2492 " WHERE relkind IN ("
2493 CppAsString2(RELKIND_RELATION) ","
2494 CppAsString2(RELKIND_MATVIEW) ","
2495 CppAsString2(RELKIND_VIEW) ")"
2496 " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2497 " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2498 }
2499
2500
2501 /*
2502 * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2503 * section 9.11.
2504 */
2505
2506 static StringInfo
table_to_xml_internal(Oid relid,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2507 table_to_xml_internal(Oid relid,
2508 const char *xmlschema, bool nulls, bool tableforest,
2509 const char *targetns, bool top_level)
2510 {
2511 StringInfoData query;
2512
2513 initStringInfo(&query);
2514 appendStringInfo(&query, "SELECT * FROM %s",
2515 DatumGetCString(DirectFunctionCall1(regclassout,
2516 ObjectIdGetDatum(relid))));
2517 return query_to_xml_internal(query.data, get_rel_name(relid),
2518 xmlschema, nulls, tableforest,
2519 targetns, top_level);
2520 }
2521
2522
2523 Datum
table_to_xml(PG_FUNCTION_ARGS)2524 table_to_xml(PG_FUNCTION_ARGS)
2525 {
2526 Oid relid = PG_GETARG_OID(0);
2527 bool nulls = PG_GETARG_BOOL(1);
2528 bool tableforest = PG_GETARG_BOOL(2);
2529 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2530
2531 PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2532 nulls, tableforest,
2533 targetns, true)));
2534 }
2535
2536
2537 Datum
query_to_xml(PG_FUNCTION_ARGS)2538 query_to_xml(PG_FUNCTION_ARGS)
2539 {
2540 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2541 bool nulls = PG_GETARG_BOOL(1);
2542 bool tableforest = PG_GETARG_BOOL(2);
2543 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2544
2545 PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2546 NULL, nulls, tableforest,
2547 targetns, true)));
2548 }
2549
2550
2551 Datum
cursor_to_xml(PG_FUNCTION_ARGS)2552 cursor_to_xml(PG_FUNCTION_ARGS)
2553 {
2554 char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2555 int32 count = PG_GETARG_INT32(1);
2556 bool nulls = PG_GETARG_BOOL(2);
2557 bool tableforest = PG_GETARG_BOOL(3);
2558 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2559
2560 StringInfoData result;
2561 Portal portal;
2562 uint64 i;
2563
2564 initStringInfo(&result);
2565
2566 if (!tableforest)
2567 {
2568 xmldata_root_element_start(&result, "table", NULL, targetns, true);
2569 appendStringInfoChar(&result, '\n');
2570 }
2571
2572 SPI_connect();
2573 portal = SPI_cursor_find(name);
2574 if (portal == NULL)
2575 ereport(ERROR,
2576 (errcode(ERRCODE_UNDEFINED_CURSOR),
2577 errmsg("cursor \"%s\" does not exist", name)));
2578
2579 SPI_cursor_fetch(portal, true, count);
2580 for (i = 0; i < SPI_processed; i++)
2581 SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2582 tableforest, targetns, true);
2583
2584 SPI_finish();
2585
2586 if (!tableforest)
2587 xmldata_root_element_end(&result, "table");
2588
2589 PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2590 }
2591
2592
2593 /*
2594 * Write the start tag of the root element of a data mapping.
2595 *
2596 * top_level means that this is the very top level of the eventual
2597 * output. For example, when the user calls table_to_xml, then a call
2598 * with a table name to this function is the top level. When the user
2599 * calls database_to_xml, then a call with a schema name to this
2600 * function is not the top level. If top_level is false, then the XML
2601 * namespace declarations are omitted, because they supposedly already
2602 * appeared earlier in the output. Repeating them is not wrong, but
2603 * it looks ugly.
2604 */
2605 static void
xmldata_root_element_start(StringInfo result,const char * eltname,const char * xmlschema,const char * targetns,bool top_level)2606 xmldata_root_element_start(StringInfo result, const char *eltname,
2607 const char *xmlschema, const char *targetns,
2608 bool top_level)
2609 {
2610 /* This isn't really wrong but currently makes no sense. */
2611 Assert(top_level || !xmlschema);
2612
2613 appendStringInfo(result, "<%s", eltname);
2614 if (top_level)
2615 {
2616 appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2617 if (strlen(targetns) > 0)
2618 appendStringInfo(result, " xmlns=\"%s\"", targetns);
2619 }
2620 if (xmlschema)
2621 {
2622 /* FIXME: better targets */
2623 if (strlen(targetns) > 0)
2624 appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2625 else
2626 appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2627 }
2628 appendStringInfoString(result, ">\n");
2629 }
2630
2631
2632 static void
xmldata_root_element_end(StringInfo result,const char * eltname)2633 xmldata_root_element_end(StringInfo result, const char *eltname)
2634 {
2635 appendStringInfo(result, "</%s>\n", eltname);
2636 }
2637
2638
2639 static StringInfo
query_to_xml_internal(const char * query,char * tablename,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2640 query_to_xml_internal(const char *query, char *tablename,
2641 const char *xmlschema, bool nulls, bool tableforest,
2642 const char *targetns, bool top_level)
2643 {
2644 StringInfo result;
2645 char *xmltn;
2646 uint64 i;
2647
2648 if (tablename)
2649 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2650 else
2651 xmltn = "table";
2652
2653 result = makeStringInfo();
2654
2655 SPI_connect();
2656 if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2657 ereport(ERROR,
2658 (errcode(ERRCODE_DATA_EXCEPTION),
2659 errmsg("invalid query")));
2660
2661 if (!tableforest)
2662 {
2663 xmldata_root_element_start(result, xmltn, xmlschema,
2664 targetns, top_level);
2665 appendStringInfoChar(result, '\n');
2666 }
2667
2668 if (xmlschema)
2669 appendStringInfo(result, "%s\n\n", xmlschema);
2670
2671 for (i = 0; i < SPI_processed; i++)
2672 SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2673 tableforest, targetns, top_level);
2674
2675 if (!tableforest)
2676 xmldata_root_element_end(result, xmltn);
2677
2678 SPI_finish();
2679
2680 return result;
2681 }
2682
2683
2684 Datum
table_to_xmlschema(PG_FUNCTION_ARGS)2685 table_to_xmlschema(PG_FUNCTION_ARGS)
2686 {
2687 Oid relid = PG_GETARG_OID(0);
2688 bool nulls = PG_GETARG_BOOL(1);
2689 bool tableforest = PG_GETARG_BOOL(2);
2690 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2691 const char *result;
2692 Relation rel;
2693
2694 rel = heap_open(relid, AccessShareLock);
2695 result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2696 tableforest, targetns);
2697 heap_close(rel, NoLock);
2698
2699 PG_RETURN_XML_P(cstring_to_xmltype(result));
2700 }
2701
2702
2703 Datum
query_to_xmlschema(PG_FUNCTION_ARGS)2704 query_to_xmlschema(PG_FUNCTION_ARGS)
2705 {
2706 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2707 bool nulls = PG_GETARG_BOOL(1);
2708 bool tableforest = PG_GETARG_BOOL(2);
2709 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2710 const char *result;
2711 SPIPlanPtr plan;
2712 Portal portal;
2713
2714 SPI_connect();
2715
2716 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2717 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2718
2719 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2720 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2721
2722 result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2723 InvalidOid, nulls,
2724 tableforest, targetns));
2725 SPI_cursor_close(portal);
2726 SPI_finish();
2727
2728 PG_RETURN_XML_P(cstring_to_xmltype(result));
2729 }
2730
2731
2732 Datum
cursor_to_xmlschema(PG_FUNCTION_ARGS)2733 cursor_to_xmlschema(PG_FUNCTION_ARGS)
2734 {
2735 char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2736 bool nulls = PG_GETARG_BOOL(1);
2737 bool tableforest = PG_GETARG_BOOL(2);
2738 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2739 const char *xmlschema;
2740 Portal portal;
2741
2742 SPI_connect();
2743 portal = SPI_cursor_find(name);
2744 if (portal == NULL)
2745 ereport(ERROR,
2746 (errcode(ERRCODE_UNDEFINED_CURSOR),
2747 errmsg("cursor \"%s\" does not exist", name)));
2748
2749 xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2750 InvalidOid, nulls,
2751 tableforest, targetns));
2752 SPI_finish();
2753
2754 PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2755 }
2756
2757
2758 Datum
table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2759 table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2760 {
2761 Oid relid = PG_GETARG_OID(0);
2762 bool nulls = PG_GETARG_BOOL(1);
2763 bool tableforest = PG_GETARG_BOOL(2);
2764 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2765 Relation rel;
2766 const char *xmlschema;
2767
2768 rel = heap_open(relid, AccessShareLock);
2769 xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2770 tableforest, targetns);
2771 heap_close(rel, NoLock);
2772
2773 PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2774 xmlschema, nulls, tableforest,
2775 targetns, true)));
2776 }
2777
2778
2779 Datum
query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2780 query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2781 {
2782 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2783 bool nulls = PG_GETARG_BOOL(1);
2784 bool tableforest = PG_GETARG_BOOL(2);
2785 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2786
2787 const char *xmlschema;
2788 SPIPlanPtr plan;
2789 Portal portal;
2790
2791 SPI_connect();
2792
2793 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2794 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2795
2796 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2797 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2798
2799 xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2800 InvalidOid, nulls, tableforest, targetns));
2801 SPI_cursor_close(portal);
2802 SPI_finish();
2803
2804 PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2805 xmlschema, nulls, tableforest,
2806 targetns, true)));
2807 }
2808
2809
2810 /*
2811 * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2812 * sections 9.13, 9.14.
2813 */
2814
2815 static StringInfo
schema_to_xml_internal(Oid nspid,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2816 schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2817 bool tableforest, const char *targetns, bool top_level)
2818 {
2819 StringInfo result;
2820 char *xmlsn;
2821 List *relid_list;
2822 ListCell *cell;
2823
2824 xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2825 true, false);
2826 result = makeStringInfo();
2827
2828 xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2829 appendStringInfoChar(result, '\n');
2830
2831 if (xmlschema)
2832 appendStringInfo(result, "%s\n\n", xmlschema);
2833
2834 SPI_connect();
2835
2836 relid_list = schema_get_xml_visible_tables(nspid);
2837
2838 foreach(cell, relid_list)
2839 {
2840 Oid relid = lfirst_oid(cell);
2841 StringInfo subres;
2842
2843 subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2844 targetns, false);
2845
2846 appendStringInfoString(result, subres->data);
2847 appendStringInfoChar(result, '\n');
2848 }
2849
2850 SPI_finish();
2851
2852 xmldata_root_element_end(result, xmlsn);
2853
2854 return result;
2855 }
2856
2857
2858 Datum
schema_to_xml(PG_FUNCTION_ARGS)2859 schema_to_xml(PG_FUNCTION_ARGS)
2860 {
2861 Name name = PG_GETARG_NAME(0);
2862 bool nulls = PG_GETARG_BOOL(1);
2863 bool tableforest = PG_GETARG_BOOL(2);
2864 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2865
2866 char *schemaname;
2867 Oid nspid;
2868
2869 schemaname = NameStr(*name);
2870 nspid = LookupExplicitNamespace(schemaname, false);
2871
2872 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2873 nulls, tableforest, targetns, true)));
2874 }
2875
2876
2877 /*
2878 * Write the start element of the root element of an XML Schema mapping.
2879 */
2880 static void
xsd_schema_element_start(StringInfo result,const char * targetns)2881 xsd_schema_element_start(StringInfo result, const char *targetns)
2882 {
2883 appendStringInfoString(result,
2884 "<xsd:schema\n"
2885 " xmlns:xsd=\"" NAMESPACE_XSD "\"");
2886 if (strlen(targetns) > 0)
2887 appendStringInfo(result,
2888 "\n"
2889 " targetNamespace=\"%s\"\n"
2890 " elementFormDefault=\"qualified\"",
2891 targetns);
2892 appendStringInfoString(result,
2893 ">\n\n");
2894 }
2895
2896
2897 static void
xsd_schema_element_end(StringInfo result)2898 xsd_schema_element_end(StringInfo result)
2899 {
2900 appendStringInfoString(result, "</xsd:schema>");
2901 }
2902
2903
2904 static StringInfo
schema_to_xmlschema_internal(const char * schemaname,bool nulls,bool tableforest,const char * targetns)2905 schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2906 bool tableforest, const char *targetns)
2907 {
2908 Oid nspid;
2909 List *relid_list;
2910 List *tupdesc_list;
2911 ListCell *cell;
2912 StringInfo result;
2913
2914 result = makeStringInfo();
2915
2916 nspid = LookupExplicitNamespace(schemaname, false);
2917
2918 xsd_schema_element_start(result, targetns);
2919
2920 SPI_connect();
2921
2922 relid_list = schema_get_xml_visible_tables(nspid);
2923
2924 tupdesc_list = NIL;
2925 foreach(cell, relid_list)
2926 {
2927 Relation rel;
2928
2929 rel = heap_open(lfirst_oid(cell), AccessShareLock);
2930 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2931 heap_close(rel, NoLock);
2932 }
2933
2934 appendStringInfoString(result,
2935 map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2936
2937 appendStringInfoString(result,
2938 map_sql_schema_to_xmlschema_types(nspid, relid_list,
2939 nulls, tableforest, targetns));
2940
2941 xsd_schema_element_end(result);
2942
2943 SPI_finish();
2944
2945 return result;
2946 }
2947
2948
2949 Datum
schema_to_xmlschema(PG_FUNCTION_ARGS)2950 schema_to_xmlschema(PG_FUNCTION_ARGS)
2951 {
2952 Name name = PG_GETARG_NAME(0);
2953 bool nulls = PG_GETARG_BOOL(1);
2954 bool tableforest = PG_GETARG_BOOL(2);
2955 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2956
2957 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2958 nulls, tableforest, targetns)));
2959 }
2960
2961
2962 Datum
schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2963 schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2964 {
2965 Name name = PG_GETARG_NAME(0);
2966 bool nulls = PG_GETARG_BOOL(1);
2967 bool tableforest = PG_GETARG_BOOL(2);
2968 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2969 char *schemaname;
2970 Oid nspid;
2971 StringInfo xmlschema;
2972
2973 schemaname = NameStr(*name);
2974 nspid = LookupExplicitNamespace(schemaname, false);
2975
2976 xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
2977 tableforest, targetns);
2978
2979 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
2980 xmlschema->data, nulls,
2981 tableforest, targetns, true)));
2982 }
2983
2984
2985 /*
2986 * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
2987 * sections 9.16, 9.17.
2988 */
2989
2990 static StringInfo
database_to_xml_internal(const char * xmlschema,bool nulls,bool tableforest,const char * targetns)2991 database_to_xml_internal(const char *xmlschema, bool nulls,
2992 bool tableforest, const char *targetns)
2993 {
2994 StringInfo result;
2995 List *nspid_list;
2996 ListCell *cell;
2997 char *xmlcn;
2998
2999 xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3000 true, false);
3001 result = makeStringInfo();
3002
3003 xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3004 appendStringInfoChar(result, '\n');
3005
3006 if (xmlschema)
3007 appendStringInfo(result, "%s\n\n", xmlschema);
3008
3009 SPI_connect();
3010
3011 nspid_list = database_get_xml_visible_schemas();
3012
3013 foreach(cell, nspid_list)
3014 {
3015 Oid nspid = lfirst_oid(cell);
3016 StringInfo subres;
3017
3018 subres = schema_to_xml_internal(nspid, NULL, nulls,
3019 tableforest, targetns, false);
3020
3021 appendStringInfoString(result, subres->data);
3022 appendStringInfoChar(result, '\n');
3023 }
3024
3025 SPI_finish();
3026
3027 xmldata_root_element_end(result, xmlcn);
3028
3029 return result;
3030 }
3031
3032
3033 Datum
database_to_xml(PG_FUNCTION_ARGS)3034 database_to_xml(PG_FUNCTION_ARGS)
3035 {
3036 bool nulls = PG_GETARG_BOOL(0);
3037 bool tableforest = PG_GETARG_BOOL(1);
3038 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3039
3040 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3041 tableforest, targetns)));
3042 }
3043
3044
3045 static StringInfo
database_to_xmlschema_internal(bool nulls,bool tableforest,const char * targetns)3046 database_to_xmlschema_internal(bool nulls, bool tableforest,
3047 const char *targetns)
3048 {
3049 List *relid_list;
3050 List *nspid_list;
3051 List *tupdesc_list;
3052 ListCell *cell;
3053 StringInfo result;
3054
3055 result = makeStringInfo();
3056
3057 xsd_schema_element_start(result, targetns);
3058
3059 SPI_connect();
3060
3061 relid_list = database_get_xml_visible_tables();
3062 nspid_list = database_get_xml_visible_schemas();
3063
3064 tupdesc_list = NIL;
3065 foreach(cell, relid_list)
3066 {
3067 Relation rel;
3068
3069 rel = heap_open(lfirst_oid(cell), AccessShareLock);
3070 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3071 heap_close(rel, NoLock);
3072 }
3073
3074 appendStringInfoString(result,
3075 map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3076
3077 appendStringInfoString(result,
3078 map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3079
3080 xsd_schema_element_end(result);
3081
3082 SPI_finish();
3083
3084 return result;
3085 }
3086
3087
3088 Datum
database_to_xmlschema(PG_FUNCTION_ARGS)3089 database_to_xmlschema(PG_FUNCTION_ARGS)
3090 {
3091 bool nulls = PG_GETARG_BOOL(0);
3092 bool tableforest = PG_GETARG_BOOL(1);
3093 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3094
3095 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3096 tableforest, targetns)));
3097 }
3098
3099
3100 Datum
database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)3101 database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3102 {
3103 bool nulls = PG_GETARG_BOOL(0);
3104 bool tableforest = PG_GETARG_BOOL(1);
3105 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3106 StringInfo xmlschema;
3107
3108 xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3109
3110 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3111 nulls, tableforest, targetns)));
3112 }
3113
3114
3115 /*
3116 * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3117 * 9.2.
3118 */
3119 static char *
map_multipart_sql_identifier_to_xml_name(const char * a,const char * b,const char * c,const char * d)3120 map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3121 {
3122 StringInfoData result;
3123
3124 initStringInfo(&result);
3125
3126 if (a)
3127 appendStringInfoString(&result,
3128 map_sql_identifier_to_xml_name(a, true, true));
3129 if (b)
3130 appendStringInfo(&result, ".%s",
3131 map_sql_identifier_to_xml_name(b, true, true));
3132 if (c)
3133 appendStringInfo(&result, ".%s",
3134 map_sql_identifier_to_xml_name(c, true, true));
3135 if (d)
3136 appendStringInfo(&result, ".%s",
3137 map_sql_identifier_to_xml_name(d, true, true));
3138
3139 return result.data;
3140 }
3141
3142
3143 /*
3144 * Map an SQL table to an XML Schema document; see SQL/XML:2008
3145 * section 9.11.
3146 *
3147 * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3148 * 9.9.
3149 */
3150 static const char *
map_sql_table_to_xmlschema(TupleDesc tupdesc,Oid relid,bool nulls,bool tableforest,const char * targetns)3151 map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3152 bool tableforest, const char *targetns)
3153 {
3154 int i;
3155 char *xmltn;
3156 char *tabletypename;
3157 char *rowtypename;
3158 StringInfoData result;
3159
3160 initStringInfo(&result);
3161
3162 if (OidIsValid(relid))
3163 {
3164 HeapTuple tuple;
3165 Form_pg_class reltuple;
3166
3167 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3168 if (!HeapTupleIsValid(tuple))
3169 elog(ERROR, "cache lookup failed for relation %u", relid);
3170 reltuple = (Form_pg_class) GETSTRUCT(tuple);
3171
3172 xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3173 true, false);
3174
3175 tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3176 get_database_name(MyDatabaseId),
3177 get_namespace_name(reltuple->relnamespace),
3178 NameStr(reltuple->relname));
3179
3180 rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3181 get_database_name(MyDatabaseId),
3182 get_namespace_name(reltuple->relnamespace),
3183 NameStr(reltuple->relname));
3184
3185 ReleaseSysCache(tuple);
3186 }
3187 else
3188 {
3189 if (tableforest)
3190 xmltn = "row";
3191 else
3192 xmltn = "table";
3193
3194 tabletypename = "TableType";
3195 rowtypename = "RowType";
3196 }
3197
3198 xsd_schema_element_start(&result, targetns);
3199
3200 appendStringInfoString(&result,
3201 map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3202
3203 appendStringInfo(&result,
3204 "<xsd:complexType name=\"%s\">\n"
3205 " <xsd:sequence>\n",
3206 rowtypename);
3207
3208 for (i = 0; i < tupdesc->natts; i++)
3209 {
3210 Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3211
3212 if (att->attisdropped)
3213 continue;
3214 appendStringInfo(&result,
3215 " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3216 map_sql_identifier_to_xml_name(NameStr(att->attname),
3217 true, false),
3218 map_sql_type_to_xml_name(att->atttypid, -1),
3219 nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3220 }
3221
3222 appendStringInfoString(&result,
3223 " </xsd:sequence>\n"
3224 "</xsd:complexType>\n\n");
3225
3226 if (!tableforest)
3227 {
3228 appendStringInfo(&result,
3229 "<xsd:complexType name=\"%s\">\n"
3230 " <xsd:sequence>\n"
3231 " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3232 " </xsd:sequence>\n"
3233 "</xsd:complexType>\n\n",
3234 tabletypename, rowtypename);
3235
3236 appendStringInfo(&result,
3237 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3238 xmltn, tabletypename);
3239 }
3240 else
3241 appendStringInfo(&result,
3242 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3243 xmltn, rowtypename);
3244
3245 xsd_schema_element_end(&result);
3246
3247 return result.data;
3248 }
3249
3250
3251 /*
3252 * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3253 * section 9.12.
3254 */
3255 static const char *
map_sql_schema_to_xmlschema_types(Oid nspid,List * relid_list,bool nulls,bool tableforest,const char * targetns)3256 map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3257 bool tableforest, const char *targetns)
3258 {
3259 char *dbname;
3260 char *nspname;
3261 char *xmlsn;
3262 char *schematypename;
3263 StringInfoData result;
3264 ListCell *cell;
3265
3266 dbname = get_database_name(MyDatabaseId);
3267 nspname = get_namespace_name(nspid);
3268
3269 initStringInfo(&result);
3270
3271 xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3272
3273 schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3274 dbname,
3275 nspname,
3276 NULL);
3277
3278 appendStringInfo(&result,
3279 "<xsd:complexType name=\"%s\">\n", schematypename);
3280 if (!tableforest)
3281 appendStringInfoString(&result,
3282 " <xsd:all>\n");
3283 else
3284 appendStringInfoString(&result,
3285 " <xsd:sequence>\n");
3286
3287 foreach(cell, relid_list)
3288 {
3289 Oid relid = lfirst_oid(cell);
3290 char *relname = get_rel_name(relid);
3291 char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3292 char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3293 dbname,
3294 nspname,
3295 relname);
3296
3297 if (!tableforest)
3298 appendStringInfo(&result,
3299 " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3300 xmltn, tabletypename);
3301 else
3302 appendStringInfo(&result,
3303 " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3304 xmltn, tabletypename);
3305 }
3306
3307 if (!tableforest)
3308 appendStringInfoString(&result,
3309 " </xsd:all>\n");
3310 else
3311 appendStringInfoString(&result,
3312 " </xsd:sequence>\n");
3313 appendStringInfoString(&result,
3314 "</xsd:complexType>\n\n");
3315
3316 appendStringInfo(&result,
3317 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3318 xmlsn, schematypename);
3319
3320 return result.data;
3321 }
3322
3323
3324 /*
3325 * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3326 * section 9.15.
3327 */
3328 static const char *
map_sql_catalog_to_xmlschema_types(List * nspid_list,bool nulls,bool tableforest,const char * targetns)3329 map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3330 bool tableforest, const char *targetns)
3331 {
3332 char *dbname;
3333 char *xmlcn;
3334 char *catalogtypename;
3335 StringInfoData result;
3336 ListCell *cell;
3337
3338 dbname = get_database_name(MyDatabaseId);
3339
3340 initStringInfo(&result);
3341
3342 xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3343
3344 catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3345 dbname,
3346 NULL,
3347 NULL);
3348
3349 appendStringInfo(&result,
3350 "<xsd:complexType name=\"%s\">\n", catalogtypename);
3351 appendStringInfoString(&result,
3352 " <xsd:all>\n");
3353
3354 foreach(cell, nspid_list)
3355 {
3356 Oid nspid = lfirst_oid(cell);
3357 char *nspname = get_namespace_name(nspid);
3358 char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3359 char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3360 dbname,
3361 nspname,
3362 NULL);
3363
3364 appendStringInfo(&result,
3365 " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3366 xmlsn, schematypename);
3367 }
3368
3369 appendStringInfoString(&result,
3370 " </xsd:all>\n");
3371 appendStringInfoString(&result,
3372 "</xsd:complexType>\n\n");
3373
3374 appendStringInfo(&result,
3375 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3376 xmlcn, catalogtypename);
3377
3378 return result.data;
3379 }
3380
3381
3382 /*
3383 * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3384 */
3385 static const char *
map_sql_type_to_xml_name(Oid typeoid,int typmod)3386 map_sql_type_to_xml_name(Oid typeoid, int typmod)
3387 {
3388 StringInfoData result;
3389
3390 initStringInfo(&result);
3391
3392 switch (typeoid)
3393 {
3394 case BPCHAROID:
3395 if (typmod == -1)
3396 appendStringInfoString(&result, "CHAR");
3397 else
3398 appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3399 break;
3400 case VARCHAROID:
3401 if (typmod == -1)
3402 appendStringInfoString(&result, "VARCHAR");
3403 else
3404 appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3405 break;
3406 case NUMERICOID:
3407 if (typmod == -1)
3408 appendStringInfoString(&result, "NUMERIC");
3409 else
3410 appendStringInfo(&result, "NUMERIC_%d_%d",
3411 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3412 (typmod - VARHDRSZ) & 0xffff);
3413 break;
3414 case INT4OID:
3415 appendStringInfoString(&result, "INTEGER");
3416 break;
3417 case INT2OID:
3418 appendStringInfoString(&result, "SMALLINT");
3419 break;
3420 case INT8OID:
3421 appendStringInfoString(&result, "BIGINT");
3422 break;
3423 case FLOAT4OID:
3424 appendStringInfoString(&result, "REAL");
3425 break;
3426 case FLOAT8OID:
3427 appendStringInfoString(&result, "DOUBLE");
3428 break;
3429 case BOOLOID:
3430 appendStringInfoString(&result, "BOOLEAN");
3431 break;
3432 case TIMEOID:
3433 if (typmod == -1)
3434 appendStringInfoString(&result, "TIME");
3435 else
3436 appendStringInfo(&result, "TIME_%d", typmod);
3437 break;
3438 case TIMETZOID:
3439 if (typmod == -1)
3440 appendStringInfoString(&result, "TIME_WTZ");
3441 else
3442 appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3443 break;
3444 case TIMESTAMPOID:
3445 if (typmod == -1)
3446 appendStringInfoString(&result, "TIMESTAMP");
3447 else
3448 appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3449 break;
3450 case TIMESTAMPTZOID:
3451 if (typmod == -1)
3452 appendStringInfoString(&result, "TIMESTAMP_WTZ");
3453 else
3454 appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3455 break;
3456 case DATEOID:
3457 appendStringInfoString(&result, "DATE");
3458 break;
3459 case XMLOID:
3460 appendStringInfoString(&result, "XML");
3461 break;
3462 default:
3463 {
3464 HeapTuple tuple;
3465 Form_pg_type typtuple;
3466
3467 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3468 if (!HeapTupleIsValid(tuple))
3469 elog(ERROR, "cache lookup failed for type %u", typeoid);
3470 typtuple = (Form_pg_type) GETSTRUCT(tuple);
3471
3472 appendStringInfoString(&result,
3473 map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3474 get_database_name(MyDatabaseId),
3475 get_namespace_name(typtuple->typnamespace),
3476 NameStr(typtuple->typname)));
3477
3478 ReleaseSysCache(tuple);
3479 }
3480 }
3481
3482 return result.data;
3483 }
3484
3485
3486 /*
3487 * Map a collection of SQL data types to XML Schema data types; see
3488 * SQL/XML:2008 section 9.7.
3489 */
3490 static const char *
map_sql_typecoll_to_xmlschema_types(List * tupdesc_list)3491 map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3492 {
3493 List *uniquetypes = NIL;
3494 int i;
3495 StringInfoData result;
3496 ListCell *cell0;
3497
3498 /* extract all column types used in the set of TupleDescs */
3499 foreach(cell0, tupdesc_list)
3500 {
3501 TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3502
3503 for (i = 0; i < tupdesc->natts; i++)
3504 {
3505 Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3506
3507 if (att->attisdropped)
3508 continue;
3509 uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3510 }
3511 }
3512
3513 /* add base types of domains */
3514 foreach(cell0, uniquetypes)
3515 {
3516 Oid typid = lfirst_oid(cell0);
3517 Oid basetypid = getBaseType(typid);
3518
3519 if (basetypid != typid)
3520 uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3521 }
3522
3523 /* Convert to textual form */
3524 initStringInfo(&result);
3525
3526 foreach(cell0, uniquetypes)
3527 {
3528 appendStringInfo(&result, "%s\n",
3529 map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3530 -1));
3531 }
3532
3533 return result.data;
3534 }
3535
3536
3537 /*
3538 * Map an SQL data type to a named XML Schema data type; see
3539 * SQL/XML:2008 sections 9.5 and 9.6.
3540 *
3541 * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3542 * a name attribute, which this function does. The name-less version
3543 * 9.5 doesn't appear to be required anywhere.)
3544 */
3545 static const char *
map_sql_type_to_xmlschema_type(Oid typeoid,int typmod)3546 map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3547 {
3548 StringInfoData result;
3549 const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3550
3551 initStringInfo(&result);
3552
3553 if (typeoid == XMLOID)
3554 {
3555 appendStringInfoString(&result,
3556 "<xsd:complexType mixed=\"true\">\n"
3557 " <xsd:sequence>\n"
3558 " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3559 " </xsd:sequence>\n"
3560 "</xsd:complexType>\n");
3561 }
3562 else
3563 {
3564 appendStringInfo(&result,
3565 "<xsd:simpleType name=\"%s\">\n", typename);
3566
3567 switch (typeoid)
3568 {
3569 case BPCHAROID:
3570 case VARCHAROID:
3571 case TEXTOID:
3572 appendStringInfoString(&result,
3573 " <xsd:restriction base=\"xsd:string\">\n");
3574 if (typmod != -1)
3575 appendStringInfo(&result,
3576 " <xsd:maxLength value=\"%d\"/>\n",
3577 typmod - VARHDRSZ);
3578 appendStringInfoString(&result, " </xsd:restriction>\n");
3579 break;
3580
3581 case BYTEAOID:
3582 appendStringInfo(&result,
3583 " <xsd:restriction base=\"xsd:%s\">\n"
3584 " </xsd:restriction>\n",
3585 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3586 break;
3587
3588 case NUMERICOID:
3589 if (typmod != -1)
3590 appendStringInfo(&result,
3591 " <xsd:restriction base=\"xsd:decimal\">\n"
3592 " <xsd:totalDigits value=\"%d\"/>\n"
3593 " <xsd:fractionDigits value=\"%d\"/>\n"
3594 " </xsd:restriction>\n",
3595 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3596 (typmod - VARHDRSZ) & 0xffff);
3597 break;
3598
3599 case INT2OID:
3600 appendStringInfo(&result,
3601 " <xsd:restriction base=\"xsd:short\">\n"
3602 " <xsd:maxInclusive value=\"%d\"/>\n"
3603 " <xsd:minInclusive value=\"%d\"/>\n"
3604 " </xsd:restriction>\n",
3605 SHRT_MAX, SHRT_MIN);
3606 break;
3607
3608 case INT4OID:
3609 appendStringInfo(&result,
3610 " <xsd:restriction base=\"xsd:int\">\n"
3611 " <xsd:maxInclusive value=\"%d\"/>\n"
3612 " <xsd:minInclusive value=\"%d\"/>\n"
3613 " </xsd:restriction>\n",
3614 INT_MAX, INT_MIN);
3615 break;
3616
3617 case INT8OID:
3618 appendStringInfo(&result,
3619 " <xsd:restriction base=\"xsd:long\">\n"
3620 " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3621 " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3622 " </xsd:restriction>\n",
3623 (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1,
3624 (((uint64) 1) << (sizeof(int64) * 8 - 1)));
3625 break;
3626
3627 case FLOAT4OID:
3628 appendStringInfoString(&result,
3629 " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3630 break;
3631
3632 case FLOAT8OID:
3633 appendStringInfoString(&result,
3634 " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3635 break;
3636
3637 case BOOLOID:
3638 appendStringInfoString(&result,
3639 " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3640 break;
3641
3642 case TIMEOID:
3643 case TIMETZOID:
3644 {
3645 const char *tz = (typeoid == TIMETZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3646
3647 if (typmod == -1)
3648 appendStringInfo(&result,
3649 " <xsd:restriction base=\"xsd:time\">\n"
3650 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3651 " </xsd:restriction>\n", tz);
3652 else if (typmod == 0)
3653 appendStringInfo(&result,
3654 " <xsd:restriction base=\"xsd:time\">\n"
3655 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3656 " </xsd:restriction>\n", tz);
3657 else
3658 appendStringInfo(&result,
3659 " <xsd:restriction base=\"xsd:time\">\n"
3660 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3661 " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3662 break;
3663 }
3664
3665 case TIMESTAMPOID:
3666 case TIMESTAMPTZOID:
3667 {
3668 const char *tz = (typeoid == TIMESTAMPTZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3669
3670 if (typmod == -1)
3671 appendStringInfo(&result,
3672 " <xsd:restriction base=\"xsd:dateTime\">\n"
3673 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3674 " </xsd:restriction>\n", tz);
3675 else if (typmod == 0)
3676 appendStringInfo(&result,
3677 " <xsd:restriction base=\"xsd:dateTime\">\n"
3678 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3679 " </xsd:restriction>\n", tz);
3680 else
3681 appendStringInfo(&result,
3682 " <xsd:restriction base=\"xsd:dateTime\">\n"
3683 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3684 " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3685 break;
3686 }
3687
3688 case DATEOID:
3689 appendStringInfoString(&result,
3690 " <xsd:restriction base=\"xsd:date\">\n"
3691 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3692 " </xsd:restriction>\n");
3693 break;
3694
3695 default:
3696 if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3697 {
3698 Oid base_typeoid;
3699 int32 base_typmod = -1;
3700
3701 base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3702
3703 appendStringInfo(&result,
3704 " <xsd:restriction base=\"%s\"/>\n",
3705 map_sql_type_to_xml_name(base_typeoid, base_typmod));
3706 }
3707 break;
3708 }
3709 appendStringInfoString(&result, "</xsd:simpleType>\n");
3710 }
3711
3712 return result.data;
3713 }
3714
3715
3716 /*
3717 * Map an SQL row to an XML element, taking the row from the active
3718 * SPI cursor. See also SQL/XML:2008 section 9.10.
3719 */
3720 static void
SPI_sql_row_to_xmlelement(uint64 rownum,StringInfo result,char * tablename,bool nulls,bool tableforest,const char * targetns,bool top_level)3721 SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3722 bool nulls, bool tableforest,
3723 const char *targetns, bool top_level)
3724 {
3725 int i;
3726 char *xmltn;
3727
3728 if (tablename)
3729 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3730 else
3731 {
3732 if (tableforest)
3733 xmltn = "row";
3734 else
3735 xmltn = "table";
3736 }
3737
3738 if (tableforest)
3739 xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3740 else
3741 appendStringInfoString(result, "<row>\n");
3742
3743 for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3744 {
3745 char *colname;
3746 Datum colval;
3747 bool isnull;
3748
3749 colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3750 true, false);
3751 colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3752 SPI_tuptable->tupdesc,
3753 i,
3754 &isnull);
3755 if (isnull)
3756 {
3757 if (nulls)
3758 appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
3759 }
3760 else
3761 appendStringInfo(result, " <%s>%s</%s>\n",
3762 colname,
3763 map_sql_value_to_xml_value(colval,
3764 SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3765 colname);
3766 }
3767
3768 if (tableforest)
3769 {
3770 xmldata_root_element_end(result, xmltn);
3771 appendStringInfoChar(result, '\n');
3772 }
3773 else
3774 appendStringInfoString(result, "</row>\n\n");
3775 }
3776
3777
3778 /*
3779 * XPath related functions
3780 */
3781
3782 #ifdef USE_LIBXML
3783
3784 /*
3785 * Convert XML node to text (dump subtree in case of element,
3786 * return value otherwise)
3787 */
3788 static text *
xml_xmlnodetoxmltype(xmlNodePtr cur,PgXmlErrorContext * xmlerrcxt)3789 xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
3790 {
3791 xmltype *result;
3792
3793 if (cur->type == XML_ELEMENT_NODE)
3794 {
3795 xmlBufferPtr buf;
3796 xmlNodePtr cur_copy;
3797
3798 buf = xmlBufferCreate();
3799
3800 /*
3801 * The result of xmlNodeDump() won't contain namespace definitions
3802 * from parent nodes, but xmlCopyNode() duplicates a node along with
3803 * its required namespace definitions.
3804 */
3805 cur_copy = xmlCopyNode(cur, 1);
3806
3807 if (cur_copy == NULL)
3808 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3809 "could not copy node");
3810
3811 PG_TRY();
3812 {
3813 xmlNodeDump(buf, NULL, cur_copy, 0, 1);
3814 result = xmlBuffer_to_xmltype(buf);
3815 }
3816 PG_CATCH();
3817 {
3818 xmlFreeNode(cur_copy);
3819 xmlBufferFree(buf);
3820 PG_RE_THROW();
3821 }
3822 PG_END_TRY();
3823 xmlFreeNode(cur_copy);
3824 xmlBufferFree(buf);
3825 }
3826 else
3827 {
3828 xmlChar *str;
3829
3830 str = xmlXPathCastNodeToString(cur);
3831 PG_TRY();
3832 {
3833 /* Here we rely on XML having the same representation as TEXT */
3834 char *escaped = escape_xml((char *) str);
3835
3836 result = (xmltype *) cstring_to_text(escaped);
3837 pfree(escaped);
3838 }
3839 PG_CATCH();
3840 {
3841 xmlFree(str);
3842 PG_RE_THROW();
3843 }
3844 PG_END_TRY();
3845 xmlFree(str);
3846 }
3847
3848 return result;
3849 }
3850
3851 /*
3852 * Convert an XML XPath object (the result of evaluating an XPath expression)
3853 * to an array of xml values, which are appended to astate. The function
3854 * result value is the number of elements in the array.
3855 *
3856 * If "astate" is NULL then we don't generate the array value, but we still
3857 * return the number of elements it would have had.
3858 *
3859 * Nodesets are converted to an array containing the nodes' textual
3860 * representations. Primitive values (float, double, string) are converted
3861 * to a single-element array containing the value's string representation.
3862 */
3863 static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,ArrayBuildState * astate,PgXmlErrorContext * xmlerrcxt)3864 xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3865 ArrayBuildState *astate,
3866 PgXmlErrorContext *xmlerrcxt)
3867 {
3868 int result = 0;
3869 Datum datum;
3870 Oid datumtype;
3871 char *result_str;
3872
3873 switch (xpathobj->type)
3874 {
3875 case XPATH_NODESET:
3876 if (xpathobj->nodesetval != NULL)
3877 {
3878 result = xpathobj->nodesetval->nodeNr;
3879 if (astate != NULL)
3880 {
3881 int i;
3882
3883 for (i = 0; i < result; i++)
3884 {
3885 datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
3886 xmlerrcxt));
3887 (void) accumArrayResult(astate, datum, false,
3888 XMLOID, CurrentMemoryContext);
3889 }
3890 }
3891 }
3892 return result;
3893
3894 case XPATH_BOOLEAN:
3895 if (astate == NULL)
3896 return 1;
3897 datum = BoolGetDatum(xpathobj->boolval);
3898 datumtype = BOOLOID;
3899 break;
3900
3901 case XPATH_NUMBER:
3902 if (astate == NULL)
3903 return 1;
3904 datum = Float8GetDatum(xpathobj->floatval);
3905 datumtype = FLOAT8OID;
3906 break;
3907
3908 case XPATH_STRING:
3909 if (astate == NULL)
3910 return 1;
3911 datum = CStringGetDatum((char *) xpathobj->stringval);
3912 datumtype = CSTRINGOID;
3913 break;
3914
3915 default:
3916 elog(ERROR, "xpath expression result type %d is unsupported",
3917 xpathobj->type);
3918 return 0; /* keep compiler quiet */
3919 }
3920
3921 /* Common code for scalar-value cases */
3922 result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3923 datum = PointerGetDatum(cstring_to_xmltype(result_str));
3924 (void) accumArrayResult(astate, datum, false,
3925 XMLOID, CurrentMemoryContext);
3926 return 1;
3927 }
3928
3929
3930 /*
3931 * Common code for xpath() and xmlexists()
3932 *
3933 * Evaluate XPath expression and return number of nodes in res_items
3934 * and array of XML values in astate. Either of those pointers can be
3935 * NULL if the corresponding result isn't wanted.
3936 *
3937 * It is up to the user to ensure that the XML passed is in fact
3938 * an XML document - XPath doesn't work easily on fragments without
3939 * a context node being known.
3940 */
3941 static void
xpath_internal(text * xpath_expr_text,xmltype * data,ArrayType * namespaces,int * res_nitems,ArrayBuildState * astate)3942 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3943 int *res_nitems, ArrayBuildState *astate)
3944 {
3945 PgXmlErrorContext *xmlerrcxt;
3946 volatile xmlParserCtxtPtr ctxt = NULL;
3947 volatile xmlDocPtr doc = NULL;
3948 volatile xmlXPathContextPtr xpathctx = NULL;
3949 volatile xmlXPathCompExprPtr xpathcomp = NULL;
3950 volatile xmlXPathObjectPtr xpathobj = NULL;
3951 char *datastr;
3952 int32 len;
3953 int32 xpath_len;
3954 xmlChar *string;
3955 xmlChar *xpath_expr;
3956 size_t xmldecl_len = 0;
3957 int i;
3958 int ndim;
3959 Datum *ns_names_uris;
3960 bool *ns_names_uris_nulls;
3961 int ns_count;
3962
3963 /*
3964 * Namespace mappings are passed as text[]. If an empty array is passed
3965 * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
3966 * Else, a 2-dimensional array with length of the second axis being equal
3967 * to 2 should be passed, i.e., every subarray contains 2 elements, the
3968 * first element defining the name, the second one the URI. Example:
3969 * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
3970 * 'http://example2.com']].
3971 */
3972 ndim = namespaces ? ARR_NDIM(namespaces) : 0;
3973 if (ndim != 0)
3974 {
3975 int *dims;
3976
3977 dims = ARR_DIMS(namespaces);
3978
3979 if (ndim != 2 || dims[1] != 2)
3980 ereport(ERROR,
3981 (errcode(ERRCODE_DATA_EXCEPTION),
3982 errmsg("invalid array for XML namespace mapping"),
3983 errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
3984
3985 Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
3986
3987 deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
3988 &ns_names_uris, &ns_names_uris_nulls,
3989 &ns_count);
3990
3991 Assert((ns_count % 2) == 0); /* checked above */
3992 ns_count /= 2; /* count pairs only */
3993 }
3994 else
3995 {
3996 ns_names_uris = NULL;
3997 ns_names_uris_nulls = NULL;
3998 ns_count = 0;
3999 }
4000
4001 datastr = VARDATA(data);
4002 len = VARSIZE(data) - VARHDRSZ;
4003 xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4004 if (xpath_len == 0)
4005 ereport(ERROR,
4006 (errcode(ERRCODE_DATA_EXCEPTION),
4007 errmsg("empty XPath expression")));
4008
4009 string = pg_xmlCharStrndup(datastr, len);
4010 xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4011
4012 /*
4013 * In a UTF8 database, skip any xml declaration, which might assert
4014 * another encoding. Ignore parse_xml_decl() failure, letting
4015 * xmlCtxtReadMemory() report parse errors. Documentation disclaims
4016 * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4017 * those scenarios bug-compatible with historical behavior.
4018 */
4019 if (GetDatabaseEncoding() == PG_UTF8)
4020 parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4021
4022 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4023
4024 PG_TRY();
4025 {
4026 xmlInitParser();
4027
4028 /*
4029 * redundant XML parsing (two parsings for the same value during one
4030 * command execution are possible)
4031 */
4032 ctxt = xmlNewParserCtxt();
4033 if (ctxt == NULL || xmlerrcxt->err_occurred)
4034 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4035 "could not allocate parser context");
4036 doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4037 len - xmldecl_len, NULL, NULL, 0);
4038 if (doc == NULL || xmlerrcxt->err_occurred)
4039 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4040 "could not parse XML document");
4041 xpathctx = xmlXPathNewContext(doc);
4042 if (xpathctx == NULL || xmlerrcxt->err_occurred)
4043 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4044 "could not allocate XPath context");
4045 xpathctx->node = (xmlNodePtr) doc;
4046
4047 /* register namespaces, if any */
4048 if (ns_count > 0)
4049 {
4050 for (i = 0; i < ns_count; i++)
4051 {
4052 char *ns_name;
4053 char *ns_uri;
4054
4055 if (ns_names_uris_nulls[i * 2] ||
4056 ns_names_uris_nulls[i * 2 + 1])
4057 ereport(ERROR,
4058 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4059 errmsg("neither namespace name nor URI may be null")));
4060 ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4061 ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4062 if (xmlXPathRegisterNs(xpathctx,
4063 (xmlChar *) ns_name,
4064 (xmlChar *) ns_uri) != 0)
4065 ereport(ERROR, /* is this an internal error??? */
4066 (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4067 ns_name, ns_uri)));
4068 }
4069 }
4070
4071 xpathcomp = xmlXPathCompile(xpath_expr);
4072 if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4073 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4074 "invalid XPath expression");
4075
4076 /*
4077 * Version 2.6.27 introduces a function named
4078 * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4079 * but we can derive the existence by whether any nodes are returned,
4080 * thereby preventing a library version upgrade and keeping the code
4081 * the same.
4082 */
4083 xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4084 if (xpathobj == NULL || xmlerrcxt->err_occurred)
4085 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4086 "could not create XPath object");
4087
4088 /*
4089 * Extract the results as requested.
4090 */
4091 if (res_nitems != NULL)
4092 *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4093 else
4094 (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4095 }
4096 PG_CATCH();
4097 {
4098 if (xpathobj)
4099 xmlXPathFreeObject(xpathobj);
4100 if (xpathcomp)
4101 xmlXPathFreeCompExpr(xpathcomp);
4102 if (xpathctx)
4103 xmlXPathFreeContext(xpathctx);
4104 if (doc)
4105 xmlFreeDoc(doc);
4106 if (ctxt)
4107 xmlFreeParserCtxt(ctxt);
4108
4109 pg_xml_done(xmlerrcxt, true);
4110
4111 PG_RE_THROW();
4112 }
4113 PG_END_TRY();
4114
4115 xmlXPathFreeObject(xpathobj);
4116 xmlXPathFreeCompExpr(xpathcomp);
4117 xmlXPathFreeContext(xpathctx);
4118 xmlFreeDoc(doc);
4119 xmlFreeParserCtxt(ctxt);
4120
4121 pg_xml_done(xmlerrcxt, false);
4122 }
4123 #endif /* USE_LIBXML */
4124
4125 /*
4126 * Evaluate XPath expression and return array of XML values.
4127 *
4128 * As we have no support of XQuery sequences yet, this function seems
4129 * to be the most useful one (array of XML functions plays a role of
4130 * some kind of substitution for XQuery sequences).
4131 */
4132 Datum
xpath(PG_FUNCTION_ARGS)4133 xpath(PG_FUNCTION_ARGS)
4134 {
4135 #ifdef USE_LIBXML
4136 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4137 xmltype *data = PG_GETARG_XML_P(1);
4138 ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4139 ArrayBuildState *astate;
4140
4141 astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4142 xpath_internal(xpath_expr_text, data, namespaces,
4143 NULL, astate);
4144 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
4145 #else
4146 NO_XML_SUPPORT();
4147 return 0;
4148 #endif
4149 }
4150
4151 /*
4152 * Determines if the node specified by the supplied XPath exists
4153 * in a given XML document, returning a boolean.
4154 */
4155 Datum
xmlexists(PG_FUNCTION_ARGS)4156 xmlexists(PG_FUNCTION_ARGS)
4157 {
4158 #ifdef USE_LIBXML
4159 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4160 xmltype *data = PG_GETARG_XML_P(1);
4161 int res_nitems;
4162
4163 xpath_internal(xpath_expr_text, data, NULL,
4164 &res_nitems, NULL);
4165
4166 PG_RETURN_BOOL(res_nitems > 0);
4167 #else
4168 NO_XML_SUPPORT();
4169 return 0;
4170 #endif
4171 }
4172
4173 /*
4174 * Determines if the node specified by the supplied XPath exists
4175 * in a given XML document, returning a boolean. Differs from
4176 * xmlexists as it supports namespaces and is not defined in SQL/XML.
4177 */
4178 Datum
xpath_exists(PG_FUNCTION_ARGS)4179 xpath_exists(PG_FUNCTION_ARGS)
4180 {
4181 #ifdef USE_LIBXML
4182 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4183 xmltype *data = PG_GETARG_XML_P(1);
4184 ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4185 int res_nitems;
4186
4187 xpath_internal(xpath_expr_text, data, namespaces,
4188 &res_nitems, NULL);
4189
4190 PG_RETURN_BOOL(res_nitems > 0);
4191 #else
4192 NO_XML_SUPPORT();
4193 return 0;
4194 #endif
4195 }
4196
4197 /*
4198 * Functions for checking well-formed-ness
4199 */
4200
4201 #ifdef USE_LIBXML
4202 static bool
wellformed_xml(text * data,XmlOptionType xmloption_arg)4203 wellformed_xml(text *data, XmlOptionType xmloption_arg)
4204 {
4205 bool result;
4206 volatile xmlDocPtr doc = NULL;
4207
4208 /* We want to catch any exceptions and return false */
4209 PG_TRY();
4210 {
4211 doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4212 result = true;
4213 }
4214 PG_CATCH();
4215 {
4216 FlushErrorState();
4217 result = false;
4218 }
4219 PG_END_TRY();
4220
4221 if (doc)
4222 xmlFreeDoc(doc);
4223
4224 return result;
4225 }
4226 #endif
4227
4228 Datum
xml_is_well_formed(PG_FUNCTION_ARGS)4229 xml_is_well_formed(PG_FUNCTION_ARGS)
4230 {
4231 #ifdef USE_LIBXML
4232 text *data = PG_GETARG_TEXT_PP(0);
4233
4234 PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4235 #else
4236 NO_XML_SUPPORT();
4237 return 0;
4238 #endif /* not USE_LIBXML */
4239 }
4240
4241 Datum
xml_is_well_formed_document(PG_FUNCTION_ARGS)4242 xml_is_well_formed_document(PG_FUNCTION_ARGS)
4243 {
4244 #ifdef USE_LIBXML
4245 text *data = PG_GETARG_TEXT_PP(0);
4246
4247 PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4248 #else
4249 NO_XML_SUPPORT();
4250 return 0;
4251 #endif /* not USE_LIBXML */
4252 }
4253
4254 Datum
xml_is_well_formed_content(PG_FUNCTION_ARGS)4255 xml_is_well_formed_content(PG_FUNCTION_ARGS)
4256 {
4257 #ifdef USE_LIBXML
4258 text *data = PG_GETARG_TEXT_PP(0);
4259
4260 PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4261 #else
4262 NO_XML_SUPPORT();
4263 return 0;
4264 #endif /* not USE_LIBXML */
4265 }
4266
4267 /*
4268 * support functions for XMLTABLE
4269 *
4270 */
4271 #ifdef USE_LIBXML
4272
4273 /*
4274 * Returns private data from executor state. Ensure validity by check with
4275 * MAGIC number.
4276 */
4277 static inline XmlTableBuilderData *
GetXmlTableBuilderPrivateData(TableFuncScanState * state,const char * fname)4278 GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4279 {
4280 XmlTableBuilderData *result;
4281
4282 if (!IsA(state, TableFuncScanState))
4283 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4284 result = (XmlTableBuilderData *) state->opaque;
4285 if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4286 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4287
4288 return result;
4289 }
4290 #endif
4291
4292 /*
4293 * XmlTableInitOpaque
4294 * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4295 * the XML parser.
4296 *
4297 * Note: Because we call pg_xml_init() here and pg_xml_done() in
4298 * XmlTableDestroyOpaque, it is critical for robustness that no other
4299 * executor nodes run until this node is processed to completion. Caller
4300 * must execute this to completion (probably filling a tuplestore to exhaust
4301 * this node in a single pass) instead of using row-per-call mode.
4302 */
4303 static void
XmlTableInitOpaque(TableFuncScanState * state,int natts)4304 XmlTableInitOpaque(TableFuncScanState *state, int natts)
4305 {
4306 #ifdef USE_LIBXML
4307 volatile xmlParserCtxtPtr ctxt = NULL;
4308 XmlTableBuilderData *xtCxt;
4309 PgXmlErrorContext *xmlerrcxt;
4310
4311 xtCxt = palloc0(sizeof(XmlTableBuilderData));
4312 xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4313 xtCxt->natts = natts;
4314 xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4315
4316 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4317
4318 PG_TRY();
4319 {
4320 xmlInitParser();
4321
4322 ctxt = xmlNewParserCtxt();
4323 if (ctxt == NULL || xmlerrcxt->err_occurred)
4324 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4325 "could not allocate parser context");
4326 }
4327 PG_CATCH();
4328 {
4329 if (ctxt != NULL)
4330 xmlFreeParserCtxt(ctxt);
4331
4332 pg_xml_done(xmlerrcxt, true);
4333
4334 PG_RE_THROW();
4335 }
4336 PG_END_TRY();
4337
4338 xtCxt->xmlerrcxt = xmlerrcxt;
4339 xtCxt->ctxt = ctxt;
4340
4341 state->opaque = xtCxt;
4342 #else
4343 NO_XML_SUPPORT();
4344 #endif /* not USE_LIBXML */
4345 }
4346
4347 /*
4348 * XmlTableSetDocument
4349 * Install the input document
4350 */
4351 static void
XmlTableSetDocument(TableFuncScanState * state,Datum value)4352 XmlTableSetDocument(TableFuncScanState *state, Datum value)
4353 {
4354 #ifdef USE_LIBXML
4355 XmlTableBuilderData *xtCxt;
4356 xmltype *xmlval = DatumGetXmlP(value);
4357 char *str;
4358 xmlChar *xstr;
4359 int length;
4360 volatile xmlDocPtr doc = NULL;
4361 volatile xmlXPathContextPtr xpathcxt = NULL;
4362
4363 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4364
4365 /*
4366 * Use out function for casting to string (remove encoding property). See
4367 * comment in xml_out.
4368 */
4369 str = xml_out_internal(xmlval, 0);
4370
4371 length = strlen(str);
4372 xstr = pg_xmlCharStrndup(str, length);
4373
4374 PG_TRY();
4375 {
4376 doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4377 if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4378 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4379 "could not parse XML document");
4380 xpathcxt = xmlXPathNewContext(doc);
4381 if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4382 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4383 "could not allocate XPath context");
4384 xpathcxt->node = (xmlNodePtr) doc;
4385 }
4386 PG_CATCH();
4387 {
4388 if (xpathcxt != NULL)
4389 xmlXPathFreeContext(xpathcxt);
4390 if (doc != NULL)
4391 xmlFreeDoc(doc);
4392
4393 PG_RE_THROW();
4394 }
4395 PG_END_TRY();
4396
4397 xtCxt->doc = doc;
4398 xtCxt->xpathcxt = xpathcxt;
4399 #else
4400 NO_XML_SUPPORT();
4401 #endif /* not USE_LIBXML */
4402 }
4403
4404 /*
4405 * XmlTableSetNamespace
4406 * Add a namespace declaration
4407 */
4408 static void
XmlTableSetNamespace(TableFuncScanState * state,const char * name,const char * uri)4409 XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4410 {
4411 #ifdef USE_LIBXML
4412 XmlTableBuilderData *xtCxt;
4413
4414 if (name == NULL)
4415 ereport(ERROR,
4416 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4417 errmsg("DEFAULT namespace is not supported")));
4418 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4419
4420 if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4421 pg_xmlCharStrndup(name, strlen(name)),
4422 pg_xmlCharStrndup(uri, strlen(uri))))
4423 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4424 "could not set XML namespace");
4425 #else
4426 NO_XML_SUPPORT();
4427 #endif /* not USE_LIBXML */
4428 }
4429
4430 /*
4431 * XmlTableSetRowFilter
4432 * Install the row-filter Xpath expression.
4433 */
4434 static void
XmlTableSetRowFilter(TableFuncScanState * state,const char * path)4435 XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4436 {
4437 #ifdef USE_LIBXML
4438 XmlTableBuilderData *xtCxt;
4439 xmlChar *xstr;
4440
4441 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4442
4443 if (*path == '\0')
4444 ereport(ERROR,
4445 (errcode(ERRCODE_DATA_EXCEPTION),
4446 errmsg("row path filter must not be empty string")));
4447
4448 xstr = pg_xmlCharStrndup(path, strlen(path));
4449
4450 xtCxt->xpathcomp = xmlXPathCompile(xstr);
4451 if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4452 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4453 "invalid XPath expression");
4454 #else
4455 NO_XML_SUPPORT();
4456 #endif /* not USE_LIBXML */
4457 }
4458
4459 /*
4460 * XmlTableSetColumnFilter
4461 * Install the column-filter Xpath expression, for the given column.
4462 */
4463 static void
XmlTableSetColumnFilter(TableFuncScanState * state,const char * path,int colnum)4464 XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4465 {
4466 #ifdef USE_LIBXML
4467 XmlTableBuilderData *xtCxt;
4468 xmlChar *xstr;
4469
4470 AssertArg(PointerIsValid(path));
4471
4472 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4473
4474 if (*path == '\0')
4475 ereport(ERROR,
4476 (errcode(ERRCODE_DATA_EXCEPTION),
4477 errmsg("column path filter must not be empty string")));
4478
4479 xstr = pg_xmlCharStrndup(path, strlen(path));
4480
4481 xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4482 if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4483 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4484 "invalid XPath expression");
4485 #else
4486 NO_XML_SUPPORT();
4487 #endif /* not USE_LIBXML */
4488 }
4489
4490 /*
4491 * XmlTableFetchRow
4492 * Prepare the next "current" tuple for upcoming GetValue calls.
4493 * Returns false if the row-filter expression returned no more rows.
4494 */
4495 static bool
XmlTableFetchRow(TableFuncScanState * state)4496 XmlTableFetchRow(TableFuncScanState *state)
4497 {
4498 #ifdef USE_LIBXML
4499 XmlTableBuilderData *xtCxt;
4500
4501 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4502
4503 /*
4504 * XmlTable returns table - set of composite values. The error context, is
4505 * used for producement more values, between two calls, there can be
4506 * created and used another libxml2 error context. It is libxml2 global
4507 * value, so it should be refreshed any time before any libxml2 usage,
4508 * that is finished by returning some value.
4509 */
4510 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4511
4512 if (xtCxt->xpathobj == NULL)
4513 {
4514 xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4515 if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4516 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4517 "could not create XPath object");
4518
4519 xtCxt->row_count = 0;
4520 }
4521
4522 if (xtCxt->xpathobj->type == XPATH_NODESET)
4523 {
4524 if (xtCxt->xpathobj->nodesetval != NULL)
4525 {
4526 if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4527 return true;
4528 }
4529 }
4530
4531 return false;
4532 #else
4533 NO_XML_SUPPORT();
4534 return false;
4535 #endif /* not USE_LIBXML */
4536 }
4537
4538 /*
4539 * XmlTableGetValue
4540 * Return the value for column number 'colnum' for the current row. If
4541 * column -1 is requested, return representation of the whole row.
4542 *
4543 * This leaks memory, so be sure to reset often the context in which it's
4544 * called.
4545 */
4546 static Datum
XmlTableGetValue(TableFuncScanState * state,int colnum,Oid typid,int32 typmod,bool * isnull)4547 XmlTableGetValue(TableFuncScanState *state, int colnum,
4548 Oid typid, int32 typmod, bool *isnull)
4549 {
4550 #ifdef USE_LIBXML
4551 XmlTableBuilderData *xtCxt;
4552 Datum result = (Datum) 0;
4553 xmlNodePtr cur;
4554 char *cstr = NULL;
4555 volatile xmlXPathObjectPtr xpathobj = NULL;
4556
4557 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4558
4559 Assert(xtCxt->xpathobj &&
4560 xtCxt->xpathobj->type == XPATH_NODESET &&
4561 xtCxt->xpathobj->nodesetval != NULL);
4562
4563 /* Propagate context related error context to libxml2 */
4564 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4565
4566 *isnull = false;
4567
4568 cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4569
4570 Assert(xtCxt->xpathscomp[colnum] != NULL);
4571
4572 PG_TRY();
4573 {
4574 /* Set current node as entry point for XPath evaluation */
4575 xtCxt->xpathcxt->node = cur;
4576
4577 /* Evaluate column path */
4578 xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4579 if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4580 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4581 "could not create XPath object");
4582
4583 /*
4584 * There are four possible cases, depending on the number of nodes
4585 * returned by the XPath expression and the type of the target column:
4586 * a) XPath returns no nodes. b) One node is returned, and column is
4587 * of type XML. c) One node, column type other than XML. d) Multiple
4588 * nodes are returned.
4589 */
4590 if (xpathobj->type == XPATH_NODESET)
4591 {
4592 int count = 0;
4593
4594 if (xpathobj->nodesetval != NULL)
4595 count = xpathobj->nodesetval->nodeNr;
4596
4597 if (xpathobj->nodesetval == NULL || count == 0)
4598 {
4599 *isnull = true;
4600 }
4601 else if (count == 1 && typid == XMLOID)
4602 {
4603 text *textstr;
4604
4605 /* simple case, result is one value */
4606 textstr = xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[0],
4607 xtCxt->xmlerrcxt);
4608 cstr = text_to_cstring(textstr);
4609 }
4610 else if (count == 1)
4611 {
4612 xmlChar *str;
4613 xmlNodePtr node;
4614
4615 node = xpathobj->nodesetval->nodeTab[0];
4616 if (node->type == XML_NAMESPACE_DECL)
4617 ereport(ERROR,
4618 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4619 errmsg("XMLTABLE cannot cast a namespace node to a non-XML result type")));
4620
4621 /*
4622 * Most nodes (elements and even attributes) store their data
4623 * in children nodes. If they don't have children nodes, it
4624 * means that they are empty (e.g. <element/>). Text nodes and
4625 * CDATA sections are an exception: they don't have children
4626 * but have content in the Text/CDATA node itself.
4627 */
4628 if (node->type != XML_CDATA_SECTION_NODE &&
4629 node->type != XML_TEXT_NODE)
4630 node = node->xmlChildrenNode;
4631
4632 str = xmlNodeListGetString(xtCxt->doc, node, 1);
4633 if (str != NULL)
4634 {
4635 PG_TRY();
4636 {
4637 cstr = pstrdup((char *) str);
4638 }
4639 PG_CATCH();
4640 {
4641 xmlFree(str);
4642 PG_RE_THROW();
4643 }
4644 PG_END_TRY();
4645 xmlFree(str);
4646 }
4647 else
4648 {
4649 /* Ensure mapping of empty tags to PostgreSQL values. */
4650 cstr = "";
4651 }
4652 }
4653 else
4654 {
4655 StringInfoData str;
4656 int i;
4657
4658 Assert(count > 1);
4659
4660 /*
4661 * When evaluating the XPath expression returns multiple
4662 * nodes, the result is the concatenation of them all. The
4663 * target type must be XML.
4664 */
4665 if (typid != XMLOID)
4666 ereport(ERROR,
4667 (errcode(ERRCODE_CARDINALITY_VIOLATION),
4668 errmsg("more than one value returned by column XPath expression")));
4669
4670 /* Concatenate serialized values */
4671 initStringInfo(&str);
4672 for (i = 0; i < count; i++)
4673 {
4674 appendStringInfoText(&str,
4675 xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4676 xtCxt->xmlerrcxt));
4677 }
4678 cstr = str.data;
4679 }
4680 }
4681 else if (xpathobj->type == XPATH_STRING)
4682 {
4683 cstr = (char *) xpathobj->stringval;
4684 }
4685 else
4686 elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
4687
4688 /*
4689 * By here, either cstr contains the result value, or the isnull flag
4690 * has been set.
4691 */
4692 Assert(cstr || *isnull);
4693
4694 if (!*isnull)
4695 result = InputFunctionCall(&state->in_functions[colnum],
4696 cstr,
4697 state->typioparams[colnum],
4698 typmod);
4699 }
4700 PG_CATCH();
4701 {
4702 if (xpathobj != NULL)
4703 xmlXPathFreeObject(xpathobj);
4704 PG_RE_THROW();
4705 }
4706 PG_END_TRY();
4707
4708 xmlXPathFreeObject(xpathobj);
4709
4710 return result;
4711 #else
4712 NO_XML_SUPPORT();
4713 return 0;
4714 #endif /* not USE_LIBXML */
4715 }
4716
4717 /*
4718 * XmlTableDestroyOpaque
4719 * Release all libxml2 resources
4720 */
4721 static void
XmlTableDestroyOpaque(TableFuncScanState * state)4722 XmlTableDestroyOpaque(TableFuncScanState *state)
4723 {
4724 #ifdef USE_LIBXML
4725 XmlTableBuilderData *xtCxt;
4726
4727 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4728
4729 /* Propagate context related error context to libxml2 */
4730 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4731
4732 if (xtCxt->xpathscomp != NULL)
4733 {
4734 int i;
4735
4736 for (i = 0; i < xtCxt->natts; i++)
4737 if (xtCxt->xpathscomp[i] != NULL)
4738 xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
4739 }
4740
4741 if (xtCxt->xpathobj != NULL)
4742 xmlXPathFreeObject(xtCxt->xpathobj);
4743 if (xtCxt->xpathcomp != NULL)
4744 xmlXPathFreeCompExpr(xtCxt->xpathcomp);
4745 if (xtCxt->xpathcxt != NULL)
4746 xmlXPathFreeContext(xtCxt->xpathcxt);
4747 if (xtCxt->doc != NULL)
4748 xmlFreeDoc(xtCxt->doc);
4749 if (xtCxt->ctxt != NULL)
4750 xmlFreeParserCtxt(xtCxt->ctxt);
4751
4752 pg_xml_done(xtCxt->xmlerrcxt, true);
4753
4754 /* not valid anymore */
4755 xtCxt->magic = 0;
4756 state->opaque = NULL;
4757
4758 #else
4759 NO_XML_SUPPORT();
4760 #endif /* not USE_LIBXML */
4761 }
4762