1 /*-------------------------------------------------------------------------
2 *
3 * xml.c
4 * XML data type support.
5 *
6 *
7 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/backend/utils/adt/xml.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15 /*
16 * Generally, XML type support is only available when libxml use was
17 * configured during the build. But even if that is not done, the
18 * type and all the functions are available, but most of them will
19 * fail. For one thing, this avoids having to manage variant catalog
20 * installations. But it also has nice effects such as that you can
21 * dump a database containing XML type data even if the server is not
22 * linked with libxml. Thus, make sure xml_out() works even if nothing
23 * else does.
24 */
25
26 /*
27 * Notes on memory management:
28 *
29 * Sometimes libxml allocates global structures in the hope that it can reuse
30 * them later on. This makes it impractical to change the xmlMemSetup
31 * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 * allocated with malloc() or vice versa. Since libxml might be used by
33 * loadable modules, eg libperl, our only safe choices are to change the
34 * functions at postmaster/backend launch or not at all. Since we'd rather
35 * not activate libxml in sessions that might never use it, the latter choice
36 * is the preferred one. However, for debugging purposes it can be awfully
37 * handy to constrain libxml's allocations to be done in a specific palloc
38 * context, where they're easy to track. Therefore there is code here that
39 * can be enabled in debug builds to redirect libxml's allocations into a
40 * special context LibxmlContext. It's not recommended to turn this on in
41 * a production build because of the possibility of bad interactions with
42 * external modules.
43 */
44 /* #define USE_LIBXMLCONTEXT */
45
46 #include "postgres.h"
47
48 #ifdef USE_LIBXML
49 #include <libxml/chvalid.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/tree.h>
53 #include <libxml/uri.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/xmlversion.h>
56 #include <libxml/xmlwriter.h>
57 #include <libxml/xpath.h>
58 #include <libxml/xpathInternals.h>
59
60 /*
61 * We used to check for xmlStructuredErrorContext via a configure test; but
62 * that doesn't work on Windows, so instead use this grottier method of
63 * testing the library version number.
64 */
65 #if LIBXML_VERSION >= 20704
66 #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
67 #endif
68 #endif /* USE_LIBXML */
69
70 #include "access/htup_details.h"
71 #include "access/table.h"
72 #include "catalog/namespace.h"
73 #include "catalog/pg_class.h"
74 #include "catalog/pg_type.h"
75 #include "commands/dbcommands.h"
76 #include "executor/spi.h"
77 #include "executor/tablefunc.h"
78 #include "fmgr.h"
79 #include "lib/stringinfo.h"
80 #include "libpq/pqformat.h"
81 #include "mb/pg_wchar.h"
82 #include "miscadmin.h"
83 #include "nodes/execnodes.h"
84 #include "nodes/nodeFuncs.h"
85 #include "utils/array.h"
86 #include "utils/builtins.h"
87 #include "utils/date.h"
88 #include "utils/datetime.h"
89 #include "utils/lsyscache.h"
90 #include "utils/memutils.h"
91 #include "utils/rel.h"
92 #include "utils/syscache.h"
93 #include "utils/xml.h"
94
95
/*
 * GUC variables.
 *
 * xmloption is handed to the parser by xml_in(), xml_recv() and texttoxml()
 * as the document-versus-content mode.
 *
 * NOTE(review): xmlbinary is not referenced in this part of the file;
 * presumably it selects how binary values are encoded when mapped to XML --
 * confirm against its users.
 */
int			xmlbinary;
int			xmloption;
99
100 #ifdef USE_LIBXML
101
/* random number to identify PgXmlErrorContext */
#define ERRCXT_MAGIC	68275028

/*
 * Error-handling state for one use of libxml.
 *
 * pg_xml_init() allocates and fills one of these and installs it as the
 * context pointer of libxml's structured error handler; pg_xml_done()
 * restores the saved handlers and frees it.
 */
struct PgXmlErrorContext
{
	/* ERRCXT_MAGIC while the struct is live; pg_xml_done resets it to 0 */
	int			magic;
	/* strictness argument passed to pg_xml_init */
	PgXmlStrictness strictness;
	/* current error status and accumulated message, if any */
	bool		err_occurred;
	StringInfoData err_buf;
	/* previous libxml error handling state (saved by pg_xml_init) */
	xmlStructuredErrorFunc saved_errfunc;
	void	   *saved_errcxt;
	/* previous libxml entity handler (saved by pg_xml_init) */
	xmlExternalEntityLoader saved_entityfunc;
};
119
/*
 * Forward declarations of static helpers that exist only in libxml-enabled
 * builds (this group sits inside the surrounding #ifdef USE_LIBXML).
 */

/* libxml callbacks installed by pg_xml_init(), plus error-text helpers */
static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
										   xmlParserCtxtPtr ctxt);
static void xml_errorHandler(void *data, xmlErrorPtr error);
static void xml_ereport_by_code(int level, int sqlcode,
								const char *msg, int errcode);
static void chopStringInfoNewlines(StringInfo str);
static void appendStringInfoLineSeparator(StringInfo str);

#ifdef USE_LIBXMLCONTEXT

/*
 * Debug-only support for steering libxml's allocations into a dedicated
 * memory context; see the notes on memory management near the top of the
 * file.  xml_memory_init() is called from pg_xml_init_library().
 */
static MemoryContext LibxmlContext = NULL;

static void xml_memory_init(void);
static void *xml_palloc(size_t size);
static void *xml_repalloc(void *ptr, size_t size);
static void xml_pfree(void *ptr);
static char *xml_pstrdup(const char *string);
#endif							/* USE_LIBXMLCONTEXT */

/* parsing, XML-declaration handling and conversion helpers */
static xmlChar *xml_text2xmlChar(text *in);
static int	parse_xml_decl(const xmlChar *str, size_t *lenp,
						   xmlChar **version, xmlChar **encoding, int *standalone);
static bool print_xml_decl(StringInfo buf, const xmlChar *version,
						   pg_enc encoding, int standalone);
static bool xml_doctype_in_content(const xmlChar *str);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
						   bool preserve_whitespace, int encoding);
static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
static int	xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
								   ArrayBuildState *astate,
								   PgXmlErrorContext *xmlerrcxt);
static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
152 #endif /* USE_LIBXML */
153
/*
 * Forward declarations of static helpers that are compiled whether or not
 * libxml is available (they are declared outside #ifdef USE_LIBXML).
 * NOTE(review): judging by their names these implement the SQL-to-XML and
 * XML Schema mapping functions -- their definitions are not in this part of
 * the file.
 */
static void xmldata_root_element_start(StringInfo result, const char *eltname,
									   const char *xmlschema, const char *targetns,
									   bool top_level);
static void xmldata_root_element_end(StringInfo result, const char *eltname);
static StringInfo query_to_xml_internal(const char *query, char *tablename,
										const char *xmlschema, bool nulls, bool tableforest,
										const char *targetns, bool top_level);
static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
											  bool nulls, bool tableforest, const char *targetns);
static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
													 List *relid_list, bool nulls,
													 bool tableforest, const char *targetns);
static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
													  bool nulls, bool tableforest,
													  const char *targetns);
static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
									  char *tablename, bool nulls, bool tableforest,
									  const char *targetns, bool top_level);
175
/* XMLTABLE support */
#ifdef USE_LIBXML
/* random number to identify XmlTableContext */
#define XMLTABLE_CONTEXT_MAGIC	46922182

/*
 * Working state for the XMLTABLE callbacks; defined only in libxml builds.
 *
 * NOTE(review): the code that fills in and reads these members is not in
 * this part of the file, so the member notes below are inferred from names
 * and types only -- confirm against the XmlTable* functions.
 */
typedef struct XmlTableBuilderData
{
	int			magic;			/* presumably XMLTABLE_CONTEXT_MAGIC when valid */
	int			natts;			/* presumably the number of output columns */
	long int	row_count;		/* presumably rows produced so far */
	PgXmlErrorContext *xmlerrcxt;	/* libxml error-handling state */
	xmlParserCtxtPtr ctxt;
	xmlDocPtr	doc;
	xmlXPathContextPtr xpathcxt;
	xmlXPathCompExprPtr xpathcomp;
	xmlXPathObjectPtr xpathobj;
	xmlXPathCompExprPtr *xpathscomp;	/* presumably one entry per column */
} XmlTableBuilderData;
#endif
194
/* Callbacks that make up the XMLTABLE implementation */
static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
								 const char *uri);
static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
									const char *path, int colnum);
static bool XmlTableFetchRow(struct TableFuncScanState *state);
static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
							  Oid typid, int32 typmod, bool *isnull);
static void XmlTableDestroyOpaque(struct TableFuncScanState *state);

/*
 * Exported callback table for XMLTABLE.
 *
 * The initializer is positional, so the entries must stay in the same order
 * as the members of TableFuncRoutine (declared outside this file).
 */
const TableFuncRoutine XmlTableRoutine =
{
	XmlTableInitOpaque,
	XmlTableSetDocument,
	XmlTableSetNamespace,
	XmlTableSetRowFilter,
	XmlTableSetColumnFilter,
	XmlTableFetchRow,
	XmlTableGetValue,
	XmlTableDestroyOpaque
};
218
/*
 * Raise an ERROR saying that the requested feature needs a libxml-enabled
 * build.  The functions in this file invoke it in their "not USE_LIBXML"
 * branches.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support."), \
			 errhint("You need to rebuild PostgreSQL using %s.", "--with-libxml")))
225
226
/*
 * Namespace URIs, from SQL/XML:2008 section 4.9: XML Schema, XML Schema
 * instance, and the SQL/XML namespace.
 */
#define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
#define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
#define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
231
232
233 #ifdef USE_LIBXML
234
235 static int
xmlChar_to_encoding(const xmlChar * encoding_name)236 xmlChar_to_encoding(const xmlChar *encoding_name)
237 {
238 int encoding = pg_char_to_encoding((const char *) encoding_name);
239
240 if (encoding < 0)
241 ereport(ERROR,
242 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
243 errmsg("invalid encoding name \"%s\"",
244 (const char *) encoding_name)));
245 return encoding;
246 }
247 #endif
248
249
250 /*
251 * xml_in uses a plain C string to VARDATA conversion, so for the time being
252 * we use the conversion function for the text datatype.
253 *
254 * This is only acceptable so long as xmltype and text use the same
255 * representation.
256 */
257 Datum
xml_in(PG_FUNCTION_ARGS)258 xml_in(PG_FUNCTION_ARGS)
259 {
260 #ifdef USE_LIBXML
261 char *s = PG_GETARG_CSTRING(0);
262 xmltype *vardata;
263 xmlDocPtr doc;
264
265 vardata = (xmltype *) cstring_to_text(s);
266
267 /*
268 * Parse the data to check if it is well-formed XML data. Assume that
269 * ERROR occurred if parsing failed.
270 */
271 doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
272 xmlFreeDoc(doc);
273
274 PG_RETURN_XML_P(vardata);
275 #else
276 NO_XML_SUPPORT();
277 return 0;
278 #endif
279 }
280
281
/*
 * XML version string regarded as the default.  NOTE(review): its consumer is
 * not in this part of the file; presumably print_xml_decl() compares against
 * it -- confirm.
 */
#define PG_XML_DEFAULT_VERSION "1.0"
283
284
285 /*
286 * xml_out_internal uses a plain VARDATA to C string conversion, so for the
287 * time being we use the conversion function for the text datatype.
288 *
289 * This is only acceptable so long as xmltype and text use the same
290 * representation.
291 */
292 static char *
xml_out_internal(xmltype * x,pg_enc target_encoding)293 xml_out_internal(xmltype *x, pg_enc target_encoding)
294 {
295 char *str = text_to_cstring((text *) x);
296
297 #ifdef USE_LIBXML
298 size_t len = strlen(str);
299 xmlChar *version;
300 int standalone;
301 int res_code;
302
303 if ((res_code = parse_xml_decl((xmlChar *) str,
304 &len, &version, NULL, &standalone)) == 0)
305 {
306 StringInfoData buf;
307
308 initStringInfo(&buf);
309
310 if (!print_xml_decl(&buf, version, target_encoding, standalone))
311 {
312 /*
313 * If we are not going to produce an XML declaration, eat a single
314 * newline in the original string to prevent empty first lines in
315 * the output.
316 */
317 if (*(str + len) == '\n')
318 len += 1;
319 }
320 appendStringInfoString(&buf, str + len);
321
322 pfree(str);
323
324 return buf.data;
325 }
326
327 xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
328 "could not parse XML declaration in stored value",
329 res_code);
330 #endif
331 return str;
332 }
333
334
335 Datum
xml_out(PG_FUNCTION_ARGS)336 xml_out(PG_FUNCTION_ARGS)
337 {
338 xmltype *x = PG_GETARG_XML_P(0);
339
340 /*
341 * xml_out removes the encoding property in all cases. This is because we
342 * cannot control from here whether the datum will be converted to a
343 * different client encoding, so we'd do more harm than good by including
344 * it.
345 */
346 PG_RETURN_CSTRING(xml_out_internal(x, 0));
347 }
348
349
350 Datum
xml_recv(PG_FUNCTION_ARGS)351 xml_recv(PG_FUNCTION_ARGS)
352 {
353 #ifdef USE_LIBXML
354 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
355 xmltype *result;
356 char *str;
357 char *newstr;
358 int nbytes;
359 xmlDocPtr doc;
360 xmlChar *encodingStr = NULL;
361 int encoding;
362
363 /*
364 * Read the data in raw format. We don't know yet what the encoding is, as
365 * that information is embedded in the xml declaration; so we have to
366 * parse that before converting to server encoding.
367 */
368 nbytes = buf->len - buf->cursor;
369 str = (char *) pq_getmsgbytes(buf, nbytes);
370
371 /*
372 * We need a null-terminated string to pass to parse_xml_decl(). Rather
373 * than make a separate copy, make the temporary result one byte bigger
374 * than it needs to be.
375 */
376 result = palloc(nbytes + 1 + VARHDRSZ);
377 SET_VARSIZE(result, nbytes + VARHDRSZ);
378 memcpy(VARDATA(result), str, nbytes);
379 str = VARDATA(result);
380 str[nbytes] = '\0';
381
382 parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
383
384 /*
385 * If encoding wasn't explicitly specified in the XML header, treat it as
386 * UTF-8, as that's the default in XML. This is different from xml_in(),
387 * where the input has to go through the normal client to server encoding
388 * conversion.
389 */
390 encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
391
392 /*
393 * Parse the data to check if it is well-formed XML data. Assume that
394 * xml_parse will throw ERROR if not.
395 */
396 doc = xml_parse(result, xmloption, true, encoding);
397 xmlFreeDoc(doc);
398
399 /* Now that we know what we're dealing with, convert to server encoding */
400 newstr = pg_any_to_server(str, nbytes, encoding);
401
402 if (newstr != str)
403 {
404 pfree(result);
405 result = (xmltype *) cstring_to_text(newstr);
406 pfree(newstr);
407 }
408
409 PG_RETURN_XML_P(result);
410 #else
411 NO_XML_SUPPORT();
412 return 0;
413 #endif
414 }
415
416
417 Datum
xml_send(PG_FUNCTION_ARGS)418 xml_send(PG_FUNCTION_ARGS)
419 {
420 xmltype *x = PG_GETARG_XML_P(0);
421 char *outval;
422 StringInfoData buf;
423
424 /*
425 * xml_out_internal doesn't convert the encoding, it just prints the right
426 * declaration. pq_sendtext will do the conversion.
427 */
428 outval = xml_out_internal(x, pg_get_client_encoding());
429
430 pq_begintypsend(&buf);
431 pq_sendtext(&buf, outval, strlen(outval));
432 pfree(outval);
433 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
434 }
435
436
437 #ifdef USE_LIBXML
438 static void
appendStringInfoText(StringInfo str,const text * t)439 appendStringInfoText(StringInfo str, const text *t)
440 {
441 appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
442 }
443 #endif
444
445
446 static xmltype *
stringinfo_to_xmltype(StringInfo buf)447 stringinfo_to_xmltype(StringInfo buf)
448 {
449 return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
450 }
451
452
453 static xmltype *
cstring_to_xmltype(const char * string)454 cstring_to_xmltype(const char *string)
455 {
456 return (xmltype *) cstring_to_text(string);
457 }
458
459
460 #ifdef USE_LIBXML
461 static xmltype *
xmlBuffer_to_xmltype(xmlBufferPtr buf)462 xmlBuffer_to_xmltype(xmlBufferPtr buf)
463 {
464 return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
465 xmlBufferLength(buf));
466 }
467 #endif
468
469
470 Datum
xmlcomment(PG_FUNCTION_ARGS)471 xmlcomment(PG_FUNCTION_ARGS)
472 {
473 #ifdef USE_LIBXML
474 text *arg = PG_GETARG_TEXT_PP(0);
475 char *argdata = VARDATA_ANY(arg);
476 int len = VARSIZE_ANY_EXHDR(arg);
477 StringInfoData buf;
478 int i;
479
480 /* check for "--" in string or "-" at the end */
481 for (i = 1; i < len; i++)
482 {
483 if (argdata[i] == '-' && argdata[i - 1] == '-')
484 ereport(ERROR,
485 (errcode(ERRCODE_INVALID_XML_COMMENT),
486 errmsg("invalid XML comment")));
487 }
488 if (len > 0 && argdata[len - 1] == '-')
489 ereport(ERROR,
490 (errcode(ERRCODE_INVALID_XML_COMMENT),
491 errmsg("invalid XML comment")));
492
493 initStringInfo(&buf);
494 appendStringInfoString(&buf, "<!--");
495 appendStringInfoText(&buf, arg);
496 appendStringInfoString(&buf, "-->");
497
498 PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
499 #else
500 NO_XML_SUPPORT();
501 return 0;
502 #endif
503 }
504
505
506
507 /*
508 * TODO: xmlconcat needs to merge the notations and unparsed entities
509 * of the argument values. Not very important in practice, though.
510 */
511 xmltype *
xmlconcat(List * args)512 xmlconcat(List *args)
513 {
514 #ifdef USE_LIBXML
515 int global_standalone = 1;
516 xmlChar *global_version = NULL;
517 bool global_version_no_value = false;
518 StringInfoData buf;
519 ListCell *v;
520
521 initStringInfo(&buf);
522 foreach(v, args)
523 {
524 xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
525 size_t len;
526 xmlChar *version;
527 int standalone;
528 char *str;
529
530 len = VARSIZE(x) - VARHDRSZ;
531 str = text_to_cstring((text *) x);
532
533 parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
534
535 if (standalone == 0 && global_standalone == 1)
536 global_standalone = 0;
537 if (standalone < 0)
538 global_standalone = -1;
539
540 if (!version)
541 global_version_no_value = true;
542 else if (!global_version)
543 global_version = version;
544 else if (xmlStrcmp(version, global_version) != 0)
545 global_version_no_value = true;
546
547 appendStringInfoString(&buf, str + len);
548 pfree(str);
549 }
550
551 if (!global_version_no_value || global_standalone >= 0)
552 {
553 StringInfoData buf2;
554
555 initStringInfo(&buf2);
556
557 print_xml_decl(&buf2,
558 (!global_version_no_value) ? global_version : NULL,
559 0,
560 global_standalone);
561
562 appendBinaryStringInfo(&buf2, buf.data, buf.len);
563 buf = buf2;
564 }
565
566 return stringinfo_to_xmltype(&buf);
567 #else
568 NO_XML_SUPPORT();
569 return NULL;
570 #endif
571 }
572
573
574 /*
575 * XMLAGG support
576 */
577 Datum
xmlconcat2(PG_FUNCTION_ARGS)578 xmlconcat2(PG_FUNCTION_ARGS)
579 {
580 if (PG_ARGISNULL(0))
581 {
582 if (PG_ARGISNULL(1))
583 PG_RETURN_NULL();
584 else
585 PG_RETURN_XML_P(PG_GETARG_XML_P(1));
586 }
587 else if (PG_ARGISNULL(1))
588 PG_RETURN_XML_P(PG_GETARG_XML_P(0));
589 else
590 PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
591 PG_GETARG_XML_P(1))));
592 }
593
594
595 Datum
texttoxml(PG_FUNCTION_ARGS)596 texttoxml(PG_FUNCTION_ARGS)
597 {
598 text *data = PG_GETARG_TEXT_PP(0);
599
600 PG_RETURN_XML_P(xmlparse(data, xmloption, true));
601 }
602
603
604 Datum
xmltotext(PG_FUNCTION_ARGS)605 xmltotext(PG_FUNCTION_ARGS)
606 {
607 xmltype *data = PG_GETARG_XML_P(0);
608
609 /* It's actually binary compatible. */
610 PG_RETURN_TEXT_P((text *) data);
611 }
612
613
614 text *
xmltotext_with_xmloption(xmltype * data,XmlOptionType xmloption_arg)615 xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
616 {
617 if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
618 ereport(ERROR,
619 (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
620 errmsg("not an XML document")));
621
622 /* It's actually binary compatible, save for the above check. */
623 return (text *) data;
624 }
625
626
627 xmltype *
xmlelement(XmlExpr * xexpr,Datum * named_argvalue,bool * named_argnull,Datum * argvalue,bool * argnull)628 xmlelement(XmlExpr *xexpr,
629 Datum *named_argvalue, bool *named_argnull,
630 Datum *argvalue, bool *argnull)
631 {
632 #ifdef USE_LIBXML
633 xmltype *result;
634 List *named_arg_strings;
635 List *arg_strings;
636 int i;
637 ListCell *arg;
638 ListCell *narg;
639 PgXmlErrorContext *xmlerrcxt;
640 volatile xmlBufferPtr buf = NULL;
641 volatile xmlTextWriterPtr writer = NULL;
642
643 /*
644 * All arguments are already evaluated, and their values are passed in the
645 * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
646 * issues if one of the arguments involves a call to some other function
647 * or subsystem that wants to use libxml on its own terms. We examine the
648 * original XmlExpr to identify the numbers and types of the arguments.
649 */
650 named_arg_strings = NIL;
651 i = 0;
652 foreach(arg, xexpr->named_args)
653 {
654 Expr *e = (Expr *) lfirst(arg);
655 char *str;
656
657 if (named_argnull[i])
658 str = NULL;
659 else
660 str = map_sql_value_to_xml_value(named_argvalue[i],
661 exprType((Node *) e),
662 false);
663 named_arg_strings = lappend(named_arg_strings, str);
664 i++;
665 }
666
667 arg_strings = NIL;
668 i = 0;
669 foreach(arg, xexpr->args)
670 {
671 Expr *e = (Expr *) lfirst(arg);
672 char *str;
673
674 /* here we can just forget NULL elements immediately */
675 if (!argnull[i])
676 {
677 str = map_sql_value_to_xml_value(argvalue[i],
678 exprType((Node *) e),
679 true);
680 arg_strings = lappend(arg_strings, str);
681 }
682 i++;
683 }
684
685 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
686
687 PG_TRY();
688 {
689 buf = xmlBufferCreate();
690 if (buf == NULL || xmlerrcxt->err_occurred)
691 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
692 "could not allocate xmlBuffer");
693 writer = xmlNewTextWriterMemory(buf, 0);
694 if (writer == NULL || xmlerrcxt->err_occurred)
695 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
696 "could not allocate xmlTextWriter");
697
698 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
699
700 forboth(arg, named_arg_strings, narg, xexpr->arg_names)
701 {
702 char *str = (char *) lfirst(arg);
703 char *argname = strVal(lfirst(narg));
704
705 if (str)
706 xmlTextWriterWriteAttribute(writer,
707 (xmlChar *) argname,
708 (xmlChar *) str);
709 }
710
711 foreach(arg, arg_strings)
712 {
713 char *str = (char *) lfirst(arg);
714
715 xmlTextWriterWriteRaw(writer, (xmlChar *) str);
716 }
717
718 xmlTextWriterEndElement(writer);
719
720 /* we MUST do this now to flush data out to the buffer ... */
721 xmlFreeTextWriter(writer);
722 writer = NULL;
723
724 result = xmlBuffer_to_xmltype(buf);
725 }
726 PG_CATCH();
727 {
728 if (writer)
729 xmlFreeTextWriter(writer);
730 if (buf)
731 xmlBufferFree(buf);
732
733 pg_xml_done(xmlerrcxt, true);
734
735 PG_RE_THROW();
736 }
737 PG_END_TRY();
738
739 xmlBufferFree(buf);
740
741 pg_xml_done(xmlerrcxt, false);
742
743 return result;
744 #else
745 NO_XML_SUPPORT();
746 return NULL;
747 #endif
748 }
749
750
751 xmltype *
xmlparse(text * data,XmlOptionType xmloption_arg,bool preserve_whitespace)752 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
753 {
754 #ifdef USE_LIBXML
755 xmlDocPtr doc;
756
757 doc = xml_parse(data, xmloption_arg, preserve_whitespace,
758 GetDatabaseEncoding());
759 xmlFreeDoc(doc);
760
761 return (xmltype *) data;
762 #else
763 NO_XML_SUPPORT();
764 return NULL;
765 #endif
766 }
767
768
769 xmltype *
xmlpi(const char * target,text * arg,bool arg_is_null,bool * result_is_null)770 xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
771 {
772 #ifdef USE_LIBXML
773 xmltype *result;
774 StringInfoData buf;
775
776 if (pg_strcasecmp(target, "xml") == 0)
777 ereport(ERROR,
778 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
779 errmsg("invalid XML processing instruction"),
780 errdetail("XML processing instruction target name cannot be \"%s\".", target)));
781
782 /*
783 * Following the SQL standard, the null check comes after the syntax check
784 * above.
785 */
786 *result_is_null = arg_is_null;
787 if (*result_is_null)
788 return NULL;
789
790 initStringInfo(&buf);
791
792 appendStringInfo(&buf, "<?%s", target);
793
794 if (arg != NULL)
795 {
796 char *string;
797
798 string = text_to_cstring(arg);
799 if (strstr(string, "?>") != NULL)
800 ereport(ERROR,
801 (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
802 errmsg("invalid XML processing instruction"),
803 errdetail("XML processing instruction cannot contain \"?>\".")));
804
805 appendStringInfoChar(&buf, ' ');
806 appendStringInfoString(&buf, string + strspn(string, " "));
807 pfree(string);
808 }
809 appendStringInfoString(&buf, "?>");
810
811 result = stringinfo_to_xmltype(&buf);
812 pfree(buf.data);
813 return result;
814 #else
815 NO_XML_SUPPORT();
816 return NULL;
817 #endif
818 }
819
820
821 xmltype *
xmlroot(xmltype * data,text * version,int standalone)822 xmlroot(xmltype *data, text *version, int standalone)
823 {
824 #ifdef USE_LIBXML
825 char *str;
826 size_t len;
827 xmlChar *orig_version;
828 int orig_standalone;
829 StringInfoData buf;
830
831 len = VARSIZE(data) - VARHDRSZ;
832 str = text_to_cstring((text *) data);
833
834 parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
835
836 if (version)
837 orig_version = xml_text2xmlChar(version);
838 else
839 orig_version = NULL;
840
841 switch (standalone)
842 {
843 case XML_STANDALONE_YES:
844 orig_standalone = 1;
845 break;
846 case XML_STANDALONE_NO:
847 orig_standalone = 0;
848 break;
849 case XML_STANDALONE_NO_VALUE:
850 orig_standalone = -1;
851 break;
852 case XML_STANDALONE_OMITTED:
853 /* leave original value */
854 break;
855 }
856
857 initStringInfo(&buf);
858 print_xml_decl(&buf, orig_version, 0, orig_standalone);
859 appendStringInfoString(&buf, str + len);
860
861 return stringinfo_to_xmltype(&buf);
862 #else
863 NO_XML_SUPPORT();
864 return NULL;
865 #endif
866 }
867
868
869 /*
870 * Validate document (given as string) against DTD (given as external link)
871 *
872 * This has been removed because it is a security hole: unprivileged users
873 * should not be able to use Postgres to fetch arbitrary external files,
874 * which unfortunately is exactly what libxml is willing to do with the DTD
875 * parameter.
876 */
877 Datum
xmlvalidate(PG_FUNCTION_ARGS)878 xmlvalidate(PG_FUNCTION_ARGS)
879 {
880 ereport(ERROR,
881 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
882 errmsg("xmlvalidate is not implemented")));
883 return 0;
884 }
885
886
887 bool
xml_is_document(xmltype * arg)888 xml_is_document(xmltype *arg)
889 {
890 #ifdef USE_LIBXML
891 bool result;
892 volatile xmlDocPtr doc = NULL;
893 MemoryContext ccxt = CurrentMemoryContext;
894
895 /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
896 PG_TRY();
897 {
898 doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
899 GetDatabaseEncoding());
900 result = true;
901 }
902 PG_CATCH();
903 {
904 ErrorData *errdata;
905 MemoryContext ecxt;
906
907 ecxt = MemoryContextSwitchTo(ccxt);
908 errdata = CopyErrorData();
909 if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
910 {
911 FlushErrorState();
912 result = false;
913 }
914 else
915 {
916 MemoryContextSwitchTo(ecxt);
917 PG_RE_THROW();
918 }
919 }
920 PG_END_TRY();
921
922 if (doc)
923 xmlFreeDoc(doc);
924
925 return result;
926 #else /* not USE_LIBXML */
927 NO_XML_SUPPORT();
928 return false;
929 #endif /* not USE_LIBXML */
930 }
931
932
933 #ifdef USE_LIBXML
934
935 /*
936 * pg_xml_init_library --- set up for use of libxml
937 *
938 * This should be called by each function that is about to use libxml
939 * facilities but doesn't require error handling. It initializes libxml
940 * and verifies compatibility with the loaded libxml version. These are
941 * once-per-session activities.
942 *
943 * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
944 * check)
945 */
946 void
pg_xml_init_library(void)947 pg_xml_init_library(void)
948 {
949 static bool first_time = true;
950
951 if (first_time)
952 {
953 /* Stuff we need do only once per session */
954
955 /*
956 * Currently, we have no pure UTF-8 support for internals -- check if
957 * we can work.
958 */
959 if (sizeof(char) != sizeof(xmlChar))
960 ereport(ERROR,
961 (errmsg("could not initialize XML library"),
962 errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
963 (int) sizeof(char), (int) sizeof(xmlChar))));
964
965 #ifdef USE_LIBXMLCONTEXT
966 /* Set up libxml's memory allocation our way */
967 xml_memory_init();
968 #endif
969
970 /* Check library compatibility */
971 LIBXML_TEST_VERSION;
972
973 first_time = false;
974 }
975 }
976
977 /*
978 * pg_xml_init --- set up for use of libxml and register an error handler
979 *
980 * This should be called by each function that is about to use libxml
981 * facilities and requires error handling. It initializes libxml with
982 * pg_xml_init_library() and establishes our libxml error handler.
983 *
984 * strictness determines which errors are reported and which are ignored.
985 *
986 * Calls to this function MUST be followed by a PG_TRY block that guarantees
987 * that pg_xml_done() is called during either normal or error exit.
988 *
989 * This is exported for use by contrib/xml2, as well as other code that might
990 * wish to share use of this module's libxml error handler.
991 */
992 PgXmlErrorContext *
pg_xml_init(PgXmlStrictness strictness)993 pg_xml_init(PgXmlStrictness strictness)
994 {
995 PgXmlErrorContext *errcxt;
996 void *new_errcxt;
997
998 /* Do one-time setup if needed */
999 pg_xml_init_library();
1000
1001 /* Create error handling context structure */
1002 errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1003 errcxt->magic = ERRCXT_MAGIC;
1004 errcxt->strictness = strictness;
1005 errcxt->err_occurred = false;
1006 initStringInfo(&errcxt->err_buf);
1007
1008 /*
1009 * Save original error handler and install ours. libxml originally didn't
1010 * distinguish between the contexts for generic and for structured error
1011 * handlers. If we're using an old libxml version, we must thus save the
1012 * generic error context, even though we're using a structured error
1013 * handler.
1014 */
1015 errcxt->saved_errfunc = xmlStructuredError;
1016
1017 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1018 errcxt->saved_errcxt = xmlStructuredErrorContext;
1019 #else
1020 errcxt->saved_errcxt = xmlGenericErrorContext;
1021 #endif
1022
1023 xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1024
1025 /*
1026 * Verify that xmlSetStructuredErrorFunc set the context variable we
1027 * expected it to. If not, the error context pointer we just saved is not
1028 * the correct thing to restore, and since that leaves us without a way to
1029 * restore the context in pg_xml_done, we must fail.
1030 *
1031 * The only known situation in which this test fails is if we compile with
1032 * headers from a libxml2 that doesn't track the structured error context
1033 * separately (< 2.7.4), but at runtime use a version that does, or vice
1034 * versa. The libxml2 authors did not treat that change as constituting
1035 * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1036 * fails to protect us from this.
1037 */
1038
1039 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1040 new_errcxt = xmlStructuredErrorContext;
1041 #else
1042 new_errcxt = xmlGenericErrorContext;
1043 #endif
1044
1045 if (new_errcxt != (void *) errcxt)
1046 ereport(ERROR,
1047 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1048 errmsg("could not set up XML error handler"),
1049 errhint("This probably indicates that the version of libxml2"
1050 " being used is not compatible with the libxml2"
1051 " header files that PostgreSQL was built with.")));
1052
1053 /*
1054 * Also, install an entity loader to prevent unwanted fetches of external
1055 * files and URLs.
1056 */
1057 errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1058 xmlSetExternalEntityLoader(xmlPgEntityLoader);
1059
1060 return errcxt;
1061 }
1062
1063
1064 /*
1065 * pg_xml_done --- restore previous libxml error handling
1066 *
1067 * Resets libxml's global error-handling state to what it was before
1068 * pg_xml_init() was called.
1069 *
1070 * This routine verifies that all pending errors have been dealt with
1071 * (in assert-enabled builds, anyway).
1072 */
1073 void
pg_xml_done(PgXmlErrorContext * errcxt,bool isError)1074 pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1075 {
1076 void *cur_errcxt;
1077
1078 /* An assert seems like enough protection here */
1079 Assert(errcxt->magic == ERRCXT_MAGIC);
1080
1081 /*
1082 * In a normal exit, there should be no un-handled libxml errors. But we
1083 * shouldn't try to enforce this during error recovery, since the longjmp
1084 * could have been thrown before xml_ereport had a chance to run.
1085 */
1086 Assert(!errcxt->err_occurred || isError);
1087
1088 /*
1089 * Check that libxml's global state is correct, warn if not. This is a
1090 * real test and not an Assert because it has a higher probability of
1091 * happening.
1092 */
1093 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1094 cur_errcxt = xmlStructuredErrorContext;
1095 #else
1096 cur_errcxt = xmlGenericErrorContext;
1097 #endif
1098
1099 if (cur_errcxt != (void *) errcxt)
1100 elog(WARNING, "libxml error handling state is out of sync with xml.c");
1101
1102 /* Restore the saved handlers */
1103 xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1104 xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1105
1106 /*
1107 * Mark the struct as invalid, just in case somebody somehow manages to
1108 * call xml_errorHandler or xml_ereport with it.
1109 */
1110 errcxt->magic = 0;
1111
1112 /* Release memory */
1113 pfree(errcxt->err_buf.data);
1114 pfree(errcxt);
1115 }
1116
1117
1118 /*
1119 * pg_xml_error_occurred() --- test the error flag
1120 */
1121 bool
pg_xml_error_occurred(PgXmlErrorContext * errcxt)1122 pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1123 {
1124 return errcxt->err_occurred;
1125 }
1126
1127
1128 /*
1129 * SQL/XML allows storing "XML documents" or "XML content". "XML
1130 * documents" are specified by the XML specification and are parsed
1131 * easily by libxml. "XML content" is specified by SQL/XML as the
1132 * production "XMLDecl? content". But libxml can only parse the
1133 * "content" part, so we have to parse the XML declaration ourselves
1134 * to complete this.
1135 */
1136
/*
 * Make the enclosing function return XML_ERR_SPACE_REQUIRED unless the
 * character at p is an XML blank.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/* Advance p past any run of XML blank characters */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/*
 * Beware of multiple evaluations of argument!  The argument is parenthesized
 * in the literal comparisons so that an expression argument is compared as
 * a whole rather than being re-associated with the == operator.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
			|| xmlIsDigit_ch(c) \
			|| (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
			|| xmlIsCombiningQ(c) \
			|| xmlIsExtender_ch(c))
1154
1155 /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1156 static xmlChar *
xml_pnstrdup(const xmlChar * str,size_t len)1157 xml_pnstrdup(const xmlChar *str, size_t len)
1158 {
1159 xmlChar *result;
1160
1161 result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1162 memcpy(result, str, len * sizeof(xmlChar));
1163 result[len] = 0;
1164 return result;
1165 }
1166
1167 /* Ditto, except input is char* */
1168 static xmlChar *
pg_xmlCharStrndup(const char * str,size_t len)1169 pg_xmlCharStrndup(const char *str, size_t len)
1170 {
1171 xmlChar *result;
1172
1173 result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1174 memcpy(result, str, len);
1175 result[len] = '\0';
1176
1177 return result;
1178 }
1179
1180 /*
1181 * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1182 *
1183 * The input xmlChar is freed regardless of success of the copy.
1184 */
1185 static char *
xml_pstrdup_and_free(xmlChar * str)1186 xml_pstrdup_and_free(xmlChar *str)
1187 {
1188 char *result;
1189
1190 if (str)
1191 {
1192 PG_TRY();
1193 {
1194 result = pstrdup((char *) str);
1195 }
1196 PG_FINALLY();
1197 {
1198 xmlFree(str);
1199 }
1200 PG_END_TRY();
1201 }
1202 else
1203 result = NULL;
1204
1205 return result;
1206 }
1207
1208 /*
1209 * str is the null-terminated input string. Remaining arguments are
1210 * output arguments; each can be NULL if value is not wanted.
1211 * version and encoding are returned as locally-palloc'd strings.
1212 * Result is 0 if OK, an error code if not.
1213 */
1214 static int
parse_xml_decl(const xmlChar * str,size_t * lenp,xmlChar ** version,xmlChar ** encoding,int * standalone)1215 parse_xml_decl(const xmlChar *str, size_t *lenp,
1216 xmlChar **version, xmlChar **encoding, int *standalone)
1217 {
1218 const xmlChar *p;
1219 const xmlChar *save_p;
1220 size_t len;
1221 int utf8char;
1222 int utf8len;
1223
1224 /*
1225 * Only initialize libxml. We don't need error handling here, but we do
1226 * need to make sure libxml is initialized before calling any of its
1227 * functions. Note that this is safe (and a no-op) if caller has already
1228 * done pg_xml_init().
1229 */
1230 pg_xml_init_library();
1231
1232 /* Initialize output arguments to "not present" */
1233 if (version)
1234 *version = NULL;
1235 if (encoding)
1236 *encoding = NULL;
1237 if (standalone)
1238 *standalone = -1;
1239
1240 p = str;
1241
1242 if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1243 goto finished;
1244
1245 /*
1246 * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1247 * rather than an XMLDecl, so we have done what we came to do and found no
1248 * XMLDecl.
1249 *
1250 * We need an input length value for xmlGetUTF8Char, but there's no need
1251 * to count the whole document size, so use strnlen not strlen.
1252 */
1253 utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1254 utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1255 if (PG_XMLISNAMECHAR(utf8char))
1256 goto finished;
1257
1258 p += 5;
1259
1260 /* version */
1261 CHECK_XML_SPACE(p);
1262 SKIP_XML_SPACE(p);
1263 if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1264 return XML_ERR_VERSION_MISSING;
1265 p += 7;
1266 SKIP_XML_SPACE(p);
1267 if (*p != '=')
1268 return XML_ERR_VERSION_MISSING;
1269 p += 1;
1270 SKIP_XML_SPACE(p);
1271
1272 if (*p == '\'' || *p == '"')
1273 {
1274 const xmlChar *q;
1275
1276 q = xmlStrchr(p + 1, *p);
1277 if (!q)
1278 return XML_ERR_VERSION_MISSING;
1279
1280 if (version)
1281 *version = xml_pnstrdup(p + 1, q - p - 1);
1282 p = q + 1;
1283 }
1284 else
1285 return XML_ERR_VERSION_MISSING;
1286
1287 /* encoding */
1288 save_p = p;
1289 SKIP_XML_SPACE(p);
1290 if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1291 {
1292 CHECK_XML_SPACE(save_p);
1293 p += 8;
1294 SKIP_XML_SPACE(p);
1295 if (*p != '=')
1296 return XML_ERR_MISSING_ENCODING;
1297 p += 1;
1298 SKIP_XML_SPACE(p);
1299
1300 if (*p == '\'' || *p == '"')
1301 {
1302 const xmlChar *q;
1303
1304 q = xmlStrchr(p + 1, *p);
1305 if (!q)
1306 return XML_ERR_MISSING_ENCODING;
1307
1308 if (encoding)
1309 *encoding = xml_pnstrdup(p + 1, q - p - 1);
1310 p = q + 1;
1311 }
1312 else
1313 return XML_ERR_MISSING_ENCODING;
1314 }
1315 else
1316 {
1317 p = save_p;
1318 }
1319
1320 /* standalone */
1321 save_p = p;
1322 SKIP_XML_SPACE(p);
1323 if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1324 {
1325 CHECK_XML_SPACE(save_p);
1326 p += 10;
1327 SKIP_XML_SPACE(p);
1328 if (*p != '=')
1329 return XML_ERR_STANDALONE_VALUE;
1330 p += 1;
1331 SKIP_XML_SPACE(p);
1332 if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1333 xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1334 {
1335 if (standalone)
1336 *standalone = 1;
1337 p += 5;
1338 }
1339 else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1340 xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1341 {
1342 if (standalone)
1343 *standalone = 0;
1344 p += 4;
1345 }
1346 else
1347 return XML_ERR_STANDALONE_VALUE;
1348 }
1349 else
1350 {
1351 p = save_p;
1352 }
1353
1354 SKIP_XML_SPACE(p);
1355 if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1356 return XML_ERR_XMLDECL_NOT_FINISHED;
1357 p += 2;
1358
1359 finished:
1360 len = p - str;
1361
1362 for (p = str; p < str + len; p++)
1363 if (*p > 127)
1364 return XML_ERR_INVALID_CHAR;
1365
1366 if (lenp)
1367 *lenp = len;
1368
1369 return XML_ERR_OK;
1370 }
1371
1372
1373 /*
1374 * Write an XML declaration. On output, we adjust the XML declaration
1375 * as follows. (These rules are the moral equivalent of the clause
1376 * "Serialization of an XML value" in the SQL standard.)
1377 *
1378 * We try to avoid generating an XML declaration if possible. This is
1379 * so that you don't get trivial things like xml '<foo/>' resulting in
1380 * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1381 * must provide a declaration if the standalone property is specified
1382 * or if we include an encoding declaration. If we have a
1383 * declaration, we must specify a version (XML requires this).
1384 * Otherwise we only make a declaration if the version is not "1.0",
1385 * which is the default version specified in SQL:2003.
1386 */
1387 static bool
print_xml_decl(StringInfo buf,const xmlChar * version,pg_enc encoding,int standalone)1388 print_xml_decl(StringInfo buf, const xmlChar *version,
1389 pg_enc encoding, int standalone)
1390 {
1391 if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1392 || (encoding && encoding != PG_UTF8)
1393 || standalone != -1)
1394 {
1395 appendStringInfoString(buf, "<?xml");
1396
1397 if (version)
1398 appendStringInfo(buf, " version=\"%s\"", version);
1399 else
1400 appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1401
1402 if (encoding && encoding != PG_UTF8)
1403 {
1404 /*
1405 * XXX might be useful to convert this to IANA names (ISO-8859-1
1406 * instead of LATIN1 etc.); needs field experience
1407 */
1408 appendStringInfo(buf, " encoding=\"%s\"",
1409 pg_encoding_to_char(encoding));
1410 }
1411
1412 if (standalone == 1)
1413 appendStringInfoString(buf, " standalone=\"yes\"");
1414 else if (standalone == 0)
1415 appendStringInfoString(buf, " standalone=\"no\"");
1416 appendStringInfoString(buf, "?>");
1417
1418 return true;
1419 }
1420 else
1421 return false;
1422 }
1423
1424 /*
1425 * Test whether an input that is to be parsed as CONTENT contains a DTD.
1426 *
1427 * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1428 * satisfied by a document with a DTD, which is a bit of a wart, as it means
1429 * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1430 * later fix that, by redefining content with reference to the "more
1431 * permissive" Document Node of the XQuery/XPath Data Model, such that any
1432 * DOCUMENT value is indeed also a CONTENT value. That definition is more
1433 * useful, as CONTENT becomes usable for parsing input of unknown form (think
1434 * pg_restore).
1435 *
1436 * As used below in parse_xml when parsing for CONTENT, libxml does not give
1437 * us the 2006+ behavior, but only the 2003; it will choke if the input has
1438 * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1439 * by detecting this case first and simply doing the parse as DOCUMENT.
1440 *
1441 * A DTD can be found arbitrarily far in, but that would be a contrived case;
1442 * it will ordinarily start within a few dozen characters. The only things
1443 * that can precede it are an XMLDecl (here, the caller will have called
1444 * parse_xml_decl already), whitespace, comments, and processing instructions.
1445 * This function need only return true if it sees a valid sequence of such
1446 * things leading to <!DOCTYPE. It can simply return false in any other
1447 * cases, including malformed input; that will mean the input gets parsed as
1448 * CONTENT as originally planned, with libxml reporting any errors.
1449 *
1450 * This is only to be called from xml_parse, when pg_xml_init has already
1451 * been called. The input is already in UTF8 encoding.
1452 */
1453 static bool
xml_doctype_in_content(const xmlChar * str)1454 xml_doctype_in_content(const xmlChar *str)
1455 {
1456 const xmlChar *p = str;
1457
1458 for (;;)
1459 {
1460 const xmlChar *e;
1461
1462 SKIP_XML_SPACE(p);
1463 if (*p != '<')
1464 return false;
1465 p++;
1466
1467 if (*p == '!')
1468 {
1469 p++;
1470
1471 /* if we see <!DOCTYPE, we can return true */
1472 if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1473 return true;
1474
1475 /* otherwise, if it's not a comment, fail */
1476 if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1477 return false;
1478 /* find end of comment: find -- and a > must follow */
1479 p = xmlStrstr(p + 2, (xmlChar *) "--");
1480 if (!p || p[2] != '>')
1481 return false;
1482 /* advance over comment, and keep scanning */
1483 p += 3;
1484 continue;
1485 }
1486
1487 /* otherwise, if it's not a PI <?target something?>, fail */
1488 if (*p != '?')
1489 return false;
1490 p++;
1491
1492 /* find end of PI (the string ?> is forbidden within a PI) */
1493 e = xmlStrstr(p, (xmlChar *) "?>");
1494 if (!e)
1495 return false;
1496
1497 /* advance over PI, keep scanning */
1498 p = e + 2;
1499 }
1500 }
1501
1502
1503 /*
1504 * Convert a C string to XML internal representation
1505 *
1506 * Note: it is caller's responsibility to xmlFreeDoc() the result,
1507 * else a permanent memory leak will ensue!
1508 *
1509 * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1510 * yet do not use SAX - see xmlreader.c)
1511 */
1512 static xmlDocPtr
xml_parse(text * data,XmlOptionType xmloption_arg,bool preserve_whitespace,int encoding)1513 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1514 int encoding)
1515 {
1516 int32 len;
1517 xmlChar *string;
1518 xmlChar *utf8string;
1519 PgXmlErrorContext *xmlerrcxt;
1520 volatile xmlParserCtxtPtr ctxt = NULL;
1521 volatile xmlDocPtr doc = NULL;
1522
1523 len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
1524 string = xml_text2xmlChar(data);
1525
1526 utf8string = pg_do_encoding_conversion(string,
1527 len,
1528 encoding,
1529 PG_UTF8);
1530
1531 /* Start up libxml and its parser */
1532 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1533
1534 /* Use a TRY block to ensure we clean up correctly */
1535 PG_TRY();
1536 {
1537 bool parse_as_document = false;
1538 int res_code;
1539 size_t count = 0;
1540 xmlChar *version = NULL;
1541 int standalone = 0;
1542
1543 xmlInitParser();
1544
1545 ctxt = xmlNewParserCtxt();
1546 if (ctxt == NULL || xmlerrcxt->err_occurred)
1547 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1548 "could not allocate parser context");
1549
1550 /* Decide whether to parse as document or content */
1551 if (xmloption_arg == XMLOPTION_DOCUMENT)
1552 parse_as_document = true;
1553 else
1554 {
1555 /* Parse and skip over the XML declaration, if any */
1556 res_code = parse_xml_decl(utf8string,
1557 &count, &version, NULL, &standalone);
1558 if (res_code != 0)
1559 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1560 "invalid XML content: invalid XML declaration",
1561 res_code);
1562
1563 /* Is there a DOCTYPE element? */
1564 if (xml_doctype_in_content(utf8string + count))
1565 parse_as_document = true;
1566 }
1567
1568 if (parse_as_document)
1569 {
1570 /*
1571 * Note, that here we try to apply DTD defaults
1572 * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1573 * 'Default values defined by internal DTD are applied'. As for
1574 * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1575 * 10.16.7.e)
1576 */
1577 doc = xmlCtxtReadDoc(ctxt, utf8string,
1578 NULL,
1579 "UTF-8",
1580 XML_PARSE_NOENT | XML_PARSE_DTDATTR
1581 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1582 if (doc == NULL || xmlerrcxt->err_occurred)
1583 {
1584 /* Use original option to decide which error code to throw */
1585 if (xmloption_arg == XMLOPTION_DOCUMENT)
1586 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1587 "invalid XML document");
1588 else
1589 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1590 "invalid XML content");
1591 }
1592 }
1593 else
1594 {
1595 doc = xmlNewDoc(version);
1596 Assert(doc->encoding == NULL);
1597 doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1598 doc->standalone = standalone;
1599
1600 /* allow empty content */
1601 if (*(utf8string + count))
1602 {
1603 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1604 utf8string + count, NULL);
1605 if (res_code != 0 || xmlerrcxt->err_occurred)
1606 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1607 "invalid XML content");
1608 }
1609 }
1610 }
1611 PG_CATCH();
1612 {
1613 if (doc != NULL)
1614 xmlFreeDoc(doc);
1615 if (ctxt != NULL)
1616 xmlFreeParserCtxt(ctxt);
1617
1618 pg_xml_done(xmlerrcxt, true);
1619
1620 PG_RE_THROW();
1621 }
1622 PG_END_TRY();
1623
1624 xmlFreeParserCtxt(ctxt);
1625
1626 pg_xml_done(xmlerrcxt, false);
1627
1628 return doc;
1629 }
1630
1631
1632 /*
1633 * xmlChar<->text conversions
1634 */
1635 static xmlChar *
xml_text2xmlChar(text * in)1636 xml_text2xmlChar(text *in)
1637 {
1638 return (xmlChar *) text_to_cstring(in);
1639 }
1640
1641
1642 #ifdef USE_LIBXMLCONTEXT
1643
1644 /*
1645 * Manage the special context used for all libxml allocations (but only
1646 * in special debug builds; see notes at top of file)
1647 */
1648 static void
xml_memory_init(void)1649 xml_memory_init(void)
1650 {
1651 /* Create memory context if not there already */
1652 if (LibxmlContext == NULL)
1653 LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1654 "Libxml context",
1655 ALLOCSET_DEFAULT_SIZES);
1656
1657 /* Re-establish the callbacks even if already set */
1658 xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1659 }
1660
1661 /*
1662 * Wrappers for memory management functions
1663 */
1664 static void *
xml_palloc(size_t size)1665 xml_palloc(size_t size)
1666 {
1667 return MemoryContextAlloc(LibxmlContext, size);
1668 }
1669
1670
1671 static void *
xml_repalloc(void * ptr,size_t size)1672 xml_repalloc(void *ptr, size_t size)
1673 {
1674 return repalloc(ptr, size);
1675 }
1676
1677
/* Deallocation callback: pfree the chunk, tolerating NULL */
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
1685
1686
1687 static char *
xml_pstrdup(const char * string)1688 xml_pstrdup(const char *string)
1689 {
1690 return MemoryContextStrdup(LibxmlContext, string);
1691 }
1692 #endif /* USE_LIBXMLCONTEXT */
1693
1694
1695 /*
1696 * xmlPgEntityLoader --- entity loader callback function
1697 *
1698 * Silently prevent any external entity URL from being loaded. We don't want
1699 * to throw an error, so instead make the entity appear to expand to an empty
1700 * string.
1701 *
1702 * We would prefer to allow loading entities that exist in the system's
1703 * global XML catalog; but the available libxml2 APIs make that a complex
1704 * and fragile task. For now, just shut down all external access.
1705 */
1706 static xmlParserInputPtr
xmlPgEntityLoader(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)1707 xmlPgEntityLoader(const char *URL, const char *ID,
1708 xmlParserCtxtPtr ctxt)
1709 {
1710 return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1711 }
1712
1713
1714 /*
1715 * xml_ereport --- report an XML-related error
1716 *
1717 * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1718 * standard. This function adds libxml's native error message, if any, as
1719 * detail.
1720 *
1721 * This is exported for modules that want to share the core libxml error
1722 * handler. Note that pg_xml_init() *must* have been called previously.
1723 */
1724 void
xml_ereport(PgXmlErrorContext * errcxt,int level,int sqlcode,const char * msg)1725 xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1726 {
1727 char *detail;
1728
1729 /* Defend against someone passing us a bogus context struct */
1730 if (errcxt->magic != ERRCXT_MAGIC)
1731 elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1732
1733 /* Flag that the current libxml error has been reported */
1734 errcxt->err_occurred = false;
1735
1736 /* Include detail only if we have some text from libxml */
1737 if (errcxt->err_buf.len > 0)
1738 detail = errcxt->err_buf.data;
1739 else
1740 detail = NULL;
1741
1742 ereport(level,
1743 (errcode(sqlcode),
1744 errmsg_internal("%s", msg),
1745 detail ? errdetail_internal("%s", detail) : 0));
1746 }
1747
1748
1749 /*
1750 * Error handler for libxml errors and warnings
1751 */
1752 static void
xml_errorHandler(void * data,xmlErrorPtr error)1753 xml_errorHandler(void *data, xmlErrorPtr error)
1754 {
1755 PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1756 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1757 xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1758 xmlNodePtr node = error->node;
1759 const xmlChar *name = (node != NULL &&
1760 node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1761 int domain = error->domain;
1762 int level = error->level;
1763 StringInfo errorBuf;
1764
1765 /*
1766 * Defend against someone passing us a bogus context struct.
1767 *
1768 * We force a backend exit if this check fails because longjmp'ing out of
1769 * libxml would likely render it unsafe to use further.
1770 */
1771 if (xmlerrcxt->magic != ERRCXT_MAGIC)
1772 elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1773
1774 /*----------
1775 * Older libxml versions report some errors differently.
1776 * First, some errors were previously reported as coming from the parser
1777 * domain but are now reported as coming from the namespace domain.
1778 * Second, some warnings were upgraded to errors.
1779 * We attempt to compensate for that here.
1780 *----------
1781 */
1782 switch (error->code)
1783 {
1784 case XML_WAR_NS_URI:
1785 level = XML_ERR_ERROR;
1786 domain = XML_FROM_NAMESPACE;
1787 break;
1788
1789 case XML_ERR_NS_DECL_ERROR:
1790 case XML_WAR_NS_URI_RELATIVE:
1791 case XML_WAR_NS_COLUMN:
1792 case XML_NS_ERR_XML_NAMESPACE:
1793 case XML_NS_ERR_UNDEFINED_NAMESPACE:
1794 case XML_NS_ERR_QNAME:
1795 case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1796 case XML_NS_ERR_EMPTY:
1797 domain = XML_FROM_NAMESPACE;
1798 break;
1799 }
1800
1801 /* Decide whether to act on the error or not */
1802 switch (domain)
1803 {
1804 case XML_FROM_PARSER:
1805 case XML_FROM_NONE:
1806 case XML_FROM_MEMORY:
1807 case XML_FROM_IO:
1808
1809 /*
1810 * Suppress warnings about undeclared entities. We need to do
1811 * this to avoid problems due to not loading DTD definitions.
1812 */
1813 if (error->code == XML_WAR_UNDECLARED_ENTITY)
1814 return;
1815
1816 /* Otherwise, accept error regardless of the parsing purpose */
1817 break;
1818
1819 default:
1820 /* Ignore error if only doing well-formedness check */
1821 if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1822 return;
1823 break;
1824 }
1825
1826 /* Prepare error message in errorBuf */
1827 errorBuf = makeStringInfo();
1828
1829 if (error->line > 0)
1830 appendStringInfo(errorBuf, "line %d: ", error->line);
1831 if (name != NULL)
1832 appendStringInfo(errorBuf, "element %s: ", name);
1833 if (error->message != NULL)
1834 appendStringInfoString(errorBuf, error->message);
1835 else
1836 appendStringInfoString(errorBuf, "(no message provided)");
1837
1838 /*
1839 * Append context information to errorBuf.
1840 *
1841 * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1842 * write the context. Since we don't want to duplicate libxml
1843 * functionality here, we set up a generic error handler temporarily.
1844 *
1845 * We use appendStringInfo() directly as libxml's generic error handler.
1846 * This should work because it has essentially the same signature as
1847 * libxml expects, namely (void *ptr, const char *msg, ...).
1848 */
1849 if (input != NULL)
1850 {
1851 xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1852 void *errCtxSaved = xmlGenericErrorContext;
1853
1854 xmlSetGenericErrorFunc((void *) errorBuf,
1855 (xmlGenericErrorFunc) appendStringInfo);
1856
1857 /* Add context information to errorBuf */
1858 appendStringInfoLineSeparator(errorBuf);
1859
1860 xmlParserPrintFileContext(input);
1861
1862 /* Restore generic error func */
1863 xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1864 }
1865
1866 /* Get rid of any trailing newlines in errorBuf */
1867 chopStringInfoNewlines(errorBuf);
1868
1869 /*
1870 * Legacy error handling mode. err_occurred is never set, we just add the
1871 * message to err_buf. This mode exists because the xml2 contrib module
1872 * uses our error-handling infrastructure, but we don't want to change its
1873 * behaviour since it's deprecated anyway. This is also why we don't
1874 * distinguish between notices, warnings and errors here --- the old-style
1875 * generic error handler wouldn't have done that either.
1876 */
1877 if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1878 {
1879 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1880 appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
1881 errorBuf->len);
1882
1883 pfree(errorBuf->data);
1884 pfree(errorBuf);
1885 return;
1886 }
1887
1888 /*
1889 * We don't want to ereport() here because that'd probably leave libxml in
1890 * an inconsistent state. Instead, we remember the error and ereport()
1891 * from xml_ereport().
1892 *
1893 * Warnings and notices can be reported immediately since they won't cause
1894 * a longjmp() out of libxml.
1895 */
1896 if (level >= XML_ERR_ERROR)
1897 {
1898 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1899 appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
1900 errorBuf->len);
1901
1902 xmlerrcxt->err_occurred = true;
1903 }
1904 else if (level >= XML_ERR_WARNING)
1905 {
1906 ereport(WARNING,
1907 (errmsg_internal("%s", errorBuf->data)));
1908 }
1909 else
1910 {
1911 ereport(NOTICE,
1912 (errmsg_internal("%s", errorBuf->data)));
1913 }
1914
1915 pfree(errorBuf->data);
1916 pfree(errorBuf);
1917 }
1918
1919
1920 /*
1921 * Wrapper for "ereport" function for XML-related errors. The "msg"
1922 * is the SQL-level message; some can be adopted from the SQL/XML
1923 * standard. This function uses "code" to create a textual detail
1924 * message. At the moment, we only need to cover those codes that we
1925 * may raise in this file.
1926 */
1927 static void
xml_ereport_by_code(int level,int sqlcode,const char * msg,int code)1928 xml_ereport_by_code(int level, int sqlcode,
1929 const char *msg, int code)
1930 {
1931 const char *det;
1932
1933 switch (code)
1934 {
1935 case XML_ERR_INVALID_CHAR:
1936 det = gettext_noop("Invalid character value.");
1937 break;
1938 case XML_ERR_SPACE_REQUIRED:
1939 det = gettext_noop("Space required.");
1940 break;
1941 case XML_ERR_STANDALONE_VALUE:
1942 det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1943 break;
1944 case XML_ERR_VERSION_MISSING:
1945 det = gettext_noop("Malformed declaration: missing version.");
1946 break;
1947 case XML_ERR_MISSING_ENCODING:
1948 det = gettext_noop("Missing encoding in text declaration.");
1949 break;
1950 case XML_ERR_XMLDECL_NOT_FINISHED:
1951 det = gettext_noop("Parsing XML declaration: '?>' expected.");
1952 break;
1953 default:
1954 det = gettext_noop("Unrecognized libxml error code: %d.");
1955 break;
1956 }
1957
1958 ereport(level,
1959 (errcode(sqlcode),
1960 errmsg_internal("%s", msg),
1961 errdetail(det, code)));
1962 }
1963
1964
1965 /*
1966 * Remove all trailing newlines from a StringInfo string
1967 */
1968 static void
chopStringInfoNewlines(StringInfo str)1969 chopStringInfoNewlines(StringInfo str)
1970 {
1971 while (str->len > 0 && str->data[str->len - 1] == '\n')
1972 str->data[--str->len] = '\0';
1973 }
1974
1975
1976 /*
1977 * Append a newline after removing any existing trailing newlines
1978 */
1979 static void
appendStringInfoLineSeparator(StringInfo str)1980 appendStringInfoLineSeparator(StringInfo str)
1981 {
1982 chopStringInfoNewlines(str);
1983 if (str->len > 0)
1984 appendStringInfoChar(str, '\n');
1985 }
1986
1987
1988 /*
1989 * Convert one char in the current server encoding to a Unicode codepoint.
1990 */
1991 static pg_wchar
sqlchar_to_unicode(const char * s)1992 sqlchar_to_unicode(const char *s)
1993 {
1994 char *utf8string;
1995 pg_wchar ret[2]; /* need space for trailing zero */
1996
1997 /* note we're not assuming s is null-terminated */
1998 utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
1999
2000 pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2001 pg_encoding_mblen(PG_UTF8, utf8string));
2002
2003 if (utf8string != s)
2004 pfree(utf8string);
2005
2006 return ret[0];
2007 }
2008
2009
2010 static bool
is_valid_xml_namefirst(pg_wchar c)2011 is_valid_xml_namefirst(pg_wchar c)
2012 {
2013 /* (Letter | '_' | ':') */
2014 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2015 || c == '_' || c == ':');
2016 }
2017
2018
2019 static bool
is_valid_xml_namechar(pg_wchar c)2020 is_valid_xml_namechar(pg_wchar c)
2021 {
2022 /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2023 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2024 || xmlIsDigitQ(c)
2025 || c == '.' || c == '-' || c == '_' || c == ':'
2026 || xmlIsCombiningQ(c)
2027 || xmlIsExtenderQ(c));
2028 }
2029 #endif /* USE_LIBXML */
2030
2031
/*
 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
 *
 * Returns a newly built string in which characters that can't be used
 * as-is are replaced by _xHHHH_ escapes.  fully_escaped additionally
 * escapes every ':' and a leading "xml" (in any case); escape_period
 * escapes '.'.  Without libxml support this reports NO_XML_SUPPORT().
 */
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
							   bool escape_period)
{
#ifdef USE_LIBXML
	StringInfoData result;
	const char *cur;

	/*
	 * SQL/XML doesn't make use of this case anywhere, so it's probably a
	 * mistake.
	 */
	Assert(fully_escaped || !escape_period);

	initStringInfo(&result);

	cur = ident;
	while (*cur != '\0')
	{
		int			chlen = pg_mblen(cur);
		bool		at_start = (cur == ident);

		if (*cur == ':' && (at_start || fully_escaped))
			appendStringInfoString(&result, "_x003A_");
		else if (*cur == '_' && cur[1] == 'x')
		{
			/* would otherwise read as the start of an escape sequence */
			appendStringInfoString(&result, "_x005F_");
		}
		else if (fully_escaped && at_start &&
				 pg_strncasecmp(cur, "xml", 3) == 0)
		{
			/* escape only the leading letter, keeping its case */
			appendStringInfoString(&result,
								   (*cur == 'x') ? "_x0078_" : "_x0058_");
		}
		else if (escape_period && *cur == '.')
			appendStringInfoString(&result, "_x002E_");
		else
		{
			pg_wchar	u = sqlchar_to_unicode(cur);
			bool		valid;

			valid = at_start
				? is_valid_xml_namefirst(u)
				: is_valid_xml_namechar(u);

			if (valid)
				appendBinaryStringInfo(&result, cur, chlen);
			else
				appendStringInfo(&result, "_x%04X_", (unsigned int) u);
		}

		cur += chlen;
	}

	return result.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif							/* not USE_LIBXML */
}
2086
2087
2088 /*
2089 * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2090 */
2091 char *
map_xml_name_to_sql_identifier(const char * name)2092 map_xml_name_to_sql_identifier(const char *name)
2093 {
2094 StringInfoData buf;
2095 const char *p;
2096
2097 initStringInfo(&buf);
2098
2099 for (p = name; *p; p += pg_mblen(p))
2100 {
2101 if (*p == '_' && *(p + 1) == 'x'
2102 && isxdigit((unsigned char) *(p + 2))
2103 && isxdigit((unsigned char) *(p + 3))
2104 && isxdigit((unsigned char) *(p + 4))
2105 && isxdigit((unsigned char) *(p + 5))
2106 && *(p + 6) == '_')
2107 {
2108 char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2109 unsigned int u;
2110
2111 sscanf(p + 2, "%X", &u);
2112 pg_unicode_to_server(u, (unsigned char *) cbuf);
2113 appendStringInfoString(&buf, cbuf);
2114 p += 6;
2115 }
2116 else
2117 appendBinaryStringInfo(&buf, p, pg_mblen(p));
2118 }
2119
2120 return buf.data;
2121 }
2122
2123 /*
2124 * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2125 *
2126 * When xml_escape_strings is true, then certain characters in string
2127 * values are replaced by entity references (< etc.), as specified
2128 * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2129 * wanted. The false case is mainly useful when the resulting value
2130 * is used with xmlTextWriterWriteAttribute() to write out an
2131 * attribute, because that function does the escaping itself.
2132 */
2133 char *
map_sql_value_to_xml_value(Datum value,Oid type,bool xml_escape_strings)2134 map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2135 {
2136 if (type_is_array_domain(type))
2137 {
2138 ArrayType *array;
2139 Oid elmtype;
2140 int16 elmlen;
2141 bool elmbyval;
2142 char elmalign;
2143 int num_elems;
2144 Datum *elem_values;
2145 bool *elem_nulls;
2146 StringInfoData buf;
2147 int i;
2148
2149 array = DatumGetArrayTypeP(value);
2150 elmtype = ARR_ELEMTYPE(array);
2151 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2152
2153 deconstruct_array(array, elmtype,
2154 elmlen, elmbyval, elmalign,
2155 &elem_values, &elem_nulls,
2156 &num_elems);
2157
2158 initStringInfo(&buf);
2159
2160 for (i = 0; i < num_elems; i++)
2161 {
2162 if (elem_nulls[i])
2163 continue;
2164 appendStringInfoString(&buf, "<element>");
2165 appendStringInfoString(&buf,
2166 map_sql_value_to_xml_value(elem_values[i],
2167 elmtype, true));
2168 appendStringInfoString(&buf, "</element>");
2169 }
2170
2171 pfree(elem_values);
2172 pfree(elem_nulls);
2173
2174 return buf.data;
2175 }
2176 else
2177 {
2178 Oid typeOut;
2179 bool isvarlena;
2180 char *str;
2181
2182 /*
2183 * Flatten domains; the special-case treatments below should apply to,
2184 * eg, domains over boolean not just boolean.
2185 */
2186 type = getBaseType(type);
2187
2188 /*
2189 * Special XSD formatting for some data types
2190 */
2191 switch (type)
2192 {
2193 case BOOLOID:
2194 if (DatumGetBool(value))
2195 return "true";
2196 else
2197 return "false";
2198
2199 case DATEOID:
2200 {
2201 DateADT date;
2202 struct pg_tm tm;
2203 char buf[MAXDATELEN + 1];
2204
2205 date = DatumGetDateADT(value);
2206 /* XSD doesn't support infinite values */
2207 if (DATE_NOT_FINITE(date))
2208 ereport(ERROR,
2209 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2210 errmsg("date out of range"),
2211 errdetail("XML does not support infinite date values.")));
2212 j2date(date + POSTGRES_EPOCH_JDATE,
2213 &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2214 EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2215
2216 return pstrdup(buf);
2217 }
2218
2219 case TIMESTAMPOID:
2220 {
2221 Timestamp timestamp;
2222 struct pg_tm tm;
2223 fsec_t fsec;
2224 char buf[MAXDATELEN + 1];
2225
2226 timestamp = DatumGetTimestamp(value);
2227
2228 /* XSD doesn't support infinite values */
2229 if (TIMESTAMP_NOT_FINITE(timestamp))
2230 ereport(ERROR,
2231 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2232 errmsg("timestamp out of range"),
2233 errdetail("XML does not support infinite timestamp values.")));
2234 else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2235 EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2236 else
2237 ereport(ERROR,
2238 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2239 errmsg("timestamp out of range")));
2240
2241 return pstrdup(buf);
2242 }
2243
2244 case TIMESTAMPTZOID:
2245 {
2246 TimestampTz timestamp;
2247 struct pg_tm tm;
2248 int tz;
2249 fsec_t fsec;
2250 const char *tzn = NULL;
2251 char buf[MAXDATELEN + 1];
2252
2253 timestamp = DatumGetTimestamp(value);
2254
2255 /* XSD doesn't support infinite values */
2256 if (TIMESTAMP_NOT_FINITE(timestamp))
2257 ereport(ERROR,
2258 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2259 errmsg("timestamp out of range"),
2260 errdetail("XML does not support infinite timestamp values.")));
2261 else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2262 EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2263 else
2264 ereport(ERROR,
2265 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2266 errmsg("timestamp out of range")));
2267
2268 return pstrdup(buf);
2269 }
2270
2271 #ifdef USE_LIBXML
2272 case BYTEAOID:
2273 {
2274 bytea *bstr = DatumGetByteaPP(value);
2275 PgXmlErrorContext *xmlerrcxt;
2276 volatile xmlBufferPtr buf = NULL;
2277 volatile xmlTextWriterPtr writer = NULL;
2278 char *result;
2279
2280 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2281
2282 PG_TRY();
2283 {
2284 buf = xmlBufferCreate();
2285 if (buf == NULL || xmlerrcxt->err_occurred)
2286 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2287 "could not allocate xmlBuffer");
2288 writer = xmlNewTextWriterMemory(buf, 0);
2289 if (writer == NULL || xmlerrcxt->err_occurred)
2290 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2291 "could not allocate xmlTextWriter");
2292
2293 if (xmlbinary == XMLBINARY_BASE64)
2294 xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2295 0, VARSIZE_ANY_EXHDR(bstr));
2296 else
2297 xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2298 0, VARSIZE_ANY_EXHDR(bstr));
2299
2300 /* we MUST do this now to flush data out to the buffer */
2301 xmlFreeTextWriter(writer);
2302 writer = NULL;
2303
2304 result = pstrdup((const char *) xmlBufferContent(buf));
2305 }
2306 PG_CATCH();
2307 {
2308 if (writer)
2309 xmlFreeTextWriter(writer);
2310 if (buf)
2311 xmlBufferFree(buf);
2312
2313 pg_xml_done(xmlerrcxt, true);
2314
2315 PG_RE_THROW();
2316 }
2317 PG_END_TRY();
2318
2319 xmlBufferFree(buf);
2320
2321 pg_xml_done(xmlerrcxt, false);
2322
2323 return result;
2324 }
2325 #endif /* USE_LIBXML */
2326
2327 }
2328
2329 /*
2330 * otherwise, just use the type's native text representation
2331 */
2332 getTypeOutputInfo(type, &typeOut, &isvarlena);
2333 str = OidOutputFunctionCall(typeOut, value);
2334
2335 /* ... exactly as-is for XML, and when escaping is not wanted */
2336 if (type == XMLOID || !xml_escape_strings)
2337 return str;
2338
2339 /* otherwise, translate special characters as needed */
2340 return escape_xml(str);
2341 }
2342 }
2343
2344
2345 /*
2346 * Escape characters in text that have special meanings in XML.
2347 *
2348 * Returns a palloc'd string.
2349 *
2350 * NB: this is intentionally not dependent on libxml.
2351 */
2352 char *
escape_xml(const char * str)2353 escape_xml(const char *str)
2354 {
2355 StringInfoData buf;
2356 const char *p;
2357
2358 initStringInfo(&buf);
2359 for (p = str; *p; p++)
2360 {
2361 switch (*p)
2362 {
2363 case '&':
2364 appendStringInfoString(&buf, "&");
2365 break;
2366 case '<':
2367 appendStringInfoString(&buf, "<");
2368 break;
2369 case '>':
2370 appendStringInfoString(&buf, ">");
2371 break;
2372 case '\r':
2373 appendStringInfoString(&buf, "
");
2374 break;
2375 default:
2376 appendStringInfoCharMacro(&buf, *p);
2377 break;
2378 }
2379 }
2380 return buf.data;
2381 }
2382
2383
/*
 * Copy a NUL-terminated string, terminator included, into memory obtained
 * from SPI_palloc().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes = strlen(s) + 1; /* include the trailing NUL */
	char	   *copy;

	copy = SPI_palloc(nbytes);

	return memcpy(copy, s, nbytes);
}
2393
2394
2395 /*
2396 * SQL to XML mapping functions
2397 *
2398 * What follows below was at one point intentionally organized so that
2399 * you can read along in the SQL/XML standard. The functions are
2400 * mostly split up the way the clauses lay out in the standards
2401 * document, and the identifiers are also aligned with the standard
2402 * text. Unfortunately, SQL/XML:2006 reordered the clauses
2403 * differently than SQL/XML:2003, so the order below doesn't make much
2404 * sense anymore.
2405 *
2406 * There are many things going on there:
2407 *
2408 * There are two kinds of mappings: Mapping SQL data (table contents)
2409 * to XML documents, and mapping SQL structure (the "schema") to XML
2410 * Schema. And there are functions that do both at the same time.
2411 *
2412 * Then you can map a database, a schema, or a table, each in both
2413 * ways. This breaks down recursively: Mapping a database invokes
2414 * mapping schemas, which invokes mapping tables, which invokes
2415 * mapping rows, which invokes mapping columns, although you can't
2416 * call the last two from the outside. Because of this, there are a
2417 * number of xyz_internal() functions which are to be called both from
2418 * the function manager wrapper and from some upper layer in a
2419 * recursive call.
2420 *
2421 * See the documentation about what the common function arguments
2422 * nulls, tableforest, and targetns mean.
2423 *
2424 * Some style guidelines for XML output: Use double quotes for quoting
2425 * XML attributes. Indent XML elements by two spaces, but remember
2426 * that a lot of code is called recursively at different levels, so
2427 * it's better not to indent rather than create output that indents
2428 * and outdents weirdly. Add newlines to make the output look nice.
2429 */
2430
2431
2432 /*
2433 * Visibility of objects for XML mappings; see SQL/XML:2008 section
2434 * 4.10.8.
2435 */
2436
2437 /*
2438 * Given a query, which must return type oid as first column, produce
2439 * a list of Oids with the query results.
2440 */
2441 static List *
query_to_oid_list(const char * query)2442 query_to_oid_list(const char *query)
2443 {
2444 uint64 i;
2445 List *list = NIL;
2446 int spi_result;
2447
2448 spi_result = SPI_execute(query, true, 0);
2449 if (spi_result != SPI_OK_SELECT)
2450 elog(ERROR, "SPI_execute returned %s for %s",
2451 SPI_result_code_string(spi_result), query);
2452
2453 for (i = 0; i < SPI_processed; i++)
2454 {
2455 Datum oid;
2456 bool isnull;
2457
2458 oid = SPI_getbinval(SPI_tuptable->vals[i],
2459 SPI_tuptable->tupdesc,
2460 1,
2461 &isnull);
2462 if (!isnull)
2463 list = lappend_oid(list, DatumGetObjectId(oid));
2464 }
2465
2466 return list;
2467 }
2468
2469
2470 static List *
schema_get_xml_visible_tables(Oid nspid)2471 schema_get_xml_visible_tables(Oid nspid)
2472 {
2473 StringInfoData query;
2474
2475 initStringInfo(&query);
2476 appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2477 " WHERE relnamespace = %u AND relkind IN ("
2478 CppAsString2(RELKIND_RELATION) ","
2479 CppAsString2(RELKIND_MATVIEW) ","
2480 CppAsString2(RELKIND_VIEW) ")"
2481 " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2482 " ORDER BY relname;", nspid);
2483
2484 return query_to_oid_list(query.data);
2485 }
2486
2487
/*
 * Including the system schemas is probably not useful for a database
 * mapping, so schemas named pg_* and information_schema are filtered out.
 *
 * XML_VISIBLE_SCHEMAS is a query fragment without a terminating semicolon,
 * so that users can append further clauses or embed it as a subquery.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_' OR nspname = 'information_schema')"

#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace" \
	" WHERE pg_catalog.has_schema_privilege (oid, 'USAGE')" \
	" AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2495
2496
2497 static List *
database_get_xml_visible_schemas(void)2498 database_get_xml_visible_schemas(void)
2499 {
2500 return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2501 }
2502
2503
2504 static List *
database_get_xml_visible_tables(void)2505 database_get_xml_visible_tables(void)
2506 {
2507 /* At the moment there is no order required here. */
2508 return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2509 " WHERE relkind IN ("
2510 CppAsString2(RELKIND_RELATION) ","
2511 CppAsString2(RELKIND_MATVIEW) ","
2512 CppAsString2(RELKIND_VIEW) ")"
2513 " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2514 " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2515 }
2516
2517
2518 /*
2519 * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2520 * section 9.11.
2521 */
2522
2523 static StringInfo
table_to_xml_internal(Oid relid,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2524 table_to_xml_internal(Oid relid,
2525 const char *xmlschema, bool nulls, bool tableforest,
2526 const char *targetns, bool top_level)
2527 {
2528 StringInfoData query;
2529
2530 initStringInfo(&query);
2531 appendStringInfo(&query, "SELECT * FROM %s",
2532 DatumGetCString(DirectFunctionCall1(regclassout,
2533 ObjectIdGetDatum(relid))));
2534 return query_to_xml_internal(query.data, get_rel_name(relid),
2535 xmlschema, nulls, tableforest,
2536 targetns, top_level);
2537 }
2538
2539
2540 Datum
table_to_xml(PG_FUNCTION_ARGS)2541 table_to_xml(PG_FUNCTION_ARGS)
2542 {
2543 Oid relid = PG_GETARG_OID(0);
2544 bool nulls = PG_GETARG_BOOL(1);
2545 bool tableforest = PG_GETARG_BOOL(2);
2546 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2547
2548 PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2549 nulls, tableforest,
2550 targetns, true)));
2551 }
2552
2553
2554 Datum
query_to_xml(PG_FUNCTION_ARGS)2555 query_to_xml(PG_FUNCTION_ARGS)
2556 {
2557 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2558 bool nulls = PG_GETARG_BOOL(1);
2559 bool tableforest = PG_GETARG_BOOL(2);
2560 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2561
2562 PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2563 NULL, nulls, tableforest,
2564 targetns, true)));
2565 }
2566
2567
2568 Datum
cursor_to_xml(PG_FUNCTION_ARGS)2569 cursor_to_xml(PG_FUNCTION_ARGS)
2570 {
2571 char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2572 int32 count = PG_GETARG_INT32(1);
2573 bool nulls = PG_GETARG_BOOL(2);
2574 bool tableforest = PG_GETARG_BOOL(3);
2575 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2576
2577 StringInfoData result;
2578 Portal portal;
2579 uint64 i;
2580
2581 initStringInfo(&result);
2582
2583 if (!tableforest)
2584 {
2585 xmldata_root_element_start(&result, "table", NULL, targetns, true);
2586 appendStringInfoChar(&result, '\n');
2587 }
2588
2589 SPI_connect();
2590 portal = SPI_cursor_find(name);
2591 if (portal == NULL)
2592 ereport(ERROR,
2593 (errcode(ERRCODE_UNDEFINED_CURSOR),
2594 errmsg("cursor \"%s\" does not exist", name)));
2595
2596 SPI_cursor_fetch(portal, true, count);
2597 for (i = 0; i < SPI_processed; i++)
2598 SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2599 tableforest, targetns, true);
2600
2601 SPI_finish();
2602
2603 if (!tableforest)
2604 xmldata_root_element_end(&result, "table");
2605
2606 PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2607 }
2608
2609
2610 /*
2611 * Write the start tag of the root element of a data mapping.
2612 *
2613 * top_level means that this is the very top level of the eventual
2614 * output. For example, when the user calls table_to_xml, then a call
2615 * with a table name to this function is the top level. When the user
2616 * calls database_to_xml, then a call with a schema name to this
2617 * function is not the top level. If top_level is false, then the XML
2618 * namespace declarations are omitted, because they supposedly already
2619 * appeared earlier in the output. Repeating them is not wrong, but
2620 * it looks ugly.
2621 */
2622 static void
xmldata_root_element_start(StringInfo result,const char * eltname,const char * xmlschema,const char * targetns,bool top_level)2623 xmldata_root_element_start(StringInfo result, const char *eltname,
2624 const char *xmlschema, const char *targetns,
2625 bool top_level)
2626 {
2627 /* This isn't really wrong but currently makes no sense. */
2628 Assert(top_level || !xmlschema);
2629
2630 appendStringInfo(result, "<%s", eltname);
2631 if (top_level)
2632 {
2633 appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2634 if (strlen(targetns) > 0)
2635 appendStringInfo(result, " xmlns=\"%s\"", targetns);
2636 }
2637 if (xmlschema)
2638 {
2639 /* FIXME: better targets */
2640 if (strlen(targetns) > 0)
2641 appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2642 else
2643 appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2644 }
2645 appendStringInfoString(result, ">\n");
2646 }
2647
2648
2649 static void
xmldata_root_element_end(StringInfo result,const char * eltname)2650 xmldata_root_element_end(StringInfo result, const char *eltname)
2651 {
2652 appendStringInfo(result, "</%s>\n", eltname);
2653 }
2654
2655
2656 static StringInfo
query_to_xml_internal(const char * query,char * tablename,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2657 query_to_xml_internal(const char *query, char *tablename,
2658 const char *xmlschema, bool nulls, bool tableforest,
2659 const char *targetns, bool top_level)
2660 {
2661 StringInfo result;
2662 char *xmltn;
2663 uint64 i;
2664
2665 if (tablename)
2666 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2667 else
2668 xmltn = "table";
2669
2670 result = makeStringInfo();
2671
2672 SPI_connect();
2673 if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2674 ereport(ERROR,
2675 (errcode(ERRCODE_DATA_EXCEPTION),
2676 errmsg("invalid query")));
2677
2678 if (!tableforest)
2679 {
2680 xmldata_root_element_start(result, xmltn, xmlschema,
2681 targetns, top_level);
2682 appendStringInfoChar(result, '\n');
2683 }
2684
2685 if (xmlschema)
2686 appendStringInfo(result, "%s\n\n", xmlschema);
2687
2688 for (i = 0; i < SPI_processed; i++)
2689 SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2690 tableforest, targetns, top_level);
2691
2692 if (!tableforest)
2693 xmldata_root_element_end(result, xmltn);
2694
2695 SPI_finish();
2696
2697 return result;
2698 }
2699
2700
2701 Datum
table_to_xmlschema(PG_FUNCTION_ARGS)2702 table_to_xmlschema(PG_FUNCTION_ARGS)
2703 {
2704 Oid relid = PG_GETARG_OID(0);
2705 bool nulls = PG_GETARG_BOOL(1);
2706 bool tableforest = PG_GETARG_BOOL(2);
2707 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2708 const char *result;
2709 Relation rel;
2710
2711 rel = table_open(relid, AccessShareLock);
2712 result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2713 tableforest, targetns);
2714 table_close(rel, NoLock);
2715
2716 PG_RETURN_XML_P(cstring_to_xmltype(result));
2717 }
2718
2719
2720 Datum
query_to_xmlschema(PG_FUNCTION_ARGS)2721 query_to_xmlschema(PG_FUNCTION_ARGS)
2722 {
2723 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2724 bool nulls = PG_GETARG_BOOL(1);
2725 bool tableforest = PG_GETARG_BOOL(2);
2726 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2727 const char *result;
2728 SPIPlanPtr plan;
2729 Portal portal;
2730
2731 SPI_connect();
2732
2733 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2734 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2735
2736 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2737 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2738
2739 result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2740 InvalidOid, nulls,
2741 tableforest, targetns));
2742 SPI_cursor_close(portal);
2743 SPI_finish();
2744
2745 PG_RETURN_XML_P(cstring_to_xmltype(result));
2746 }
2747
2748
2749 Datum
cursor_to_xmlschema(PG_FUNCTION_ARGS)2750 cursor_to_xmlschema(PG_FUNCTION_ARGS)
2751 {
2752 char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2753 bool nulls = PG_GETARG_BOOL(1);
2754 bool tableforest = PG_GETARG_BOOL(2);
2755 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2756 const char *xmlschema;
2757 Portal portal;
2758
2759 SPI_connect();
2760 portal = SPI_cursor_find(name);
2761 if (portal == NULL)
2762 ereport(ERROR,
2763 (errcode(ERRCODE_UNDEFINED_CURSOR),
2764 errmsg("cursor \"%s\" does not exist", name)));
2765
2766 xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2767 InvalidOid, nulls,
2768 tableforest, targetns));
2769 SPI_finish();
2770
2771 PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2772 }
2773
2774
2775 Datum
table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2776 table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2777 {
2778 Oid relid = PG_GETARG_OID(0);
2779 bool nulls = PG_GETARG_BOOL(1);
2780 bool tableforest = PG_GETARG_BOOL(2);
2781 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2782 Relation rel;
2783 const char *xmlschema;
2784
2785 rel = table_open(relid, AccessShareLock);
2786 xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2787 tableforest, targetns);
2788 table_close(rel, NoLock);
2789
2790 PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2791 xmlschema, nulls, tableforest,
2792 targetns, true)));
2793 }
2794
2795
2796 Datum
query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2797 query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2798 {
2799 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2800 bool nulls = PG_GETARG_BOOL(1);
2801 bool tableforest = PG_GETARG_BOOL(2);
2802 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2803
2804 const char *xmlschema;
2805 SPIPlanPtr plan;
2806 Portal portal;
2807
2808 SPI_connect();
2809
2810 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2811 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2812
2813 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2814 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2815
2816 xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2817 InvalidOid, nulls, tableforest, targetns));
2818 SPI_cursor_close(portal);
2819 SPI_finish();
2820
2821 PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2822 xmlschema, nulls, tableforest,
2823 targetns, true)));
2824 }
2825
2826
2827 /*
2828 * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2829 * sections 9.13, 9.14.
2830 */
2831
2832 static StringInfo
schema_to_xml_internal(Oid nspid,const char * xmlschema,bool nulls,bool tableforest,const char * targetns,bool top_level)2833 schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2834 bool tableforest, const char *targetns, bool top_level)
2835 {
2836 StringInfo result;
2837 char *xmlsn;
2838 List *relid_list;
2839 ListCell *cell;
2840
2841 xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2842 true, false);
2843 result = makeStringInfo();
2844
2845 xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2846 appendStringInfoChar(result, '\n');
2847
2848 if (xmlschema)
2849 appendStringInfo(result, "%s\n\n", xmlschema);
2850
2851 SPI_connect();
2852
2853 relid_list = schema_get_xml_visible_tables(nspid);
2854
2855 foreach(cell, relid_list)
2856 {
2857 Oid relid = lfirst_oid(cell);
2858 StringInfo subres;
2859
2860 subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2861 targetns, false);
2862
2863 appendBinaryStringInfo(result, subres->data, subres->len);
2864 appendStringInfoChar(result, '\n');
2865 }
2866
2867 SPI_finish();
2868
2869 xmldata_root_element_end(result, xmlsn);
2870
2871 return result;
2872 }
2873
2874
2875 Datum
schema_to_xml(PG_FUNCTION_ARGS)2876 schema_to_xml(PG_FUNCTION_ARGS)
2877 {
2878 Name name = PG_GETARG_NAME(0);
2879 bool nulls = PG_GETARG_BOOL(1);
2880 bool tableforest = PG_GETARG_BOOL(2);
2881 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2882
2883 char *schemaname;
2884 Oid nspid;
2885
2886 schemaname = NameStr(*name);
2887 nspid = LookupExplicitNamespace(schemaname, false);
2888
2889 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2890 nulls, tableforest, targetns, true)));
2891 }
2892
2893
2894 /*
2895 * Write the start element of the root element of an XML Schema mapping.
2896 */
2897 static void
xsd_schema_element_start(StringInfo result,const char * targetns)2898 xsd_schema_element_start(StringInfo result, const char *targetns)
2899 {
2900 appendStringInfoString(result,
2901 "<xsd:schema\n"
2902 " xmlns:xsd=\"" NAMESPACE_XSD "\"");
2903 if (strlen(targetns) > 0)
2904 appendStringInfo(result,
2905 "\n"
2906 " targetNamespace=\"%s\"\n"
2907 " elementFormDefault=\"qualified\"",
2908 targetns);
2909 appendStringInfoString(result,
2910 ">\n\n");
2911 }
2912
2913
2914 static void
xsd_schema_element_end(StringInfo result)2915 xsd_schema_element_end(StringInfo result)
2916 {
2917 appendStringInfoString(result, "</xsd:schema>");
2918 }
2919
2920
2921 static StringInfo
schema_to_xmlschema_internal(const char * schemaname,bool nulls,bool tableforest,const char * targetns)2922 schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2923 bool tableforest, const char *targetns)
2924 {
2925 Oid nspid;
2926 List *relid_list;
2927 List *tupdesc_list;
2928 ListCell *cell;
2929 StringInfo result;
2930
2931 result = makeStringInfo();
2932
2933 nspid = LookupExplicitNamespace(schemaname, false);
2934
2935 xsd_schema_element_start(result, targetns);
2936
2937 SPI_connect();
2938
2939 relid_list = schema_get_xml_visible_tables(nspid);
2940
2941 tupdesc_list = NIL;
2942 foreach(cell, relid_list)
2943 {
2944 Relation rel;
2945
2946 rel = table_open(lfirst_oid(cell), AccessShareLock);
2947 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2948 table_close(rel, NoLock);
2949 }
2950
2951 appendStringInfoString(result,
2952 map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2953
2954 appendStringInfoString(result,
2955 map_sql_schema_to_xmlschema_types(nspid, relid_list,
2956 nulls, tableforest, targetns));
2957
2958 xsd_schema_element_end(result);
2959
2960 SPI_finish();
2961
2962 return result;
2963 }
2964
2965
2966 Datum
schema_to_xmlschema(PG_FUNCTION_ARGS)2967 schema_to_xmlschema(PG_FUNCTION_ARGS)
2968 {
2969 Name name = PG_GETARG_NAME(0);
2970 bool nulls = PG_GETARG_BOOL(1);
2971 bool tableforest = PG_GETARG_BOOL(2);
2972 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2973
2974 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2975 nulls, tableforest, targetns)));
2976 }
2977
2978
2979 Datum
schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)2980 schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2981 {
2982 Name name = PG_GETARG_NAME(0);
2983 bool nulls = PG_GETARG_BOOL(1);
2984 bool tableforest = PG_GETARG_BOOL(2);
2985 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2986 char *schemaname;
2987 Oid nspid;
2988 StringInfo xmlschema;
2989
2990 schemaname = NameStr(*name);
2991 nspid = LookupExplicitNamespace(schemaname, false);
2992
2993 xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
2994 tableforest, targetns);
2995
2996 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
2997 xmlschema->data, nulls,
2998 tableforest, targetns, true)));
2999 }
3000
3001
3002 /*
3003 * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3004 * sections 9.16, 9.17.
3005 */
3006
3007 static StringInfo
database_to_xml_internal(const char * xmlschema,bool nulls,bool tableforest,const char * targetns)3008 database_to_xml_internal(const char *xmlschema, bool nulls,
3009 bool tableforest, const char *targetns)
3010 {
3011 StringInfo result;
3012 List *nspid_list;
3013 ListCell *cell;
3014 char *xmlcn;
3015
3016 xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3017 true, false);
3018 result = makeStringInfo();
3019
3020 xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3021 appendStringInfoChar(result, '\n');
3022
3023 if (xmlschema)
3024 appendStringInfo(result, "%s\n\n", xmlschema);
3025
3026 SPI_connect();
3027
3028 nspid_list = database_get_xml_visible_schemas();
3029
3030 foreach(cell, nspid_list)
3031 {
3032 Oid nspid = lfirst_oid(cell);
3033 StringInfo subres;
3034
3035 subres = schema_to_xml_internal(nspid, NULL, nulls,
3036 tableforest, targetns, false);
3037
3038 appendBinaryStringInfo(result, subres->data, subres->len);
3039 appendStringInfoChar(result, '\n');
3040 }
3041
3042 SPI_finish();
3043
3044 xmldata_root_element_end(result, xmlcn);
3045
3046 return result;
3047 }
3048
3049
3050 Datum
database_to_xml(PG_FUNCTION_ARGS)3051 database_to_xml(PG_FUNCTION_ARGS)
3052 {
3053 bool nulls = PG_GETARG_BOOL(0);
3054 bool tableforest = PG_GETARG_BOOL(1);
3055 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3056
3057 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3058 tableforest, targetns)));
3059 }
3060
3061
3062 static StringInfo
database_to_xmlschema_internal(bool nulls,bool tableforest,const char * targetns)3063 database_to_xmlschema_internal(bool nulls, bool tableforest,
3064 const char *targetns)
3065 {
3066 List *relid_list;
3067 List *nspid_list;
3068 List *tupdesc_list;
3069 ListCell *cell;
3070 StringInfo result;
3071
3072 result = makeStringInfo();
3073
3074 xsd_schema_element_start(result, targetns);
3075
3076 SPI_connect();
3077
3078 relid_list = database_get_xml_visible_tables();
3079 nspid_list = database_get_xml_visible_schemas();
3080
3081 tupdesc_list = NIL;
3082 foreach(cell, relid_list)
3083 {
3084 Relation rel;
3085
3086 rel = table_open(lfirst_oid(cell), AccessShareLock);
3087 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3088 table_close(rel, NoLock);
3089 }
3090
3091 appendStringInfoString(result,
3092 map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3093
3094 appendStringInfoString(result,
3095 map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3096
3097 xsd_schema_element_end(result);
3098
3099 SPI_finish();
3100
3101 return result;
3102 }
3103
3104
3105 Datum
database_to_xmlschema(PG_FUNCTION_ARGS)3106 database_to_xmlschema(PG_FUNCTION_ARGS)
3107 {
3108 bool nulls = PG_GETARG_BOOL(0);
3109 bool tableforest = PG_GETARG_BOOL(1);
3110 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3111
3112 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3113 tableforest, targetns)));
3114 }
3115
3116
3117 Datum
database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)3118 database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3119 {
3120 bool nulls = PG_GETARG_BOOL(0);
3121 bool tableforest = PG_GETARG_BOOL(1);
3122 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3123 StringInfo xmlschema;
3124
3125 xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3126
3127 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3128 nulls, tableforest, targetns)));
3129 }
3130
3131
3132 /*
3133 * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3134 * 9.2.
3135 */
3136 static char *
map_multipart_sql_identifier_to_xml_name(const char * a,const char * b,const char * c,const char * d)3137 map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3138 {
3139 StringInfoData result;
3140
3141 initStringInfo(&result);
3142
3143 if (a)
3144 appendStringInfoString(&result,
3145 map_sql_identifier_to_xml_name(a, true, true));
3146 if (b)
3147 appendStringInfo(&result, ".%s",
3148 map_sql_identifier_to_xml_name(b, true, true));
3149 if (c)
3150 appendStringInfo(&result, ".%s",
3151 map_sql_identifier_to_xml_name(c, true, true));
3152 if (d)
3153 appendStringInfo(&result, ".%s",
3154 map_sql_identifier_to_xml_name(d, true, true));
3155
3156 return result.data;
3157 }
3158
3159
3160 /*
3161 * Map an SQL table to an XML Schema document; see SQL/XML:2008
3162 * section 9.11.
3163 *
3164 * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3165 * 9.9.
3166 */
3167 static const char *
map_sql_table_to_xmlschema(TupleDesc tupdesc,Oid relid,bool nulls,bool tableforest,const char * targetns)3168 map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3169 bool tableforest, const char *targetns)
3170 {
3171 int i;
3172 char *xmltn;
3173 char *tabletypename;
3174 char *rowtypename;
3175 StringInfoData result;
3176
3177 initStringInfo(&result);
3178
3179 if (OidIsValid(relid))
3180 {
3181 HeapTuple tuple;
3182 Form_pg_class reltuple;
3183
3184 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3185 if (!HeapTupleIsValid(tuple))
3186 elog(ERROR, "cache lookup failed for relation %u", relid);
3187 reltuple = (Form_pg_class) GETSTRUCT(tuple);
3188
3189 xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3190 true, false);
3191
3192 tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3193 get_database_name(MyDatabaseId),
3194 get_namespace_name(reltuple->relnamespace),
3195 NameStr(reltuple->relname));
3196
3197 rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3198 get_database_name(MyDatabaseId),
3199 get_namespace_name(reltuple->relnamespace),
3200 NameStr(reltuple->relname));
3201
3202 ReleaseSysCache(tuple);
3203 }
3204 else
3205 {
3206 if (tableforest)
3207 xmltn = "row";
3208 else
3209 xmltn = "table";
3210
3211 tabletypename = "TableType";
3212 rowtypename = "RowType";
3213 }
3214
3215 xsd_schema_element_start(&result, targetns);
3216
3217 appendStringInfoString(&result,
3218 map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3219
3220 appendStringInfo(&result,
3221 "<xsd:complexType name=\"%s\">\n"
3222 " <xsd:sequence>\n",
3223 rowtypename);
3224
3225 for (i = 0; i < tupdesc->natts; i++)
3226 {
3227 Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3228
3229 if (att->attisdropped)
3230 continue;
3231 appendStringInfo(&result,
3232 " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3233 map_sql_identifier_to_xml_name(NameStr(att->attname),
3234 true, false),
3235 map_sql_type_to_xml_name(att->atttypid, -1),
3236 nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3237 }
3238
3239 appendStringInfoString(&result,
3240 " </xsd:sequence>\n"
3241 "</xsd:complexType>\n\n");
3242
3243 if (!tableforest)
3244 {
3245 appendStringInfo(&result,
3246 "<xsd:complexType name=\"%s\">\n"
3247 " <xsd:sequence>\n"
3248 " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3249 " </xsd:sequence>\n"
3250 "</xsd:complexType>\n\n",
3251 tabletypename, rowtypename);
3252
3253 appendStringInfo(&result,
3254 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3255 xmltn, tabletypename);
3256 }
3257 else
3258 appendStringInfo(&result,
3259 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3260 xmltn, rowtypename);
3261
3262 xsd_schema_element_end(&result);
3263
3264 return result.data;
3265 }
3266
3267
3268 /*
3269 * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3270 * section 9.12.
3271 */
3272 static const char *
map_sql_schema_to_xmlschema_types(Oid nspid,List * relid_list,bool nulls,bool tableforest,const char * targetns)3273 map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3274 bool tableforest, const char *targetns)
3275 {
3276 char *dbname;
3277 char *nspname;
3278 char *xmlsn;
3279 char *schematypename;
3280 StringInfoData result;
3281 ListCell *cell;
3282
3283 dbname = get_database_name(MyDatabaseId);
3284 nspname = get_namespace_name(nspid);
3285
3286 initStringInfo(&result);
3287
3288 xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3289
3290 schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3291 dbname,
3292 nspname,
3293 NULL);
3294
3295 appendStringInfo(&result,
3296 "<xsd:complexType name=\"%s\">\n", schematypename);
3297 if (!tableforest)
3298 appendStringInfoString(&result,
3299 " <xsd:all>\n");
3300 else
3301 appendStringInfoString(&result,
3302 " <xsd:sequence>\n");
3303
3304 foreach(cell, relid_list)
3305 {
3306 Oid relid = lfirst_oid(cell);
3307 char *relname = get_rel_name(relid);
3308 char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3309 char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3310 dbname,
3311 nspname,
3312 relname);
3313
3314 if (!tableforest)
3315 appendStringInfo(&result,
3316 " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3317 xmltn, tabletypename);
3318 else
3319 appendStringInfo(&result,
3320 " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3321 xmltn, tabletypename);
3322 }
3323
3324 if (!tableforest)
3325 appendStringInfoString(&result,
3326 " </xsd:all>\n");
3327 else
3328 appendStringInfoString(&result,
3329 " </xsd:sequence>\n");
3330 appendStringInfoString(&result,
3331 "</xsd:complexType>\n\n");
3332
3333 appendStringInfo(&result,
3334 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3335 xmlsn, schematypename);
3336
3337 return result.data;
3338 }
3339
3340
3341 /*
3342 * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3343 * section 9.15.
3344 */
3345 static const char *
map_sql_catalog_to_xmlschema_types(List * nspid_list,bool nulls,bool tableforest,const char * targetns)3346 map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3347 bool tableforest, const char *targetns)
3348 {
3349 char *dbname;
3350 char *xmlcn;
3351 char *catalogtypename;
3352 StringInfoData result;
3353 ListCell *cell;
3354
3355 dbname = get_database_name(MyDatabaseId);
3356
3357 initStringInfo(&result);
3358
3359 xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3360
3361 catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3362 dbname,
3363 NULL,
3364 NULL);
3365
3366 appendStringInfo(&result,
3367 "<xsd:complexType name=\"%s\">\n", catalogtypename);
3368 appendStringInfoString(&result,
3369 " <xsd:all>\n");
3370
3371 foreach(cell, nspid_list)
3372 {
3373 Oid nspid = lfirst_oid(cell);
3374 char *nspname = get_namespace_name(nspid);
3375 char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3376 char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3377 dbname,
3378 nspname,
3379 NULL);
3380
3381 appendStringInfo(&result,
3382 " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3383 xmlsn, schematypename);
3384 }
3385
3386 appendStringInfoString(&result,
3387 " </xsd:all>\n");
3388 appendStringInfoString(&result,
3389 "</xsd:complexType>\n\n");
3390
3391 appendStringInfo(&result,
3392 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3393 xmlcn, catalogtypename);
3394
3395 return result.data;
3396 }
3397
3398
3399 /*
3400 * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3401 */
3402 static const char *
map_sql_type_to_xml_name(Oid typeoid,int typmod)3403 map_sql_type_to_xml_name(Oid typeoid, int typmod)
3404 {
3405 StringInfoData result;
3406
3407 initStringInfo(&result);
3408
3409 switch (typeoid)
3410 {
3411 case BPCHAROID:
3412 if (typmod == -1)
3413 appendStringInfoString(&result, "CHAR");
3414 else
3415 appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3416 break;
3417 case VARCHAROID:
3418 if (typmod == -1)
3419 appendStringInfoString(&result, "VARCHAR");
3420 else
3421 appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3422 break;
3423 case NUMERICOID:
3424 if (typmod == -1)
3425 appendStringInfoString(&result, "NUMERIC");
3426 else
3427 appendStringInfo(&result, "NUMERIC_%d_%d",
3428 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3429 (typmod - VARHDRSZ) & 0xffff);
3430 break;
3431 case INT4OID:
3432 appendStringInfoString(&result, "INTEGER");
3433 break;
3434 case INT2OID:
3435 appendStringInfoString(&result, "SMALLINT");
3436 break;
3437 case INT8OID:
3438 appendStringInfoString(&result, "BIGINT");
3439 break;
3440 case FLOAT4OID:
3441 appendStringInfoString(&result, "REAL");
3442 break;
3443 case FLOAT8OID:
3444 appendStringInfoString(&result, "DOUBLE");
3445 break;
3446 case BOOLOID:
3447 appendStringInfoString(&result, "BOOLEAN");
3448 break;
3449 case TIMEOID:
3450 if (typmod == -1)
3451 appendStringInfoString(&result, "TIME");
3452 else
3453 appendStringInfo(&result, "TIME_%d", typmod);
3454 break;
3455 case TIMETZOID:
3456 if (typmod == -1)
3457 appendStringInfoString(&result, "TIME_WTZ");
3458 else
3459 appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3460 break;
3461 case TIMESTAMPOID:
3462 if (typmod == -1)
3463 appendStringInfoString(&result, "TIMESTAMP");
3464 else
3465 appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3466 break;
3467 case TIMESTAMPTZOID:
3468 if (typmod == -1)
3469 appendStringInfoString(&result, "TIMESTAMP_WTZ");
3470 else
3471 appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3472 break;
3473 case DATEOID:
3474 appendStringInfoString(&result, "DATE");
3475 break;
3476 case XMLOID:
3477 appendStringInfoString(&result, "XML");
3478 break;
3479 default:
3480 {
3481 HeapTuple tuple;
3482 Form_pg_type typtuple;
3483
3484 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3485 if (!HeapTupleIsValid(tuple))
3486 elog(ERROR, "cache lookup failed for type %u", typeoid);
3487 typtuple = (Form_pg_type) GETSTRUCT(tuple);
3488
3489 appendStringInfoString(&result,
3490 map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3491 get_database_name(MyDatabaseId),
3492 get_namespace_name(typtuple->typnamespace),
3493 NameStr(typtuple->typname)));
3494
3495 ReleaseSysCache(tuple);
3496 }
3497 }
3498
3499 return result.data;
3500 }
3501
3502
3503 /*
3504 * Map a collection of SQL data types to XML Schema data types; see
3505 * SQL/XML:2008 section 9.7.
3506 */
3507 static const char *
map_sql_typecoll_to_xmlschema_types(List * tupdesc_list)3508 map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3509 {
3510 List *uniquetypes = NIL;
3511 int i;
3512 StringInfoData result;
3513 ListCell *cell0;
3514
3515 /* extract all column types used in the set of TupleDescs */
3516 foreach(cell0, tupdesc_list)
3517 {
3518 TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3519
3520 for (i = 0; i < tupdesc->natts; i++)
3521 {
3522 Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3523
3524 if (att->attisdropped)
3525 continue;
3526 uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3527 }
3528 }
3529
3530 /* add base types of domains */
3531 foreach(cell0, uniquetypes)
3532 {
3533 Oid typid = lfirst_oid(cell0);
3534 Oid basetypid = getBaseType(typid);
3535
3536 if (basetypid != typid)
3537 uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3538 }
3539
3540 /* Convert to textual form */
3541 initStringInfo(&result);
3542
3543 foreach(cell0, uniquetypes)
3544 {
3545 appendStringInfo(&result, "%s\n",
3546 map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3547 -1));
3548 }
3549
3550 return result.data;
3551 }
3552
3553
3554 /*
3555 * Map an SQL data type to a named XML Schema data type; see
3556 * SQL/XML:2008 sections 9.5 and 9.6.
3557 *
3558 * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3559 * a name attribute, which this function does. The name-less version
3560 * 9.5 doesn't appear to be required anywhere.)
3561 */
3562 static const char *
map_sql_type_to_xmlschema_type(Oid typeoid,int typmod)3563 map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3564 {
3565 StringInfoData result;
3566 const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3567
3568 initStringInfo(&result);
3569
3570 if (typeoid == XMLOID)
3571 {
3572 appendStringInfoString(&result,
3573 "<xsd:complexType mixed=\"true\">\n"
3574 " <xsd:sequence>\n"
3575 " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3576 " </xsd:sequence>\n"
3577 "</xsd:complexType>\n");
3578 }
3579 else
3580 {
3581 appendStringInfo(&result,
3582 "<xsd:simpleType name=\"%s\">\n", typename);
3583
3584 switch (typeoid)
3585 {
3586 case BPCHAROID:
3587 case VARCHAROID:
3588 case TEXTOID:
3589 appendStringInfoString(&result,
3590 " <xsd:restriction base=\"xsd:string\">\n");
3591 if (typmod != -1)
3592 appendStringInfo(&result,
3593 " <xsd:maxLength value=\"%d\"/>\n",
3594 typmod - VARHDRSZ);
3595 appendStringInfoString(&result, " </xsd:restriction>\n");
3596 break;
3597
3598 case BYTEAOID:
3599 appendStringInfo(&result,
3600 " <xsd:restriction base=\"xsd:%s\">\n"
3601 " </xsd:restriction>\n",
3602 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3603 break;
3604
3605 case NUMERICOID:
3606 if (typmod != -1)
3607 appendStringInfo(&result,
3608 " <xsd:restriction base=\"xsd:decimal\">\n"
3609 " <xsd:totalDigits value=\"%d\"/>\n"
3610 " <xsd:fractionDigits value=\"%d\"/>\n"
3611 " </xsd:restriction>\n",
3612 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3613 (typmod - VARHDRSZ) & 0xffff);
3614 break;
3615
3616 case INT2OID:
3617 appendStringInfo(&result,
3618 " <xsd:restriction base=\"xsd:short\">\n"
3619 " <xsd:maxInclusive value=\"%d\"/>\n"
3620 " <xsd:minInclusive value=\"%d\"/>\n"
3621 " </xsd:restriction>\n",
3622 SHRT_MAX, SHRT_MIN);
3623 break;
3624
3625 case INT4OID:
3626 appendStringInfo(&result,
3627 " <xsd:restriction base=\"xsd:int\">\n"
3628 " <xsd:maxInclusive value=\"%d\"/>\n"
3629 " <xsd:minInclusive value=\"%d\"/>\n"
3630 " </xsd:restriction>\n",
3631 INT_MAX, INT_MIN);
3632 break;
3633
3634 case INT8OID:
3635 appendStringInfo(&result,
3636 " <xsd:restriction base=\"xsd:long\">\n"
3637 " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3638 " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3639 " </xsd:restriction>\n",
3640 (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1,
3641 (((uint64) 1) << (sizeof(int64) * 8 - 1)));
3642 break;
3643
3644 case FLOAT4OID:
3645 appendStringInfoString(&result,
3646 " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3647 break;
3648
3649 case FLOAT8OID:
3650 appendStringInfoString(&result,
3651 " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3652 break;
3653
3654 case BOOLOID:
3655 appendStringInfoString(&result,
3656 " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3657 break;
3658
3659 case TIMEOID:
3660 case TIMETZOID:
3661 {
3662 const char *tz = (typeoid == TIMETZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3663
3664 if (typmod == -1)
3665 appendStringInfo(&result,
3666 " <xsd:restriction base=\"xsd:time\">\n"
3667 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3668 " </xsd:restriction>\n", tz);
3669 else if (typmod == 0)
3670 appendStringInfo(&result,
3671 " <xsd:restriction base=\"xsd:time\">\n"
3672 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3673 " </xsd:restriction>\n", tz);
3674 else
3675 appendStringInfo(&result,
3676 " <xsd:restriction base=\"xsd:time\">\n"
3677 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3678 " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3679 break;
3680 }
3681
3682 case TIMESTAMPOID:
3683 case TIMESTAMPTZOID:
3684 {
3685 const char *tz = (typeoid == TIMESTAMPTZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3686
3687 if (typmod == -1)
3688 appendStringInfo(&result,
3689 " <xsd:restriction base=\"xsd:dateTime\">\n"
3690 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3691 " </xsd:restriction>\n", tz);
3692 else if (typmod == 0)
3693 appendStringInfo(&result,
3694 " <xsd:restriction base=\"xsd:dateTime\">\n"
3695 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3696 " </xsd:restriction>\n", tz);
3697 else
3698 appendStringInfo(&result,
3699 " <xsd:restriction base=\"xsd:dateTime\">\n"
3700 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3701 " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3702 break;
3703 }
3704
3705 case DATEOID:
3706 appendStringInfoString(&result,
3707 " <xsd:restriction base=\"xsd:date\">\n"
3708 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3709 " </xsd:restriction>\n");
3710 break;
3711
3712 default:
3713 if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3714 {
3715 Oid base_typeoid;
3716 int32 base_typmod = -1;
3717
3718 base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3719
3720 appendStringInfo(&result,
3721 " <xsd:restriction base=\"%s\"/>\n",
3722 map_sql_type_to_xml_name(base_typeoid, base_typmod));
3723 }
3724 break;
3725 }
3726 appendStringInfoString(&result, "</xsd:simpleType>\n");
3727 }
3728
3729 return result.data;
3730 }
3731
3732
3733 /*
3734 * Map an SQL row to an XML element, taking the row from the active
3735 * SPI cursor. See also SQL/XML:2008 section 9.10.
3736 */
3737 static void
SPI_sql_row_to_xmlelement(uint64 rownum,StringInfo result,char * tablename,bool nulls,bool tableforest,const char * targetns,bool top_level)3738 SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3739 bool nulls, bool tableforest,
3740 const char *targetns, bool top_level)
3741 {
3742 int i;
3743 char *xmltn;
3744
3745 if (tablename)
3746 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3747 else
3748 {
3749 if (tableforest)
3750 xmltn = "row";
3751 else
3752 xmltn = "table";
3753 }
3754
3755 if (tableforest)
3756 xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3757 else
3758 appendStringInfoString(result, "<row>\n");
3759
3760 for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3761 {
3762 char *colname;
3763 Datum colval;
3764 bool isnull;
3765
3766 colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3767 true, false);
3768 colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3769 SPI_tuptable->tupdesc,
3770 i,
3771 &isnull);
3772 if (isnull)
3773 {
3774 if (nulls)
3775 appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
3776 }
3777 else
3778 appendStringInfo(result, " <%s>%s</%s>\n",
3779 colname,
3780 map_sql_value_to_xml_value(colval,
3781 SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3782 colname);
3783 }
3784
3785 if (tableforest)
3786 {
3787 xmldata_root_element_end(result, xmltn);
3788 appendStringInfoChar(result, '\n');
3789 }
3790 else
3791 appendStringInfoString(result, "</row>\n\n");
3792 }
3793
3794
3795 /*
3796 * XPath related functions
3797 */
3798
3799 #ifdef USE_LIBXML
3800
3801 /*
3802 * Convert XML node to text.
3803 *
3804 * For attribute and text nodes, return the escaped text. For anything else,
3805 * dump the whole subtree.
3806 */
3807 static text *
xml_xmlnodetoxmltype(xmlNodePtr cur,PgXmlErrorContext * xmlerrcxt)3808 xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
3809 {
3810 xmltype *result = NULL;
3811
3812 if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
3813 {
3814 void (*volatile nodefree) (xmlNodePtr) = NULL;
3815 volatile xmlBufferPtr buf = NULL;
3816 volatile xmlNodePtr cur_copy = NULL;
3817
3818 PG_TRY();
3819 {
3820 int bytes;
3821
3822 buf = xmlBufferCreate();
3823 if (buf == NULL || xmlerrcxt->err_occurred)
3824 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3825 "could not allocate xmlBuffer");
3826
3827 /*
3828 * Produce a dump of the node that we can serialize. xmlNodeDump
3829 * does that, but the result of that function won't contain
3830 * namespace definitions from ancestor nodes, so we first do a
3831 * xmlCopyNode() which duplicates the node along with its required
3832 * namespace definitions.
3833 *
3834 * Some old libxml2 versions such as 2.7.6 produce partially
3835 * broken XML_DOCUMENT_NODE nodes (unset content field) when
3836 * copying them. xmlNodeDump of such a node works fine, but
3837 * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
3838 */
3839 cur_copy = xmlCopyNode(cur, 1);
3840 if (cur_copy == NULL || xmlerrcxt->err_occurred)
3841 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3842 "could not copy node");
3843 nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
3844 (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
3845
3846 bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
3847 if (bytes == -1 || xmlerrcxt->err_occurred)
3848 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3849 "could not dump node");
3850
3851 result = xmlBuffer_to_xmltype(buf);
3852 }
3853 PG_FINALLY();
3854 {
3855 if (nodefree)
3856 nodefree(cur_copy);
3857 if (buf)
3858 xmlBufferFree(buf);
3859 }
3860 PG_END_TRY();
3861 }
3862 else
3863 {
3864 xmlChar *str;
3865
3866 str = xmlXPathCastNodeToString(cur);
3867 PG_TRY();
3868 {
3869 /* Here we rely on XML having the same representation as TEXT */
3870 char *escaped = escape_xml((char *) str);
3871
3872 result = (xmltype *) cstring_to_text(escaped);
3873 pfree(escaped);
3874 }
3875 PG_FINALLY();
3876 {
3877 xmlFree(str);
3878 }
3879 PG_END_TRY();
3880 }
3881
3882 return result;
3883 }
3884
3885 /*
3886 * Convert an XML XPath object (the result of evaluating an XPath expression)
3887 * to an array of xml values, which are appended to astate. The function
3888 * result value is the number of elements in the array.
3889 *
3890 * If "astate" is NULL then we don't generate the array value, but we still
3891 * return the number of elements it would have had.
3892 *
3893 * Nodesets are converted to an array containing the nodes' textual
3894 * representations. Primitive values (float, double, string) are converted
3895 * to a single-element array containing the value's string representation.
3896 */
3897 static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,ArrayBuildState * astate,PgXmlErrorContext * xmlerrcxt)3898 xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3899 ArrayBuildState *astate,
3900 PgXmlErrorContext *xmlerrcxt)
3901 {
3902 int result = 0;
3903 Datum datum;
3904 Oid datumtype;
3905 char *result_str;
3906
3907 switch (xpathobj->type)
3908 {
3909 case XPATH_NODESET:
3910 if (xpathobj->nodesetval != NULL)
3911 {
3912 result = xpathobj->nodesetval->nodeNr;
3913 if (astate != NULL)
3914 {
3915 int i;
3916
3917 for (i = 0; i < result; i++)
3918 {
3919 datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
3920 xmlerrcxt));
3921 (void) accumArrayResult(astate, datum, false,
3922 XMLOID, CurrentMemoryContext);
3923 }
3924 }
3925 }
3926 return result;
3927
3928 case XPATH_BOOLEAN:
3929 if (astate == NULL)
3930 return 1;
3931 datum = BoolGetDatum(xpathobj->boolval);
3932 datumtype = BOOLOID;
3933 break;
3934
3935 case XPATH_NUMBER:
3936 if (astate == NULL)
3937 return 1;
3938 datum = Float8GetDatum(xpathobj->floatval);
3939 datumtype = FLOAT8OID;
3940 break;
3941
3942 case XPATH_STRING:
3943 if (astate == NULL)
3944 return 1;
3945 datum = CStringGetDatum((char *) xpathobj->stringval);
3946 datumtype = CSTRINGOID;
3947 break;
3948
3949 default:
3950 elog(ERROR, "xpath expression result type %d is unsupported",
3951 xpathobj->type);
3952 return 0; /* keep compiler quiet */
3953 }
3954
3955 /* Common code for scalar-value cases */
3956 result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3957 datum = PointerGetDatum(cstring_to_xmltype(result_str));
3958 (void) accumArrayResult(astate, datum, false,
3959 XMLOID, CurrentMemoryContext);
3960 return 1;
3961 }
3962
3963
3964 /*
3965 * Common code for xpath() and xmlexists()
3966 *
3967 * Evaluate XPath expression and return number of nodes in res_nitems
3968 * and array of XML values in astate. Either of those pointers can be
3969 * NULL if the corresponding result isn't wanted.
3970 *
3971 * It is up to the user to ensure that the XML passed is in fact
3972 * an XML document - XPath doesn't work easily on fragments without
3973 * a context node being known.
3974 */
3975 static void
xpath_internal(text * xpath_expr_text,xmltype * data,ArrayType * namespaces,int * res_nitems,ArrayBuildState * astate)3976 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3977 int *res_nitems, ArrayBuildState *astate)
3978 {
3979 PgXmlErrorContext *xmlerrcxt;
3980 volatile xmlParserCtxtPtr ctxt = NULL;
3981 volatile xmlDocPtr doc = NULL;
3982 volatile xmlXPathContextPtr xpathctx = NULL;
3983 volatile xmlXPathCompExprPtr xpathcomp = NULL;
3984 volatile xmlXPathObjectPtr xpathobj = NULL;
3985 char *datastr;
3986 int32 len;
3987 int32 xpath_len;
3988 xmlChar *string;
3989 xmlChar *xpath_expr;
3990 size_t xmldecl_len = 0;
3991 int i;
3992 int ndim;
3993 Datum *ns_names_uris;
3994 bool *ns_names_uris_nulls;
3995 int ns_count;
3996
3997 /*
3998 * Namespace mappings are passed as text[]. If an empty array is passed
3999 * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
4000 * Else, a 2-dimensional array with length of the second axis being equal
4001 * to 2 should be passed, i.e., every subarray contains 2 elements, the
4002 * first element defining the name, the second one the URI. Example:
4003 * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4004 * 'http://example2.com']].
4005 */
4006 ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4007 if (ndim != 0)
4008 {
4009 int *dims;
4010
4011 dims = ARR_DIMS(namespaces);
4012
4013 if (ndim != 2 || dims[1] != 2)
4014 ereport(ERROR,
4015 (errcode(ERRCODE_DATA_EXCEPTION),
4016 errmsg("invalid array for XML namespace mapping"),
4017 errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4018
4019 Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4020
4021 deconstruct_array(namespaces, TEXTOID, -1, false, TYPALIGN_INT,
4022 &ns_names_uris, &ns_names_uris_nulls,
4023 &ns_count);
4024
4025 Assert((ns_count % 2) == 0); /* checked above */
4026 ns_count /= 2; /* count pairs only */
4027 }
4028 else
4029 {
4030 ns_names_uris = NULL;
4031 ns_names_uris_nulls = NULL;
4032 ns_count = 0;
4033 }
4034
4035 datastr = VARDATA(data);
4036 len = VARSIZE(data) - VARHDRSZ;
4037 xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4038 if (xpath_len == 0)
4039 ereport(ERROR,
4040 (errcode(ERRCODE_DATA_EXCEPTION),
4041 errmsg("empty XPath expression")));
4042
4043 string = pg_xmlCharStrndup(datastr, len);
4044 xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4045
4046 /*
4047 * In a UTF8 database, skip any xml declaration, which might assert
4048 * another encoding. Ignore parse_xml_decl() failure, letting
4049 * xmlCtxtReadMemory() report parse errors. Documentation disclaims
4050 * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4051 * those scenarios bug-compatible with historical behavior.
4052 */
4053 if (GetDatabaseEncoding() == PG_UTF8)
4054 parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4055
4056 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4057
4058 PG_TRY();
4059 {
4060 xmlInitParser();
4061
4062 /*
4063 * redundant XML parsing (two parsings for the same value during one
4064 * command execution are possible)
4065 */
4066 ctxt = xmlNewParserCtxt();
4067 if (ctxt == NULL || xmlerrcxt->err_occurred)
4068 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4069 "could not allocate parser context");
4070 doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4071 len - xmldecl_len, NULL, NULL, 0);
4072 if (doc == NULL || xmlerrcxt->err_occurred)
4073 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4074 "could not parse XML document");
4075 xpathctx = xmlXPathNewContext(doc);
4076 if (xpathctx == NULL || xmlerrcxt->err_occurred)
4077 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4078 "could not allocate XPath context");
4079 xpathctx->node = (xmlNodePtr) doc;
4080
4081 /* register namespaces, if any */
4082 if (ns_count > 0)
4083 {
4084 for (i = 0; i < ns_count; i++)
4085 {
4086 char *ns_name;
4087 char *ns_uri;
4088
4089 if (ns_names_uris_nulls[i * 2] ||
4090 ns_names_uris_nulls[i * 2 + 1])
4091 ereport(ERROR,
4092 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4093 errmsg("neither namespace name nor URI may be null")));
4094 ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4095 ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4096 if (xmlXPathRegisterNs(xpathctx,
4097 (xmlChar *) ns_name,
4098 (xmlChar *) ns_uri) != 0)
4099 ereport(ERROR, /* is this an internal error??? */
4100 (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4101 ns_name, ns_uri)));
4102 }
4103 }
4104
4105 xpathcomp = xmlXPathCompile(xpath_expr);
4106 if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4107 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4108 "invalid XPath expression");
4109
4110 /*
4111 * Version 2.6.27 introduces a function named
4112 * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4113 * but we can derive the existence by whether any nodes are returned,
4114 * thereby preventing a library version upgrade and keeping the code
4115 * the same.
4116 */
4117 xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4118 if (xpathobj == NULL || xmlerrcxt->err_occurred)
4119 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4120 "could not create XPath object");
4121
4122 /*
4123 * Extract the results as requested.
4124 */
4125 if (res_nitems != NULL)
4126 *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4127 else
4128 (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4129 }
4130 PG_CATCH();
4131 {
4132 if (xpathobj)
4133 xmlXPathFreeObject(xpathobj);
4134 if (xpathcomp)
4135 xmlXPathFreeCompExpr(xpathcomp);
4136 if (xpathctx)
4137 xmlXPathFreeContext(xpathctx);
4138 if (doc)
4139 xmlFreeDoc(doc);
4140 if (ctxt)
4141 xmlFreeParserCtxt(ctxt);
4142
4143 pg_xml_done(xmlerrcxt, true);
4144
4145 PG_RE_THROW();
4146 }
4147 PG_END_TRY();
4148
4149 xmlXPathFreeObject(xpathobj);
4150 xmlXPathFreeCompExpr(xpathcomp);
4151 xmlXPathFreeContext(xpathctx);
4152 xmlFreeDoc(doc);
4153 xmlFreeParserCtxt(ctxt);
4154
4155 pg_xml_done(xmlerrcxt, false);
4156 }
4157 #endif /* USE_LIBXML */
4158
4159 /*
4160 * Evaluate XPath expression and return array of XML values.
4161 *
4162 * As we have no support of XQuery sequences yet, this function seems
4163 * to be the most useful one (array of XML functions plays a role of
4164 * some kind of substitution for XQuery sequences).
4165 */
4166 Datum
xpath(PG_FUNCTION_ARGS)4167 xpath(PG_FUNCTION_ARGS)
4168 {
4169 #ifdef USE_LIBXML
4170 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4171 xmltype *data = PG_GETARG_XML_P(1);
4172 ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4173 ArrayBuildState *astate;
4174
4175 astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4176 xpath_internal(xpath_expr_text, data, namespaces,
4177 NULL, astate);
4178 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
4179 #else
4180 NO_XML_SUPPORT();
4181 return 0;
4182 #endif
4183 }
4184
4185 /*
4186 * Determines if the node specified by the supplied XPath exists
4187 * in a given XML document, returning a boolean.
4188 */
4189 Datum
xmlexists(PG_FUNCTION_ARGS)4190 xmlexists(PG_FUNCTION_ARGS)
4191 {
4192 #ifdef USE_LIBXML
4193 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4194 xmltype *data = PG_GETARG_XML_P(1);
4195 int res_nitems;
4196
4197 xpath_internal(xpath_expr_text, data, NULL,
4198 &res_nitems, NULL);
4199
4200 PG_RETURN_BOOL(res_nitems > 0);
4201 #else
4202 NO_XML_SUPPORT();
4203 return 0;
4204 #endif
4205 }
4206
4207 /*
4208 * Determines if the node specified by the supplied XPath exists
4209 * in a given XML document, returning a boolean. Differs from
4210 * xmlexists as it supports namespaces and is not defined in SQL/XML.
4211 */
4212 Datum
xpath_exists(PG_FUNCTION_ARGS)4213 xpath_exists(PG_FUNCTION_ARGS)
4214 {
4215 #ifdef USE_LIBXML
4216 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4217 xmltype *data = PG_GETARG_XML_P(1);
4218 ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4219 int res_nitems;
4220
4221 xpath_internal(xpath_expr_text, data, namespaces,
4222 &res_nitems, NULL);
4223
4224 PG_RETURN_BOOL(res_nitems > 0);
4225 #else
4226 NO_XML_SUPPORT();
4227 return 0;
4228 #endif
4229 }
4230
4231 /*
4232 * Functions for checking well-formed-ness
4233 */
4234
4235 #ifdef USE_LIBXML
4236 static bool
wellformed_xml(text * data,XmlOptionType xmloption_arg)4237 wellformed_xml(text *data, XmlOptionType xmloption_arg)
4238 {
4239 bool result;
4240 volatile xmlDocPtr doc = NULL;
4241
4242 /* We want to catch any exceptions and return false */
4243 PG_TRY();
4244 {
4245 doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4246 result = true;
4247 }
4248 PG_CATCH();
4249 {
4250 FlushErrorState();
4251 result = false;
4252 }
4253 PG_END_TRY();
4254
4255 if (doc)
4256 xmlFreeDoc(doc);
4257
4258 return result;
4259 }
4260 #endif
4261
4262 Datum
xml_is_well_formed(PG_FUNCTION_ARGS)4263 xml_is_well_formed(PG_FUNCTION_ARGS)
4264 {
4265 #ifdef USE_LIBXML
4266 text *data = PG_GETARG_TEXT_PP(0);
4267
4268 PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4269 #else
4270 NO_XML_SUPPORT();
4271 return 0;
4272 #endif /* not USE_LIBXML */
4273 }
4274
4275 Datum
xml_is_well_formed_document(PG_FUNCTION_ARGS)4276 xml_is_well_formed_document(PG_FUNCTION_ARGS)
4277 {
4278 #ifdef USE_LIBXML
4279 text *data = PG_GETARG_TEXT_PP(0);
4280
4281 PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4282 #else
4283 NO_XML_SUPPORT();
4284 return 0;
4285 #endif /* not USE_LIBXML */
4286 }
4287
4288 Datum
xml_is_well_formed_content(PG_FUNCTION_ARGS)4289 xml_is_well_formed_content(PG_FUNCTION_ARGS)
4290 {
4291 #ifdef USE_LIBXML
4292 text *data = PG_GETARG_TEXT_PP(0);
4293
4294 PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4295 #else
4296 NO_XML_SUPPORT();
4297 return 0;
4298 #endif /* not USE_LIBXML */
4299 }
4300
4301 /*
4302 * support functions for XMLTABLE
4303 *
4304 */
4305 #ifdef USE_LIBXML
4306
4307 /*
4308 * Returns private data from executor state. Ensure validity by check with
4309 * MAGIC number.
4310 */
4311 static inline XmlTableBuilderData *
GetXmlTableBuilderPrivateData(TableFuncScanState * state,const char * fname)4312 GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4313 {
4314 XmlTableBuilderData *result;
4315
4316 if (!IsA(state, TableFuncScanState))
4317 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4318 result = (XmlTableBuilderData *) state->opaque;
4319 if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4320 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4321
4322 return result;
4323 }
4324 #endif
4325
4326 /*
4327 * XmlTableInitOpaque
4328 * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4329 * the XML parser.
4330 *
4331 * Note: Because we call pg_xml_init() here and pg_xml_done() in
4332 * XmlTableDestroyOpaque, it is critical for robustness that no other
4333 * executor nodes run until this node is processed to completion. Caller
4334 * must execute this to completion (probably filling a tuplestore to exhaust
4335 * this node in a single pass) instead of using row-per-call mode.
4336 */
4337 static void
XmlTableInitOpaque(TableFuncScanState * state,int natts)4338 XmlTableInitOpaque(TableFuncScanState *state, int natts)
4339 {
4340 #ifdef USE_LIBXML
4341 volatile xmlParserCtxtPtr ctxt = NULL;
4342 XmlTableBuilderData *xtCxt;
4343 PgXmlErrorContext *xmlerrcxt;
4344
4345 xtCxt = palloc0(sizeof(XmlTableBuilderData));
4346 xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4347 xtCxt->natts = natts;
4348 xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4349
4350 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4351
4352 PG_TRY();
4353 {
4354 xmlInitParser();
4355
4356 ctxt = xmlNewParserCtxt();
4357 if (ctxt == NULL || xmlerrcxt->err_occurred)
4358 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4359 "could not allocate parser context");
4360 }
4361 PG_CATCH();
4362 {
4363 if (ctxt != NULL)
4364 xmlFreeParserCtxt(ctxt);
4365
4366 pg_xml_done(xmlerrcxt, true);
4367
4368 PG_RE_THROW();
4369 }
4370 PG_END_TRY();
4371
4372 xtCxt->xmlerrcxt = xmlerrcxt;
4373 xtCxt->ctxt = ctxt;
4374
4375 state->opaque = xtCxt;
4376 #else
4377 NO_XML_SUPPORT();
4378 #endif /* not USE_LIBXML */
4379 }
4380
4381 /*
4382 * XmlTableSetDocument
4383 * Install the input document
4384 */
4385 static void
XmlTableSetDocument(TableFuncScanState * state,Datum value)4386 XmlTableSetDocument(TableFuncScanState *state, Datum value)
4387 {
4388 #ifdef USE_LIBXML
4389 XmlTableBuilderData *xtCxt;
4390 xmltype *xmlval = DatumGetXmlP(value);
4391 char *str;
4392 xmlChar *xstr;
4393 int length;
4394 volatile xmlDocPtr doc = NULL;
4395 volatile xmlXPathContextPtr xpathcxt = NULL;
4396
4397 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4398
4399 /*
4400 * Use out function for casting to string (remove encoding property). See
4401 * comment in xml_out.
4402 */
4403 str = xml_out_internal(xmlval, 0);
4404
4405 length = strlen(str);
4406 xstr = pg_xmlCharStrndup(str, length);
4407
4408 PG_TRY();
4409 {
4410 doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4411 if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4412 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4413 "could not parse XML document");
4414 xpathcxt = xmlXPathNewContext(doc);
4415 if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4416 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4417 "could not allocate XPath context");
4418 xpathcxt->node = (xmlNodePtr) doc;
4419 }
4420 PG_CATCH();
4421 {
4422 if (xpathcxt != NULL)
4423 xmlXPathFreeContext(xpathcxt);
4424 if (doc != NULL)
4425 xmlFreeDoc(doc);
4426
4427 PG_RE_THROW();
4428 }
4429 PG_END_TRY();
4430
4431 xtCxt->doc = doc;
4432 xtCxt->xpathcxt = xpathcxt;
4433 #else
4434 NO_XML_SUPPORT();
4435 #endif /* not USE_LIBXML */
4436 }
4437
4438 /*
4439 * XmlTableSetNamespace
4440 * Add a namespace declaration
4441 */
4442 static void
XmlTableSetNamespace(TableFuncScanState * state,const char * name,const char * uri)4443 XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4444 {
4445 #ifdef USE_LIBXML
4446 XmlTableBuilderData *xtCxt;
4447
4448 if (name == NULL)
4449 ereport(ERROR,
4450 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4451 errmsg("DEFAULT namespace is not supported")));
4452 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4453
4454 if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4455 pg_xmlCharStrndup(name, strlen(name)),
4456 pg_xmlCharStrndup(uri, strlen(uri))))
4457 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4458 "could not set XML namespace");
4459 #else
4460 NO_XML_SUPPORT();
4461 #endif /* not USE_LIBXML */
4462 }
4463
4464 /*
4465 * XmlTableSetRowFilter
4466 * Install the row-filter Xpath expression.
4467 */
4468 static void
XmlTableSetRowFilter(TableFuncScanState * state,const char * path)4469 XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4470 {
4471 #ifdef USE_LIBXML
4472 XmlTableBuilderData *xtCxt;
4473 xmlChar *xstr;
4474
4475 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4476
4477 if (*path == '\0')
4478 ereport(ERROR,
4479 (errcode(ERRCODE_DATA_EXCEPTION),
4480 errmsg("row path filter must not be empty string")));
4481
4482 xstr = pg_xmlCharStrndup(path, strlen(path));
4483
4484 xtCxt->xpathcomp = xmlXPathCompile(xstr);
4485 if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4486 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4487 "invalid XPath expression");
4488 #else
4489 NO_XML_SUPPORT();
4490 #endif /* not USE_LIBXML */
4491 }
4492
4493 /*
4494 * XmlTableSetColumnFilter
4495 * Install the column-filter Xpath expression, for the given column.
4496 */
4497 static void
XmlTableSetColumnFilter(TableFuncScanState * state,const char * path,int colnum)4498 XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4499 {
4500 #ifdef USE_LIBXML
4501 XmlTableBuilderData *xtCxt;
4502 xmlChar *xstr;
4503
4504 AssertArg(PointerIsValid(path));
4505
4506 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4507
4508 if (*path == '\0')
4509 ereport(ERROR,
4510 (errcode(ERRCODE_DATA_EXCEPTION),
4511 errmsg("column path filter must not be empty string")));
4512
4513 xstr = pg_xmlCharStrndup(path, strlen(path));
4514
4515 xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4516 if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4517 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4518 "invalid XPath expression");
4519 #else
4520 NO_XML_SUPPORT();
4521 #endif /* not USE_LIBXML */
4522 }
4523
4524 /*
4525 * XmlTableFetchRow
4526 * Prepare the next "current" tuple for upcoming GetValue calls.
4527 * Returns false if the row-filter expression returned no more rows.
4528 */
4529 static bool
XmlTableFetchRow(TableFuncScanState * state)4530 XmlTableFetchRow(TableFuncScanState *state)
4531 {
4532 #ifdef USE_LIBXML
4533 XmlTableBuilderData *xtCxt;
4534
4535 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4536
4537 /* Propagate our own error context to libxml2 */
4538 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4539
4540 if (xtCxt->xpathobj == NULL)
4541 {
4542 xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4543 if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4544 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4545 "could not create XPath object");
4546
4547 xtCxt->row_count = 0;
4548 }
4549
4550 if (xtCxt->xpathobj->type == XPATH_NODESET)
4551 {
4552 if (xtCxt->xpathobj->nodesetval != NULL)
4553 {
4554 if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4555 return true;
4556 }
4557 }
4558
4559 return false;
4560 #else
4561 NO_XML_SUPPORT();
4562 return false;
4563 #endif /* not USE_LIBXML */
4564 }
4565
4566 /*
4567 * XmlTableGetValue
4568 * Return the value for column number 'colnum' for the current row. If
4569 * column -1 is requested, return representation of the whole row.
4570 *
4571 * This leaks memory, so be sure to reset often the context in which it's
4572 * called.
4573 */
4574 static Datum
XmlTableGetValue(TableFuncScanState * state,int colnum,Oid typid,int32 typmod,bool * isnull)4575 XmlTableGetValue(TableFuncScanState *state, int colnum,
4576 Oid typid, int32 typmod, bool *isnull)
4577 {
4578 #ifdef USE_LIBXML
4579 XmlTableBuilderData *xtCxt;
4580 Datum result = (Datum) 0;
4581 xmlNodePtr cur;
4582 char *cstr = NULL;
4583 volatile xmlXPathObjectPtr xpathobj = NULL;
4584
4585 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4586
4587 Assert(xtCxt->xpathobj &&
4588 xtCxt->xpathobj->type == XPATH_NODESET &&
4589 xtCxt->xpathobj->nodesetval != NULL);
4590
4591 /* Propagate our own error context to libxml2 */
4592 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4593
4594 *isnull = false;
4595
4596 cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4597
4598 Assert(xtCxt->xpathscomp[colnum] != NULL);
4599
4600 PG_TRY();
4601 {
4602 /* Set current node as entry point for XPath evaluation */
4603 xtCxt->xpathcxt->node = cur;
4604
4605 /* Evaluate column path */
4606 xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4607 if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4608 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4609 "could not create XPath object");
4610
4611 /*
4612 * There are four possible cases, depending on the number of nodes
4613 * returned by the XPath expression and the type of the target column:
4614 * a) XPath returns no nodes. b) The target type is XML (return all
4615 * as XML). For non-XML return types: c) One node (return content).
4616 * d) Multiple nodes (error).
4617 */
4618 if (xpathobj->type == XPATH_NODESET)
4619 {
4620 int count = 0;
4621
4622 if (xpathobj->nodesetval != NULL)
4623 count = xpathobj->nodesetval->nodeNr;
4624
4625 if (xpathobj->nodesetval == NULL || count == 0)
4626 {
4627 *isnull = true;
4628 }
4629 else
4630 {
4631 if (typid == XMLOID)
4632 {
4633 text *textstr;
4634 StringInfoData str;
4635
4636 /* Concatenate serialized values */
4637 initStringInfo(&str);
4638 for (int i = 0; i < count; i++)
4639 {
4640 textstr =
4641 xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4642 xtCxt->xmlerrcxt);
4643
4644 appendStringInfoText(&str, textstr);
4645 }
4646 cstr = str.data;
4647 }
4648 else
4649 {
4650 xmlChar *str;
4651
4652 if (count > 1)
4653 ereport(ERROR,
4654 (errcode(ERRCODE_CARDINALITY_VIOLATION),
4655 errmsg("more than one value returned by column XPath expression")));
4656
4657 str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
4658 cstr = str ? xml_pstrdup_and_free(str) : "";
4659 }
4660 }
4661 }
4662 else if (xpathobj->type == XPATH_STRING)
4663 {
4664 /* Content should be escaped when target will be XML */
4665 if (typid == XMLOID)
4666 cstr = escape_xml((char *) xpathobj->stringval);
4667 else
4668 cstr = (char *) xpathobj->stringval;
4669 }
4670 else if (xpathobj->type == XPATH_BOOLEAN)
4671 {
4672 char typcategory;
4673 bool typispreferred;
4674 xmlChar *str;
4675
4676 /* Allow implicit casting from boolean to numbers */
4677 get_type_category_preferred(typid, &typcategory, &typispreferred);
4678
4679 if (typcategory != TYPCATEGORY_NUMERIC)
4680 str = xmlXPathCastBooleanToString(xpathobj->boolval);
4681 else
4682 str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
4683
4684 cstr = xml_pstrdup_and_free(str);
4685 }
4686 else if (xpathobj->type == XPATH_NUMBER)
4687 {
4688 xmlChar *str;
4689
4690 str = xmlXPathCastNumberToString(xpathobj->floatval);
4691 cstr = xml_pstrdup_and_free(str);
4692 }
4693 else
4694 elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
4695
4696 /*
4697 * By here, either cstr contains the result value, or the isnull flag
4698 * has been set.
4699 */
4700 Assert(cstr || *isnull);
4701
4702 if (!*isnull)
4703 result = InputFunctionCall(&state->in_functions[colnum],
4704 cstr,
4705 state->typioparams[colnum],
4706 typmod);
4707 }
4708 PG_FINALLY();
4709 {
4710 if (xpathobj != NULL)
4711 xmlXPathFreeObject(xpathobj);
4712 }
4713 PG_END_TRY();
4714
4715 return result;
4716 #else
4717 NO_XML_SUPPORT();
4718 return 0;
4719 #endif /* not USE_LIBXML */
4720 }
4721
4722 /*
4723 * XmlTableDestroyOpaque
4724 * Release all libxml2 resources
4725 */
4726 static void
XmlTableDestroyOpaque(TableFuncScanState * state)4727 XmlTableDestroyOpaque(TableFuncScanState *state)
4728 {
4729 #ifdef USE_LIBXML
4730 XmlTableBuilderData *xtCxt;
4731
4732 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4733
4734 /* Propagate our own error context to libxml2 */
4735 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4736
4737 if (xtCxt->xpathscomp != NULL)
4738 {
4739 int i;
4740
4741 for (i = 0; i < xtCxt->natts; i++)
4742 if (xtCxt->xpathscomp[i] != NULL)
4743 xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
4744 }
4745
4746 if (xtCxt->xpathobj != NULL)
4747 xmlXPathFreeObject(xtCxt->xpathobj);
4748 if (xtCxt->xpathcomp != NULL)
4749 xmlXPathFreeCompExpr(xtCxt->xpathcomp);
4750 if (xtCxt->xpathcxt != NULL)
4751 xmlXPathFreeContext(xtCxt->xpathcxt);
4752 if (xtCxt->doc != NULL)
4753 xmlFreeDoc(xtCxt->doc);
4754 if (xtCxt->ctxt != NULL)
4755 xmlFreeParserCtxt(xtCxt->ctxt);
4756
4757 pg_xml_done(xtCxt->xmlerrcxt, true);
4758
4759 /* not valid anymore */
4760 xtCxt->magic = 0;
4761 state->opaque = NULL;
4762
4763 #else
4764 NO_XML_SUPPORT();
4765 #endif /* not USE_LIBXML */
4766 }
4767