1 /*
2 * utility functions
3 *
4 * src/pl/plpython/plpy_util.c
5 */
6
7 #include "postgres.h"
8
9 #include "mb/pg_wchar.h"
10 #include "utils/memutils.h"
11
12 #include "plpython.h"
13
14 #include "plpy_util.h"
15
16 #include "plpy_elog.h"
17
18
19 /*
20 * Convert a Python unicode object to a Python string/bytes object in
21 * PostgreSQL server encoding. Reference ownership is passed to the
22 * caller.
23 */
24 PyObject *
PLyUnicode_Bytes(PyObject * unicode)25 PLyUnicode_Bytes(PyObject *unicode)
26 {
27 PyObject *bytes,
28 *rv;
29 char *utf8string,
30 *encoded;
31
32 /* First encode the Python unicode object with UTF-8. */
33 bytes = PyUnicode_AsUTF8String(unicode);
34 if (bytes == NULL)
35 PLy_elog(ERROR, "could not convert Python Unicode object to bytes");
36
37 utf8string = PyBytes_AsString(bytes);
38 if (utf8string == NULL)
39 {
40 Py_DECREF(bytes);
41 PLy_elog(ERROR, "could not extract bytes from encoded string");
42 }
43
44 /*
45 * Then convert to server encoding if necessary.
46 *
47 * PyUnicode_AsEncodedString could be used to encode the object directly
48 * in the server encoding, but Python doesn't support all the encodings
49 * that PostgreSQL does (EUC_TW and MULE_INTERNAL). UTF-8 is used as an
50 * intermediary in PLyUnicode_FromString as well.
51 */
52 if (GetDatabaseEncoding() != PG_UTF8)
53 {
54 PG_TRY();
55 {
56 encoded = pg_any_to_server(utf8string,
57 strlen(utf8string),
58 PG_UTF8);
59 }
60 PG_CATCH();
61 {
62 Py_DECREF(bytes);
63 PG_RE_THROW();
64 }
65 PG_END_TRY();
66 }
67 else
68 encoded = utf8string;
69
70 /* finally, build a bytes object in the server encoding */
71 rv = PyBytes_FromStringAndSize(encoded, strlen(encoded));
72
73 /* if pg_any_to_server allocated memory, free it now */
74 if (utf8string != encoded)
75 pfree(encoded);
76
77 Py_DECREF(bytes);
78 return rv;
79 }
80
81 /*
82 * Convert a Python unicode object to a C string in PostgreSQL server
83 * encoding. No Python object reference is passed out of this
84 * function. The result is palloc'ed.
85 *
86 * Note that this function is disguised as PyString_AsString() when
87 * using Python 3. That function returns a pointer into the internal
88 * memory of the argument, which isn't exactly the interface of this
89 * function. But in either case you get a rather short-lived
90 * reference that you ought to better leave alone.
91 */
92 char *
PLyUnicode_AsString(PyObject * unicode)93 PLyUnicode_AsString(PyObject *unicode)
94 {
95 PyObject *o = PLyUnicode_Bytes(unicode);
96 char *rv = pstrdup(PyBytes_AsString(o));
97
98 Py_XDECREF(o);
99 return rv;
100 }
101
102 #if PY_MAJOR_VERSION >= 3
103 /*
104 * Convert a C string in the PostgreSQL server encoding to a Python
105 * unicode object. Reference ownership is passed to the caller.
106 */
107 PyObject *
PLyUnicode_FromStringAndSize(const char * s,Py_ssize_t size)108 PLyUnicode_FromStringAndSize(const char *s, Py_ssize_t size)
109 {
110 char *utf8string;
111 PyObject *o;
112
113 utf8string = pg_server_to_any(s, size, PG_UTF8);
114
115 if (utf8string == s)
116 {
117 o = PyUnicode_FromStringAndSize(s, size);
118 }
119 else
120 {
121 o = PyUnicode_FromString(utf8string);
122 pfree(utf8string);
123 }
124
125 return o;
126 }
127
128 PyObject *
PLyUnicode_FromString(const char * s)129 PLyUnicode_FromString(const char *s)
130 {
131 return PLyUnicode_FromStringAndSize(s, strlen(s));
132 }
133
134 #endif /* PY_MAJOR_VERSION >= 3 */
135