1 /*
2  * utility functions
3  *
4  * src/pl/plpython/plpy_util.c
5  */
6 
7 #include "postgres.h"
8 
9 #include "mb/pg_wchar.h"
10 #include "utils/memutils.h"
11 
12 #include "plpython.h"
13 
14 #include "plpy_util.h"
15 
16 #include "plpy_elog.h"
17 
18 
19 /*
20  * Convert a Python unicode object to a Python string/bytes object in
21  * PostgreSQL server encoding.  Reference ownership is passed to the
22  * caller.
23  */
24 PyObject *
PLyUnicode_Bytes(PyObject * unicode)25 PLyUnicode_Bytes(PyObject *unicode)
26 {
27 	PyObject   *bytes,
28 			   *rv;
29 	char	   *utf8string,
30 			   *encoded;
31 
32 	/* First encode the Python unicode object with UTF-8. */
33 	bytes = PyUnicode_AsUTF8String(unicode);
34 	if (bytes == NULL)
35 		PLy_elog(ERROR, "could not convert Python Unicode object to bytes");
36 
37 	utf8string = PyBytes_AsString(bytes);
38 	if (utf8string == NULL)
39 	{
40 		Py_DECREF(bytes);
41 		PLy_elog(ERROR, "could not extract bytes from encoded string");
42 	}
43 
44 	/*
45 	 * Then convert to server encoding if necessary.
46 	 *
47 	 * PyUnicode_AsEncodedString could be used to encode the object directly
48 	 * in the server encoding, but Python doesn't support all the encodings
49 	 * that PostgreSQL does (EUC_TW and MULE_INTERNAL). UTF-8 is used as an
50 	 * intermediary in PLyUnicode_FromString as well.
51 	 */
52 	if (GetDatabaseEncoding() != PG_UTF8)
53 	{
54 		PG_TRY();
55 		{
56 			encoded = pg_any_to_server(utf8string,
57 									   strlen(utf8string),
58 									   PG_UTF8);
59 		}
60 		PG_CATCH();
61 		{
62 			Py_DECREF(bytes);
63 			PG_RE_THROW();
64 		}
65 		PG_END_TRY();
66 	}
67 	else
68 		encoded = utf8string;
69 
70 	/* finally, build a bytes object in the server encoding */
71 	rv = PyBytes_FromStringAndSize(encoded, strlen(encoded));
72 
73 	/* if pg_any_to_server allocated memory, free it now */
74 	if (utf8string != encoded)
75 		pfree(encoded);
76 
77 	Py_DECREF(bytes);
78 	return rv;
79 }
80 
81 /*
82  * Convert a Python unicode object to a C string in PostgreSQL server
83  * encoding.  No Python object reference is passed out of this
84  * function.  The result is palloc'ed.
85  *
86  * Note that this function is disguised as PyString_AsString() when
87  * using Python 3.  That function returns a pointer into the internal
88  * memory of the argument, which isn't exactly the interface of this
89  * function.  But in either case you get a rather short-lived
90  * reference that you ought to better leave alone.
91  */
92 char *
PLyUnicode_AsString(PyObject * unicode)93 PLyUnicode_AsString(PyObject *unicode)
94 {
95 	PyObject   *o = PLyUnicode_Bytes(unicode);
96 	char	   *rv = pstrdup(PyBytes_AsString(o));
97 
98 	Py_XDECREF(o);
99 	return rv;
100 }
101 
102 #if PY_MAJOR_VERSION >= 3
103 /*
104  * Convert a C string in the PostgreSQL server encoding to a Python
105  * unicode object.  Reference ownership is passed to the caller.
106  */
107 PyObject *
PLyUnicode_FromStringAndSize(const char * s,Py_ssize_t size)108 PLyUnicode_FromStringAndSize(const char *s, Py_ssize_t size)
109 {
110 	char	   *utf8string;
111 	PyObject   *o;
112 
113 	utf8string = pg_server_to_any(s, size, PG_UTF8);
114 
115 	if (utf8string == s)
116 	{
117 		o = PyUnicode_FromStringAndSize(s, size);
118 	}
119 	else
120 	{
121 		o = PyUnicode_FromString(utf8string);
122 		pfree(utf8string);
123 	}
124 
125 	return o;
126 }
127 
128 PyObject *
PLyUnicode_FromString(const char * s)129 PLyUnicode_FromString(const char *s)
130 {
131 	return PLyUnicode_FromStringAndSize(s, strlen(s));
132 }
133 
134 #endif							/* PY_MAJOR_VERSION >= 3 */
135