1 /*
2  * utility functions
3  *
4  * src/pl/plpython/plpy_util.c
5  */
6 
7 #include "postgres.h"
8 
9 #include "mb/pg_wchar.h"
10 #include "utils/memutils.h"
11 #include "utils/palloc.h"
12 
13 #include "plpython.h"
14 
15 #include "plpy_util.h"
16 
17 #include "plpy_elog.h"
18 
19 
20 /*
21  * Convert a Python unicode object to a Python string/bytes object in
22  * PostgreSQL server encoding.  Reference ownership is passed to the
23  * caller.
24  */
25 PyObject *
PLyUnicode_Bytes(PyObject * unicode)26 PLyUnicode_Bytes(PyObject *unicode)
27 {
28 	PyObject   *bytes,
29 			   *rv;
30 	char	   *utf8string,
31 			   *encoded;
32 
33 	/* First encode the Python unicode object with UTF-8. */
34 	bytes = PyUnicode_AsUTF8String(unicode);
35 	if (bytes == NULL)
36 		PLy_elog(ERROR, "could not convert Python Unicode object to bytes");
37 
38 	utf8string = PyBytes_AsString(bytes);
39 	if (utf8string == NULL)
40 	{
41 		Py_DECREF(bytes);
42 		PLy_elog(ERROR, "could not extract bytes from encoded string");
43 	}
44 
45 	/*
46 	 * Then convert to server encoding if necessary.
47 	 *
48 	 * PyUnicode_AsEncodedString could be used to encode the object directly
49 	 * in the server encoding, but Python doesn't support all the encodings
50 	 * that PostgreSQL does (EUC_TW and MULE_INTERNAL). UTF-8 is used as an
51 	 * intermediary in PLyUnicode_FromString as well.
52 	 */
53 	if (GetDatabaseEncoding() != PG_UTF8)
54 	{
55 		PG_TRY();
56 		{
57 			encoded = pg_any_to_server(utf8string,
58 									   strlen(utf8string),
59 									   PG_UTF8);
60 		}
61 		PG_CATCH();
62 		{
63 			Py_DECREF(bytes);
64 			PG_RE_THROW();
65 		}
66 		PG_END_TRY();
67 	}
68 	else
69 		encoded = utf8string;
70 
71 	/* finally, build a bytes object in the server encoding */
72 	rv = PyBytes_FromStringAndSize(encoded, strlen(encoded));
73 
74 	/* if pg_any_to_server allocated memory, free it now */
75 	if (utf8string != encoded)
76 		pfree(encoded);
77 
78 	Py_DECREF(bytes);
79 	return rv;
80 }
81 
82 /*
83  * Convert a Python unicode object to a C string in PostgreSQL server
84  * encoding.  No Python object reference is passed out of this
85  * function.  The result is palloc'ed.
86  *
87  * Note that this function is disguised as PyString_AsString() when
88  * using Python 3.  That function retuns a pointer into the internal
89  * memory of the argument, which isn't exactly the interface of this
90  * function.  But in either case you get a rather short-lived
91  * reference that you ought to better leave alone.
92  */
93 char *
PLyUnicode_AsString(PyObject * unicode)94 PLyUnicode_AsString(PyObject *unicode)
95 {
96 	PyObject   *o = PLyUnicode_Bytes(unicode);
97 	char	   *rv = pstrdup(PyBytes_AsString(o));
98 
99 	Py_XDECREF(o);
100 	return rv;
101 }
102 
103 #if PY_MAJOR_VERSION >= 3
104 /*
105  * Convert a C string in the PostgreSQL server encoding to a Python
106  * unicode object.  Reference ownership is passed to the caller.
107  */
108 PyObject *
PLyUnicode_FromStringAndSize(const char * s,Py_ssize_t size)109 PLyUnicode_FromStringAndSize(const char *s, Py_ssize_t size)
110 {
111 	char	   *utf8string;
112 	PyObject   *o;
113 
114 	utf8string = pg_server_to_any(s, size, PG_UTF8);
115 
116 	if (utf8string == s)
117 	{
118 		o = PyUnicode_FromStringAndSize(s, size);
119 	}
120 	else
121 	{
122 		o = PyUnicode_FromString(utf8string);
123 		pfree(utf8string);
124 	}
125 
126 	return o;
127 }
128 
129 PyObject *
PLyUnicode_FromString(const char * s)130 PLyUnicode_FromString(const char *s)
131 {
132 	return PLyUnicode_FromStringAndSize(s, strlen(s));
133 }
134 
135 #endif   /* PY_MAJOR_VERSION >= 3 */
136