1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #pragma once
19
20 #include <memory>
21 #include <utility>
22
23 #include "arrow/buffer.h"
24 #include "arrow/python/pyarrow.h"
25 #include "arrow/python/visibility.h"
26 #include "arrow/result.h"
27 #include "arrow/util/macros.h"
28
29 namespace arrow {
30
31 class MemoryPool;
32 template <class T>
33 class Result;
34
35 namespace py {
36
37 // Convert current Python error to a Status. The Python error state is cleared
38 // and can be restored with RestorePyError().
39 ARROW_PYTHON_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError);
40 // Query whether the given Status is a Python error (as wrapped by ConvertPyError()).
41 ARROW_PYTHON_EXPORT bool IsPyError(const Status& status);
42 // Restore a Python error wrapped in a Status.
43 ARROW_PYTHON_EXPORT void RestorePyError(const Status& status);
44
45 // Catch a pending Python exception and return the corresponding Status.
46 // If no exception is pending, Status::OK() is returned.
47 inline Status CheckPyError(StatusCode code = StatusCode::UnknownError) {
48 if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
49 return Status::OK();
50 } else {
51 return ConvertPyError(code);
52 }
53 }
54
// Return early from the enclosing function with the Status produced by
// CheckPyError() if a Python exception is pending.
//
// CheckPyError is spelled with its full namespace so that the macros also
// expand correctly when used outside of namespace arrow::py.
#define RETURN_IF_PYERROR() ARROW_RETURN_NOT_OK(::arrow::py::CheckPyError())

// Same as RETURN_IF_PYERROR(), but the resulting Status carries the given
// StatusCode instead of the default one.
#define PY_RETURN_IF_ERROR(CODE) ARROW_RETURN_NOT_OK(::arrow::py::CheckPyError(CODE))
58
59 // For Cython, as you can't define template C++ functions in Cython, only use them.
60 // This function can set a Python exception. It assumes that T has a (cheap)
61 // default constructor.
62 template <class T>
GetResultValue(Result<T> result)63 T GetResultValue(Result<T> result) {
64 if (ARROW_PREDICT_TRUE(result.ok())) {
65 return *std::move(result);
66 } else {
67 int r = internal::check_status(result.status()); // takes the GIL
68 assert(r == -1); // should have errored out
69 ARROW_UNUSED(r);
70 return {};
71 }
72 }
73
74 // A RAII-style helper that ensures the GIL is acquired inside a lexical block.
75 class ARROW_PYTHON_EXPORT PyAcquireGIL {
76 public:
PyAcquireGIL()77 PyAcquireGIL() : acquired_gil_(false) { acquire(); }
78
~PyAcquireGIL()79 ~PyAcquireGIL() { release(); }
80
acquire()81 void acquire() {
82 if (!acquired_gil_) {
83 state_ = PyGILState_Ensure();
84 acquired_gil_ = true;
85 }
86 }
87
88 // idempotent
release()89 void release() {
90 if (acquired_gil_) {
91 PyGILState_Release(state_);
92 acquired_gil_ = false;
93 }
94 }
95
96 private:
97 bool acquired_gil_;
98 PyGILState_STATE state_;
99 ARROW_DISALLOW_COPY_AND_ASSIGN(PyAcquireGIL);
100 };
101
102 // A RAII-style helper that releases the GIL until the end of a lexical block
103 class ARROW_PYTHON_EXPORT PyReleaseGIL {
104 public:
PyReleaseGIL()105 PyReleaseGIL() { saved_state_ = PyEval_SaveThread(); }
106
~PyReleaseGIL()107 ~PyReleaseGIL() { PyEval_RestoreThread(saved_state_); }
108
109 private:
110 PyThreadState* saved_state_;
111 ARROW_DISALLOW_COPY_AND_ASSIGN(PyReleaseGIL);
112 };
113
114 // A helper to call safely into the Python interpreter from arbitrary C++ code.
115 // The GIL is acquired, and the current thread's error status is preserved.
116 template <typename Function>
117 auto SafeCallIntoPython(Function&& func) -> decltype(func()) {
118 PyAcquireGIL lock;
119 PyObject* exc_type;
120 PyObject* exc_value;
121 PyObject* exc_traceback;
122 PyErr_Fetch(&exc_type, &exc_value, &exc_traceback);
123 auto maybe_status = std::forward<Function>(func)();
124 // If the return Status is a "Python error", the current Python error status
125 // describes the error and shouldn't be clobbered.
126 if (!IsPyError(::arrow::internal::GenericToStatus(maybe_status)) &&
127 exc_type != NULLPTR) {
128 PyErr_Restore(exc_type, exc_value, exc_traceback);
129 }
130 return maybe_status;
131 }
132
133 // A RAII primitive that DECREFs the underlying PyObject* when it
134 // goes out of scope.
135 class ARROW_PYTHON_EXPORT OwnedRef {
136 public:
OwnedRef()137 OwnedRef() : obj_(NULLPTR) {}
OwnedRef(OwnedRef && other)138 OwnedRef(OwnedRef&& other) : OwnedRef(other.detach()) {}
OwnedRef(PyObject * obj)139 explicit OwnedRef(PyObject* obj) : obj_(obj) {}
140
141 OwnedRef& operator=(OwnedRef&& other) {
142 obj_ = other.detach();
143 return *this;
144 }
145
~OwnedRef()146 ~OwnedRef() { reset(); }
147
reset(PyObject * obj)148 void reset(PyObject* obj) {
149 Py_XDECREF(obj_);
150 obj_ = obj;
151 }
152
reset()153 void reset() { reset(NULLPTR); }
154
detach()155 PyObject* detach() {
156 PyObject* result = obj_;
157 obj_ = NULLPTR;
158 return result;
159 }
160
obj()161 PyObject* obj() const { return obj_; }
162
ref()163 PyObject** ref() { return &obj_; }
164
165 operator bool() const { return obj_ != NULLPTR; }
166
167 private:
168 ARROW_DISALLOW_COPY_AND_ASSIGN(OwnedRef);
169
170 PyObject* obj_;
171 };
172
173 // Same as OwnedRef, but ensures the GIL is taken when it goes out of scope.
174 // This is for situations where the GIL is not always known to be held
175 // (e.g. if it is released in the middle of a function for performance reasons)
176 class ARROW_PYTHON_EXPORT OwnedRefNoGIL : public OwnedRef {
177 public:
OwnedRefNoGIL()178 OwnedRefNoGIL() : OwnedRef() {}
OwnedRefNoGIL(OwnedRefNoGIL && other)179 OwnedRefNoGIL(OwnedRefNoGIL&& other) : OwnedRef(other.detach()) {}
OwnedRefNoGIL(PyObject * obj)180 explicit OwnedRefNoGIL(PyObject* obj) : OwnedRef(obj) {}
181
~OwnedRefNoGIL()182 ~OwnedRefNoGIL() {
183 PyAcquireGIL lock;
184 reset();
185 }
186 };
187
188 // A temporary conversion of a Python object to a bytes area.
189 struct PyBytesView {
190 const char* bytes;
191 Py_ssize_t size;
192 bool is_utf8;
193
194 static Result<PyBytesView> FromString(PyObject* obj, bool check_utf8 = false) {
195 PyBytesView self;
196 ARROW_RETURN_NOT_OK(self.ParseString(obj, check_utf8));
197 return std::move(self);
198 }
199
FromUnicodePyBytesView200 static Result<PyBytesView> FromUnicode(PyObject* obj) {
201 PyBytesView self;
202 ARROW_RETURN_NOT_OK(self.ParseUnicode(obj));
203 return std::move(self);
204 }
205
FromBinaryPyBytesView206 static Result<PyBytesView> FromBinary(PyObject* obj) {
207 PyBytesView self;
208 ARROW_RETURN_NOT_OK(self.ParseBinary(obj));
209 return std::move(self);
210 }
211
212 // View the given Python object as string-like, i.e. str or (utf8) bytes
213 Status ParseString(PyObject* obj, bool check_utf8 = false) {
214 if (PyUnicode_Check(obj)) {
215 return ParseUnicode(obj);
216 } else {
217 ARROW_RETURN_NOT_OK(ParseBinary(obj));
218 if (check_utf8) {
219 // Check the bytes are utf8 utf-8
220 OwnedRef decoded(PyUnicode_FromStringAndSize(bytes, size));
221 if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
222 is_utf8 = true;
223 } else {
224 PyErr_Clear();
225 is_utf8 = false;
226 }
227 }
228 return Status::OK();
229 }
230 }
231
232 // View the given Python object as unicode string
ParseUnicodePyBytesView233 Status ParseUnicode(PyObject* obj) {
234 // The utf-8 representation is cached on the unicode object
235 bytes = PyUnicode_AsUTF8AndSize(obj, &size);
236 RETURN_IF_PYERROR();
237 is_utf8 = true;
238 return Status::OK();
239 }
240
241 // View the given Python object as binary-like, i.e. bytes
ParseBinaryPyBytesView242 Status ParseBinary(PyObject* obj) {
243 if (PyBytes_Check(obj)) {
244 bytes = PyBytes_AS_STRING(obj);
245 size = PyBytes_GET_SIZE(obj);
246 is_utf8 = false;
247 } else if (PyByteArray_Check(obj)) {
248 bytes = PyByteArray_AS_STRING(obj);
249 size = PyByteArray_GET_SIZE(obj);
250 is_utf8 = false;
251 } else if (PyMemoryView_Check(obj)) {
252 PyObject* ref = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C');
253 RETURN_IF_PYERROR();
254 Py_buffer* buffer = PyMemoryView_GET_BUFFER(ref);
255 bytes = reinterpret_cast<const char*>(buffer->buf);
256 size = buffer->len;
257 is_utf8 = false;
258 } else {
259 return Status::TypeError("Expected bytes, got a '", Py_TYPE(obj)->tp_name,
260 "' object");
261 }
262 return Status::OK();
263 }
264
265 protected:
266 OwnedRef ref;
267 };
268
269 class ARROW_PYTHON_EXPORT PyBuffer : public Buffer {
270 public:
271 /// While memoryview objects support multi-dimensional buffers, PyBuffer only supports
272 /// one-dimensional byte buffers.
273 ~PyBuffer();
274
275 static Result<std::shared_ptr<Buffer>> FromPyObject(PyObject* obj);
276
277 private:
278 PyBuffer();
279 Status Init(PyObject*);
280
281 Py_buffer py_buf_;
282 };
283
// Set, respectively return, the common PyArrow memory pool
285 ARROW_PYTHON_EXPORT void set_default_memory_pool(MemoryPool* pool);
286 ARROW_PYTHON_EXPORT MemoryPool* get_memory_pool();
287
288 // This is annoying: because C++11 does not allow implicit conversion of string
289 // literals to non-const char*, we need to go through some gymnastics to use
290 // PyObject_CallMethod without a lot of pain (its arguments are non-const
291 // char*)
292 template <typename... ArgTypes>
cpp_PyObject_CallMethod(PyObject * obj,const char * method_name,const char * argspec,ArgTypes...args)293 static inline PyObject* cpp_PyObject_CallMethod(PyObject* obj, const char* method_name,
294 const char* argspec, ArgTypes... args) {
295 return PyObject_CallMethod(obj, const_cast<char*>(method_name),
296 const_cast<char*>(argspec), args...);
297 }
298
299 } // namespace py
300 } // namespace arrow
301