1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #pragma once
19 
20 #include <memory>
21 #include <utility>
22 
23 #include "arrow/buffer.h"
24 #include "arrow/python/pyarrow.h"
25 #include "arrow/python/visibility.h"
26 #include "arrow/result.h"
27 #include "arrow/util/macros.h"
28 
29 namespace arrow {
30 
31 class MemoryPool;
32 template <class T>
33 class Result;
34 
35 namespace py {
36 
// Convert the current Python error to a Status carrying the given StatusCode
// (StatusCode::UnknownError by default).  The Python error state is cleared
// and can be restored with RestorePyError().
ARROW_PYTHON_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError);
// Query whether the given Status is a Python error (as wrapped by ConvertPyError()).
ARROW_PYTHON_EXPORT bool IsPyError(const Status& status);
// Restore a Python error wrapped in a Status (see ConvertPyError()).
ARROW_PYTHON_EXPORT void RestorePyError(const Status& status);
44 
45 // Catch a pending Python exception and return the corresponding Status.
46 // If no exception is pending, Status::OK() is returned.
47 inline Status CheckPyError(StatusCode code = StatusCode::UnknownError) {
48   if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
49     return Status::OK();
50   } else {
51     return ConvertPyError(code);
52   }
53 }
54 
// Check for a pending Python exception with CheckPyError() (default status
// code) and propagate the resulting Status through ARROW_RETURN_NOT_OK.
#define RETURN_IF_PYERROR() ARROW_RETURN_NOT_OK(CheckPyError())

// Same as RETURN_IF_PYERROR(), but passes CODE to CheckPyError() as the
// status code to use for the pending Python exception.
#define PY_RETURN_IF_ERROR(CODE) ARROW_RETURN_NOT_OK(CheckPyError(CODE))
58 
59 // For Cython, as you can't define template C++ functions in Cython, only use them.
60 // This function can set a Python exception.  It assumes that T has a (cheap)
61 // default constructor.
62 template <class T>
GetResultValue(Result<T> result)63 T GetResultValue(Result<T> result) {
64   if (ARROW_PREDICT_TRUE(result.ok())) {
65     return *std::move(result);
66   } else {
67     int r = internal::check_status(result.status());  // takes the GIL
68     assert(r == -1);                                  // should have errored out
69     ARROW_UNUSED(r);
70     return {};
71   }
72 }
73 
74 // A RAII-style helper that ensures the GIL is acquired inside a lexical block.
75 class ARROW_PYTHON_EXPORT PyAcquireGIL {
76  public:
PyAcquireGIL()77   PyAcquireGIL() : acquired_gil_(false) { acquire(); }
78 
~PyAcquireGIL()79   ~PyAcquireGIL() { release(); }
80 
acquire()81   void acquire() {
82     if (!acquired_gil_) {
83       state_ = PyGILState_Ensure();
84       acquired_gil_ = true;
85     }
86   }
87 
88   // idempotent
release()89   void release() {
90     if (acquired_gil_) {
91       PyGILState_Release(state_);
92       acquired_gil_ = false;
93     }
94   }
95 
96  private:
97   bool acquired_gil_;
98   PyGILState_STATE state_;
99   ARROW_DISALLOW_COPY_AND_ASSIGN(PyAcquireGIL);
100 };
101 
102 // A RAII-style helper that releases the GIL until the end of a lexical block
103 class ARROW_PYTHON_EXPORT PyReleaseGIL {
104  public:
PyReleaseGIL()105   PyReleaseGIL() { saved_state_ = PyEval_SaveThread(); }
106 
~PyReleaseGIL()107   ~PyReleaseGIL() { PyEval_RestoreThread(saved_state_); }
108 
109  private:
110   PyThreadState* saved_state_;
111   ARROW_DISALLOW_COPY_AND_ASSIGN(PyReleaseGIL);
112 };
113 
114 // A helper to call safely into the Python interpreter from arbitrary C++ code.
115 // The GIL is acquired, and the current thread's error status is preserved.
116 template <typename Function>
117 auto SafeCallIntoPython(Function&& func) -> decltype(func()) {
118   PyAcquireGIL lock;
119   PyObject* exc_type;
120   PyObject* exc_value;
121   PyObject* exc_traceback;
122   PyErr_Fetch(&exc_type, &exc_value, &exc_traceback);
123   auto maybe_status = std::forward<Function>(func)();
124   // If the return Status is a "Python error", the current Python error status
125   // describes the error and shouldn't be clobbered.
126   if (!IsPyError(::arrow::internal::GenericToStatus(maybe_status)) &&
127       exc_type != NULLPTR) {
128     PyErr_Restore(exc_type, exc_value, exc_traceback);
129   }
130   return maybe_status;
131 }
132 
133 // A RAII primitive that DECREFs the underlying PyObject* when it
134 // goes out of scope.
135 class ARROW_PYTHON_EXPORT OwnedRef {
136  public:
OwnedRef()137   OwnedRef() : obj_(NULLPTR) {}
OwnedRef(OwnedRef && other)138   OwnedRef(OwnedRef&& other) : OwnedRef(other.detach()) {}
OwnedRef(PyObject * obj)139   explicit OwnedRef(PyObject* obj) : obj_(obj) {}
140 
141   OwnedRef& operator=(OwnedRef&& other) {
142     obj_ = other.detach();
143     return *this;
144   }
145 
~OwnedRef()146   ~OwnedRef() { reset(); }
147 
reset(PyObject * obj)148   void reset(PyObject* obj) {
149     Py_XDECREF(obj_);
150     obj_ = obj;
151   }
152 
reset()153   void reset() { reset(NULLPTR); }
154 
detach()155   PyObject* detach() {
156     PyObject* result = obj_;
157     obj_ = NULLPTR;
158     return result;
159   }
160 
obj()161   PyObject* obj() const { return obj_; }
162 
ref()163   PyObject** ref() { return &obj_; }
164 
165   operator bool() const { return obj_ != NULLPTR; }
166 
167  private:
168   ARROW_DISALLOW_COPY_AND_ASSIGN(OwnedRef);
169 
170   PyObject* obj_;
171 };
172 
173 // Same as OwnedRef, but ensures the GIL is taken when it goes out of scope.
174 // This is for situations where the GIL is not always known to be held
175 // (e.g. if it is released in the middle of a function for performance reasons)
176 class ARROW_PYTHON_EXPORT OwnedRefNoGIL : public OwnedRef {
177  public:
OwnedRefNoGIL()178   OwnedRefNoGIL() : OwnedRef() {}
OwnedRefNoGIL(OwnedRefNoGIL && other)179   OwnedRefNoGIL(OwnedRefNoGIL&& other) : OwnedRef(other.detach()) {}
OwnedRefNoGIL(PyObject * obj)180   explicit OwnedRefNoGIL(PyObject* obj) : OwnedRef(obj) {}
181 
~OwnedRefNoGIL()182   ~OwnedRefNoGIL() {
183     PyAcquireGIL lock;
184     reset();
185   }
186 };
187 
188 // A temporary conversion of a Python object to a bytes area.
189 struct PyBytesView {
190   const char* bytes;
191   Py_ssize_t size;
192   bool is_utf8;
193 
194   static Result<PyBytesView> FromString(PyObject* obj, bool check_utf8 = false) {
195     PyBytesView self;
196     ARROW_RETURN_NOT_OK(self.ParseString(obj, check_utf8));
197     return std::move(self);
198   }
199 
FromUnicodePyBytesView200   static Result<PyBytesView> FromUnicode(PyObject* obj) {
201     PyBytesView self;
202     ARROW_RETURN_NOT_OK(self.ParseUnicode(obj));
203     return std::move(self);
204   }
205 
FromBinaryPyBytesView206   static Result<PyBytesView> FromBinary(PyObject* obj) {
207     PyBytesView self;
208     ARROW_RETURN_NOT_OK(self.ParseBinary(obj));
209     return std::move(self);
210   }
211 
212   // View the given Python object as string-like, i.e. str or (utf8) bytes
213   Status ParseString(PyObject* obj, bool check_utf8 = false) {
214     if (PyUnicode_Check(obj)) {
215       return ParseUnicode(obj);
216     } else {
217       ARROW_RETURN_NOT_OK(ParseBinary(obj));
218       if (check_utf8) {
219         // Check the bytes are utf8 utf-8
220         OwnedRef decoded(PyUnicode_FromStringAndSize(bytes, size));
221         if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
222           is_utf8 = true;
223         } else {
224           PyErr_Clear();
225           is_utf8 = false;
226         }
227       }
228       return Status::OK();
229     }
230   }
231 
232   // View the given Python object as unicode string
ParseUnicodePyBytesView233   Status ParseUnicode(PyObject* obj) {
234     // The utf-8 representation is cached on the unicode object
235     bytes = PyUnicode_AsUTF8AndSize(obj, &size);
236     RETURN_IF_PYERROR();
237     is_utf8 = true;
238     return Status::OK();
239   }
240 
241   // View the given Python object as binary-like, i.e. bytes
ParseBinaryPyBytesView242   Status ParseBinary(PyObject* obj) {
243     if (PyBytes_Check(obj)) {
244       bytes = PyBytes_AS_STRING(obj);
245       size = PyBytes_GET_SIZE(obj);
246       is_utf8 = false;
247     } else if (PyByteArray_Check(obj)) {
248       bytes = PyByteArray_AS_STRING(obj);
249       size = PyByteArray_GET_SIZE(obj);
250       is_utf8 = false;
251     } else if (PyMemoryView_Check(obj)) {
252       PyObject* ref = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C');
253       RETURN_IF_PYERROR();
254       Py_buffer* buffer = PyMemoryView_GET_BUFFER(ref);
255       bytes = reinterpret_cast<const char*>(buffer->buf);
256       size = buffer->len;
257       is_utf8 = false;
258     } else {
259       return Status::TypeError("Expected bytes, got a '", Py_TYPE(obj)->tp_name,
260                                "' object");
261     }
262     return Status::OK();
263   }
264 
265  protected:
266   OwnedRef ref;
267 };
268 
// A Buffer subclass that carries a Py_buffer.  The constructor is private,
// so instances are obtained through FromPyObject().
class ARROW_PYTHON_EXPORT PyBuffer : public Buffer {
 public:
  /// While memoryview objects support multi-dimensional buffers, PyBuffer only supports
  /// one-dimensional byte buffers.
  ~PyBuffer();

  // Build a Buffer from the given Python object, or return an error.
  // NOTE(review): presumably `obj` must support the Python buffer protocol;
  // confirm against the implementation.
  static Result<std::shared_ptr<Buffer>> FromPyObject(PyObject* obj);

 private:
  PyBuffer();
  // Second-stage initialization from a Python object; defined out of line.
  Status Init(PyObject*);

  // NOTE(review): presumably filled in by Init() and released by the
  // destructor; confirm against the implementation.
  Py_buffer py_buf_;
};
283 
// Set the default PyArrow memory pool.
// NOTE(review): presumably this is the pool subsequently returned by
// get_memory_pool(); confirm against the implementation.
ARROW_PYTHON_EXPORT void set_default_memory_pool(MemoryPool* pool);
// Return the common PyArrow memory pool
ARROW_PYTHON_EXPORT MemoryPool* get_memory_pool();
287 
288 // This is annoying: because C++11 does not allow implicit conversion of string
289 // literals to non-const char*, we need to go through some gymnastics to use
290 // PyObject_CallMethod without a lot of pain (its arguments are non-const
291 // char*)
292 template <typename... ArgTypes>
cpp_PyObject_CallMethod(PyObject * obj,const char * method_name,const char * argspec,ArgTypes...args)293 static inline PyObject* cpp_PyObject_CallMethod(PyObject* obj, const char* method_name,
294                                                 const char* argspec, ArgTypes... args) {
295   return PyObject_CallMethod(obj, const_cast<char*>(method_name),
296                              const_cast<char*>(argspec), args...);
297 }
298 
299 }  // namespace py
300 }  // namespace arrow
301