// Copyright (C) 2014  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
3 #ifndef DLIB_PYTHON_NuMPY_IMAGE_Hh_
4 #define DLIB_PYTHON_NuMPY_IMAGE_Hh_
5 
6 #include <dlib/algs.h>
7 #include <dlib/error.h>
8 #include <dlib/matrix.h>
9 #include <dlib/pixel.h>
10 #include <string>
11 #include <memory>
12 #include <pybind11/numpy.h>
13 #include <pybind11/pybind11.h>
14 #include <dlib/image_transforms/assign_image.h>
15 #include <stdint.h>
16 #include <type_traits>
17 
18 namespace py = pybind11;
19 
20 
21 namespace dlib
22 {
23 
24 // ----------------------------------------------------------------------------------------
25 
26     template <
27         typename pixel_type
28         >
is_image(const py::array & img)29     bool is_image (
30         const py::array& img
31     )
32     /*!
33         ensures
34             - returns true if and only if the given python numpy array can reasonably be
35               interpreted as an image containing pixel_type pixels.
36     !*/
37     {
38         using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
39         const size_t expected_channels = pixel_traits<pixel_type>::num;
40 
41         const bool has_correct_number_of_dims = (img.ndim()==2 && expected_channels==1) ||
42                                                 (img.ndim()==3 && img.shape(2)==expected_channels);
43 
44         return img.dtype().kind() == py::dtype::of<basic_pixel_type>().kind() &&
45                img.itemsize() == sizeof(basic_pixel_type) &&
46                has_correct_number_of_dims;
47     }
48 
49 // ----------------------------------------------------------------------------------------
50 
51     template <
52         typename pixel_type
53         >
assert_correct_num_channels_in_image(const py::array & img)54     void assert_correct_num_channels_in_image (
55         const py::array& img
56     )
57     {
58         const size_t expected_channels = pixel_traits<pixel_type>::num;
59         if (expected_channels == 1)
60         {
61             if (!(img.ndim() == 2 || (img.ndim()==3&&img.shape(2)==1)))
62                 throw dlib::error("Expected a 2D numpy array, but instead got one with " + std::to_string(img.ndim()) + " dimensions.");
63         }
64         else
65         {
66             if (img.ndim() != 3)
67             {
68                 throw dlib::error("Expected a numpy array with 3 dimensions, but instead got one with " + std::to_string(img.ndim()) + " dimensions.");
69             }
70             else if (img.shape(2) != expected_channels)
71             {
72                 if (pixel_traits<pixel_type>::rgb)
73                     throw dlib::error("Expected a RGB image with " + std::to_string(expected_channels) + " channels but got an image with " + std::to_string(img.shape(2)) + " channels.");
74                 else
75                     throw dlib::error("Expected an image with " + std::to_string(expected_channels) + " channels but got an image with " + std::to_string(img.shape(2)) + " channels.");
76             }
77         }
78     }
79 
80 // ----------------------------------------------------------------------------------------
81 
82     template <
83         typename pixel_type
84         >
assert_is_image(const py::array & obj)85     void assert_is_image (
86         const py::array& obj
87     )
88     {
89         if (!is_image<pixel_type>(obj))
90         {
91             assert_correct_num_channels_in_image<pixel_type>(obj);
92 
93             using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
94             const char expected_type = py::dtype::of<basic_pixel_type>().kind();
95             const char got_type = obj.dtype().kind();
96 
97             const size_t expected_size = sizeof(basic_pixel_type);
98             const size_t got_size = obj.itemsize();
99 
100             auto toname = [](char type, size_t size) {
101                 if (type == 'i' && size == 1) return "int8";
102                 else if (type == 'i' && size == 2) return "int16";
103                 else if (type == 'i' && size == 4) return "int32";
104                 else if (type == 'i' && size == 8) return "int64";
105                 else if (type == 'u' && size == 1) return "uint8";
106                 else if (type == 'u' && size == 2) return "uint16";
107                 else if (type == 'u' && size == 4) return "uint32";
108                 else if (type == 'u' && size == 8) return "uint64";
109                 else if (type == 'f' && size == 4) return "float32";
110                 else if (type == 'd' && size == 8) return "float64";
111                 else DLIB_CASSERT(false, "unknown type");
112             };
113 
114             throw dlib::error("Expected numpy array with elements of type " + std::string(toname(expected_type,expected_size)) + " but got " + toname(got_type, got_size) + ".");
115         }
116     }
117 
118 // ----------------------------------------------------------------------------------------
119 
120     template <
121         typename pixel_type
122         >
123     class numpy_image : public py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type, py::array::c_style>
124     {
125         /*!
126             REQUIREMENTS ON pixel_type
127                 - is a dlib pixel type, this just means that dlib::pixel_traits<pixel_type>
128                   is defined.
129 
130             WHAT THIS OBJECT REPRESENTS
131                 This is an image object that implements dlib's generic image interface and
132                 is backed by a numpy array.  It therefore is easily interchanged with
133                 python since there is no copying.  It is functionally just a pybind11
134                 array_t object with the additional routines needed to conform to dlib's
135                 generic image API.  It also includes appropriate runtime checks to make
136                 sure that the numpy array is always typed and sized appropriately relative
137                 to the supplied pixel_type.
138         !*/
139     public:
140 
141         numpy_image() = default;
142 
numpy_image(const py::array & img)143         numpy_image(
144             const py::array& img
145         ) : py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type, py::array::c_style>(img)
146         {
147             assert_is_image<pixel_type>(img);
148         }
149 
numpy_image(long rows,long cols)150         numpy_image (
151             long rows,
152             long cols
153         )
154         {
155             set_size(rows,cols);
156         }
157 
numpy_image(const py::object & img)158         numpy_image (
159             const py::object& img
160         ) : numpy_image(img.cast<py::array>()) {}
161 
162         numpy_image(
163             const numpy_image& img
164         ) = default;
165 
166         numpy_image& operator= (
167             const py::object& rhs
168         )
169         {
170             *this = numpy_image(rhs);
171             return *this;
172         }
173 
174         numpy_image& operator= (
175             const py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type, py::array::c_style>& rhs
176         )
177         {
178             *this = numpy_image(rhs);
179             return *this;
180         }
181 
182         numpy_image& operator= (
183             const numpy_image& rhs
184         ) = default;
185 
186         template <long NR, long NC>
numpy_image(matrix<pixel_type,NR,NC> && rhs)187         numpy_image (
188             matrix<pixel_type,NR,NC>&& rhs
189         ) : numpy_image(convert_to_numpy(std::move(rhs))) {}
190 
191         template <long NR, long NC>
192         numpy_image& operator= (
193             matrix<pixel_type,NR,NC>&& rhs
194         )
195         {
196             *this = numpy_image(rhs);
197             return *this;
198         }
199 
set_size(size_t rows,size_t cols)200         void set_size(size_t rows, size_t cols)
201         {
202             using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
203             constexpr size_t channels = pixel_traits<pixel_type>::num;
204             if (channels != 1)
205                 *this = py::array_t<basic_pixel_type, py::array::c_style>({rows, cols, channels});
206             else
207                 *this = py::array_t<basic_pixel_type, py::array::c_style>({rows, cols});
208         }
209 
210     private:
convert_to_numpy(matrix<pixel_type> && img)211         static py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type, py::array::c_style> convert_to_numpy(matrix<pixel_type>&& img)
212         {
213             using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
214             const size_t dtype_size = sizeof(basic_pixel_type);
215             const auto rows = static_cast<const size_t>(num_rows(img));
216             const auto cols = static_cast<const size_t>(num_columns(img));
217             const size_t channels = pixel_traits<pixel_type>::num;
218             const size_t image_size = dtype_size * rows * cols * channels;
219 
220             std::unique_ptr<pixel_type[]> arr_ptr = img.steal_memory();
221             basic_pixel_type* arr = (basic_pixel_type *) arr_ptr.release();
222 
223             if (channels == 1)
224             {
225                 return pybind11::template array_t<basic_pixel_type, py::array::c_style>(
226                     {rows, cols},                                                       // shape
227                     {dtype_size*cols, dtype_size},                                      // strides
228                     arr,                                                                // pointer
229                     pybind11::capsule{ arr, [](void *arr_p) { delete[] reinterpret_cast<basic_pixel_type*>(arr_p); } }
230                 );
231             }
232             else
233             {
234                 return pybind11::template array_t<basic_pixel_type, py::array::c_style>(
235                     {rows, cols, channels},                                                     // shape
236                     {dtype_size * cols * channels, dtype_size * channels, dtype_size},          // strides
237                     arr,                                                                        // pointer
238                     pybind11::capsule{ arr, [](void *arr_p) { delete[] reinterpret_cast<basic_pixel_type*>(arr_p); } }
239                 );
240             }
241         }
242 
243     };
244 
245 // ----------------------------------------------------------------------------------------
246 
247     template <typename pixel_type>
assign_image(numpy_image<pixel_type> & dest,const py::array & src)248     void assign_image (
249         numpy_image<pixel_type>& dest,
250         const py::array& src
251     )
252     {
253         if (is_image<pixel_type>(src))     dest = src;
254         else if (is_image<uint8_t>(src))   assign_image(dest, numpy_image<uint8_t>(src));
255         else if (is_image<uint16_t>(src))  assign_image(dest, numpy_image<uint16_t>(src));
256         else if (is_image<uint32_t>(src))  assign_image(dest, numpy_image<uint32_t>(src));
257         else if (is_image<uint64_t>(src))  assign_image(dest, numpy_image<uint64_t>(src));
258         else if (is_image<int8_t>(src))    assign_image(dest, numpy_image<int8_t>(src));
259         else if (is_image<int16_t>(src))   assign_image(dest, numpy_image<int16_t>(src));
260         else if (is_image<int32_t>(src))   assign_image(dest, numpy_image<int32_t>(src));
261         else if (is_image<int64_t>(src))   assign_image(dest, numpy_image<int64_t>(src));
262         else if (is_image<float>(src))     assign_image(dest, numpy_image<float>(src));
263         else if (is_image<double>(src))    assign_image(dest, numpy_image<double>(src));
264         else if (is_image<rgb_pixel>(src)) assign_image(dest, numpy_image<rgb_pixel>(src));
265         else DLIB_CASSERT(false, "Unsupported pixel type used in assign_image().");
266     }
267 
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
//                          BORING IMPLEMENTATION STUFF
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
273 
274     template <typename pixel_type>
num_rows(const numpy_image<pixel_type> & img)275     long num_rows(const numpy_image<pixel_type>& img)
276     {
277         if (img.size()==0)
278             return 0;
279 
280         assert_correct_num_channels_in_image<pixel_type>(img);
281         return img.shape(0);
282     }
283 
284     template <typename pixel_type>
num_columns(const numpy_image<pixel_type> & img)285     long num_columns(const numpy_image<pixel_type>& img)
286     {
287         if (img.size()==0)
288             return 0;
289 
290         assert_correct_num_channels_in_image<pixel_type>(img);
291         return img.shape(1);
292     }
293 
294     template <typename pixel_type>
set_image_size(numpy_image<pixel_type> & img,size_t rows,size_t cols)295     void set_image_size(numpy_image<pixel_type>& img, size_t rows, size_t cols)
296     {
297         img.set_size(rows, cols);
298     }
299 
300     template <typename pixel_type>
image_data(numpy_image<pixel_type> & img)301     void* image_data(numpy_image<pixel_type>& img)
302     {
303         if (img.size()==0)
304             return 0;
305 
306         assert_is_image<pixel_type>(img);
307         return img.mutable_data(0);
308     }
309 
310     template <typename pixel_type>
image_data(const numpy_image<pixel_type> & img)311     const void* image_data (const numpy_image<pixel_type>& img)
312     {
313         if (img.size()==0)
314             return 0;
315 
316         assert_is_image<pixel_type>(img);
317         return img.data(0);
318     }
319 
320     template <typename pixel_type>
width_step(const numpy_image<pixel_type> & img)321     long width_step (const numpy_image<pixel_type>& img)
322     {
323         if (img.size()==0)
324             return 0;
325 
326         assert_correct_num_channels_in_image<pixel_type>(img);
327         using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type;
328         if (img.ndim()==3 && img.strides(2) != sizeof(basic_pixel_type))
329             throw dlib::error("The stride of the 3rd dimension (the channel dimension) of the numpy array must be " + std::to_string(sizeof(basic_pixel_type)));
330         if (img.strides(1) != sizeof(pixel_type))
331             throw dlib::error("The stride of the 2nd dimension (the columns dimension) of the numpy array must be " + std::to_string(sizeof(pixel_type)));
332 
333         return img.strides(0);
334     }
335 
336     template <typename pixel_type>
swap(numpy_image<pixel_type> & a,numpy_image<pixel_type> & b)337     void swap(numpy_image<pixel_type>& a, numpy_image<pixel_type>& b)
338     {
339         std::swap(a,b);
340     }
341 
342 
343     template <typename T>
344     struct image_traits<numpy_image<T>>
345     {
346         typedef T pixel_type;
347     };
348 }
349 
350 // ----------------------------------------------------------------------------------------
351 
352 namespace pybind11
353 {
354     namespace detail
355     {
356         template <typename pixel_type> struct handle_type_name<dlib::numpy_image<pixel_type>>
357         {
358             using basic_pixel_type = typename dlib::pixel_traits<pixel_type>::basic_pixel_type;
359 
360             template <size_t channels>
361             static PYBIND11_DESCR getname(typename std::enable_if<channels==1,int>::type) {
362                 return _("numpy.ndarray[(rows,cols),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
363             }
364             template <size_t channels>
365             static PYBIND11_DESCR getname(typename std::enable_if<channels!=1,int>::type) {
366                 if (channels == 2)
367                     return _("numpy.ndarray[(rows,cols,2),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
368                 else if (channels == 3)
369                     return _("numpy.ndarray[(rows,cols,3),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
370                 else if (channels == 4)
371                     return _("numpy.ndarray[(rows,cols,4),") + npy_format_descriptor<basic_pixel_type>::name() + _("]");
372             }
373 
374             static PYBIND11_DESCR name() {
375                 constexpr size_t channels = dlib::pixel_traits<pixel_type>::num;
376                 // The reason we have to call getname() in this wonky way is because
377                 // pybind11 uses a type that records the length of the returned string in
378                 // the type.  So we have to do this overloading to make the return type
379                 // from name() consistent.  In C++17 this would be a lot cleaner with
380                 // constexpr if, but can't use C++17 yet because of lack of wide support  :(
381                 return getname<channels>(0);
382             }
383         };
384 
385         template <typename pixel_type>
386         struct pyobject_caster<dlib::numpy_image<pixel_type>> {
387             using type = dlib::numpy_image<pixel_type>;
388 
389             bool load(handle src, bool convert) {
390                 // If passed a tuple where the first element of the tuple is a valid
391                 // numpy_image then bind the numpy_image to that element of the tuple.
392                 // We do this because there is a pattern of returning an image and some
393                 // associated metadata.  This allows the returned tuple from such functions
394                 // to also be treated as an image without needing to unpack the first
395                 // argument.
396                 if (PyTuple_Check(src.ptr()) && PyTuple_Size(src.ptr()) >= 1)
397                     src = reinterpret_borrow<py::tuple>(src)[0];
398 
399                 if (!type::check_(src))
400                     return false;
401                 // stash the output of ensure into a temp variable since assigning it to
402                 // value (the member variable created by the PYBIND11_TYPE_CASTER)
403                 // apparently causes the return bool value to be ignored?
404                 auto temp = type::ensure(src);
405                 if (!dlib::is_image<pixel_type>(temp))
406                     return false;
407                 value = temp;
408                 return static_cast<bool>(value);
409             }
410 
411             static handle cast(const handle &src, return_value_policy /* policy */, handle /* parent */) {
412                 return src.inc_ref();
413             }
414             PYBIND11_TYPE_CASTER(type, handle_type_name<type>::name());
415         };
416     }
417 }
418 
419 
420 // ----------------------------------------------------------------------------------------
421 
422 #endif // DLIB_PYTHON_NuMPY_IMAGE_Hh_
423 
424