1 // Copyright (C) 2014 Davis E. King (davis@dlib.net) 2 // License: Boost Software License See LICENSE.txt for the full license. 3 #ifndef DLIB_PYTHON_NuMPY_IMAGE_Hh_ 4 #define DLIB_PYTHON_NuMPY_IMAGE_Hh_ 5 6 #include <dlib/algs.h> 7 #include <dlib/error.h> 8 #include <dlib/matrix.h> 9 #include <dlib/pixel.h> 10 #include <string> 11 #include <memory> 12 #include <pybind11/numpy.h> 13 #include <pybind11/pybind11.h> 14 #include <dlib/image_transforms/assign_image.h> 15 #include <stdint.h> 16 #include <type_traits> 17 18 namespace py = pybind11; 19 20 21 namespace dlib 22 { 23 24 // ---------------------------------------------------------------------------------------- 25 26 template < 27 typename pixel_type 28 > is_image(const py::array & img)29 bool is_image ( 30 const py::array& img 31 ) 32 /*! 33 ensures 34 - returns true if and only if the given python numpy array can reasonably be 35 interpreted as an image containing pixel_type pixels. 36 !*/ 37 { 38 using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type; 39 const size_t expected_channels = pixel_traits<pixel_type>::num; 40 41 const bool has_correct_number_of_dims = (img.ndim()==2 && expected_channels==1) || 42 (img.ndim()==3 && img.shape(2)==expected_channels); 43 44 return img.dtype().kind() == py::dtype::of<basic_pixel_type>().kind() && 45 img.itemsize() == sizeof(basic_pixel_type) && 46 has_correct_number_of_dims; 47 } 48 49 // ---------------------------------------------------------------------------------------- 50 51 template < 52 typename pixel_type 53 > assert_correct_num_channels_in_image(const py::array & img)54 void assert_correct_num_channels_in_image ( 55 const py::array& img 56 ) 57 { 58 const size_t expected_channels = pixel_traits<pixel_type>::num; 59 if (expected_channels == 1) 60 { 61 if (!(img.ndim() == 2 || (img.ndim()==3&&img.shape(2)==1))) 62 throw dlib::error("Expected a 2D numpy array, but instead got one with " + std::to_string(img.ndim()) + " dimensions."); 63 } 64 else 65 { 66 if (img.ndim() != 3) 67 { 68 throw dlib::error("Expected a numpy array with 3 dimensions, but instead got one with " + std::to_string(img.ndim()) + " dimensions."); 69 } 70 else if (img.shape(2) != expected_channels) 71 { 72 if (pixel_traits<pixel_type>::rgb) 73 throw dlib::error("Expected a RGB image with " + std::to_string(expected_channels) + " channels but got an image with " + std::to_string(img.shape(2)) + " channels."); 74 else 75 throw dlib::error("Expected an image with " + std::to_string(expected_channels) + " channels but got an image with " + std::to_string(img.shape(2)) + " channels."); 76 } 77 } 78 } 79 80 // ---------------------------------------------------------------------------------------- 81 82 template < 83 typename pixel_type 84 > assert_is_image(const py::array & obj)85 void assert_is_image ( 86 const py::array& obj 87 ) 88 { 89 if (!is_image<pixel_type>(obj)) 90 { 91 assert_correct_num_channels_in_image<pixel_type>(obj); 92 93 using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type; 94 const char expected_type = py::dtype::of<basic_pixel_type>().kind(); 95 const char got_type = obj.dtype().kind(); 96 97 const size_t expected_size = sizeof(basic_pixel_type); 98 const size_t got_size = obj.itemsize(); 99 100 auto toname = [](char type, size_t size) { 101 if (type == 'i' && size == 1) return "int8"; 102 else if (type == 'i' && size == 2) return "int16"; 103 else if (type == 'i' && size == 4) return "int32"; 104 else if (type == 'i' && size == 8) return "int64"; 105 else if (type == 'u' && size == 1) return "uint8"; 106 else if (type == 'u' && size == 2) return "uint16"; 107 else if (type == 'u' && size == 4) return "uint32"; 108 else if (type == 'u' && size == 8) return "uint64"; 109 else if (type == 'f' && size == 4) return "float32"; 110 else if (type == 'd' && size == 8) return "float64"; 111 else DLIB_CASSERT(false, "unknown type"); 112 }; 113 114 throw dlib::error("Expected numpy array with elements of type " + std::string(toname(expected_type,expected_size)) + " but got " + toname(got_type, got_size) + "."); 115 } 116 } 117 118 // ---------------------------------------------------------------------------------------- 119 120 template < 121 typename pixel_type 122 > 123 class numpy_image : public py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type, py::array::c_style> 124 { 125 /*! 126 REQUIREMENTS ON pixel_type 127 - is a dlib pixel type, this just means that dlib::pixel_traits<pixel_type> 128 is defined. 129 130 WHAT THIS OBJECT REPRESENTS 131 This is an image object that implements dlib's generic image interface and 132 is backed by a numpy array. It therefore is easily interchanged with 133 python since there is no copying. It is functionally just a pybind11 134 array_t object with the additional routines needed to conform to dlib's 135 generic image API. It also includes appropriate runtime checks to make 136 sure that the numpy array is always typed and sized appropriately relative 137 to the supplied pixel_type. 138 !*/ 139 public: 140 141 numpy_image() = default; 142 numpy_image(const py::array & img)143 numpy_image( 144 const py::array& img 145 ) : py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type, py::array::c_style>(img) 146 { 147 assert_is_image<pixel_type>(img); 148 } 149 numpy_image(long rows,long cols)150 numpy_image ( 151 long rows, 152 long cols 153 ) 154 { 155 set_size(rows,cols); 156 } 157 numpy_image(const py::object & img)158 numpy_image ( 159 const py::object& img 160 ) : numpy_image(img.cast<py::array>()) {} 161 162 numpy_image( 163 const numpy_image& img 164 ) = default; 165 166 numpy_image& operator= ( 167 const py::object& rhs 168 ) 169 { 170 *this = numpy_image(rhs); 171 return *this; 172 } 173 174 numpy_image& operator= ( 175 const py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type, py::array::c_style>& rhs 176 ) 177 { 178 *this = numpy_image(rhs); 179 return *this; 180 } 181 182 numpy_image& operator= ( 183 const numpy_image& rhs 184 ) = default; 185 186 template <long NR, long NC> numpy_image(matrix<pixel_type,NR,NC> && rhs)187 numpy_image ( 188 matrix<pixel_type,NR,NC>&& rhs 189 ) : numpy_image(convert_to_numpy(std::move(rhs))) {} 190 191 template <long NR, long NC> 192 numpy_image& operator= ( 193 matrix<pixel_type,NR,NC>&& rhs 194 ) 195 { 196 *this = numpy_image(rhs); 197 return *this; 198 } 199 set_size(size_t rows,size_t cols)200 void set_size(size_t rows, size_t cols) 201 { 202 using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type; 203 constexpr size_t channels = pixel_traits<pixel_type>::num; 204 if (channels != 1) 205 *this = py::array_t<basic_pixel_type, py::array::c_style>({rows, cols, channels}); 206 else 207 *this = py::array_t<basic_pixel_type, py::array::c_style>({rows, cols}); 208 } 209 210 private: convert_to_numpy(matrix<pixel_type> && img)211 static py::array_t<typename pixel_traits<pixel_type>::basic_pixel_type, py::array::c_style> convert_to_numpy(matrix<pixel_type>&& img) 212 { 213 using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type; 214 const size_t dtype_size = sizeof(basic_pixel_type); 215 const auto rows = static_cast<const size_t>(num_rows(img)); 216 const auto cols = static_cast<const size_t>(num_columns(img)); 217 const size_t channels = pixel_traits<pixel_type>::num; 218 const size_t image_size = dtype_size * rows * cols * channels; 219 220 std::unique_ptr<pixel_type[]> arr_ptr = img.steal_memory(); 221 basic_pixel_type* arr = (basic_pixel_type *) arr_ptr.release(); 222 223 if (channels == 1) 224 { 225 return pybind11::template array_t<basic_pixel_type, py::array::c_style>( 226 {rows, cols}, // shape 227 {dtype_size*cols, dtype_size}, // strides 228 arr, // pointer 229 pybind11::capsule{ arr, [](void *arr_p) { delete[] reinterpret_cast<basic_pixel_type*>(arr_p); } } 230 ); 231 } 232 else 233 { 234 return pybind11::template array_t<basic_pixel_type, py::array::c_style>( 235 {rows, cols, channels}, // shape 236 {dtype_size * cols * channels, dtype_size * channels, dtype_size}, // strides 237 arr, // pointer 238 pybind11::capsule{ arr, [](void *arr_p) { delete[] reinterpret_cast<basic_pixel_type*>(arr_p); } } 239 ); 240 } 241 } 242 243 }; 244 245 // ---------------------------------------------------------------------------------------- 246 247 template <typename pixel_type> assign_image(numpy_image<pixel_type> & dest,const py::array & src)248 void assign_image ( 249 numpy_image<pixel_type>& dest, 250 const py::array& src 251 ) 252 { 253 if (is_image<pixel_type>(src)) dest = src; 254 else if (is_image<uint8_t>(src)) assign_image(dest, numpy_image<uint8_t>(src)); 255 else if (is_image<uint16_t>(src)) assign_image(dest, numpy_image<uint16_t>(src)); 256 else if (is_image<uint32_t>(src)) assign_image(dest, numpy_image<uint32_t>(src)); 257 else if (is_image<uint64_t>(src)) assign_image(dest, numpy_image<uint64_t>(src)); 258 else if (is_image<int8_t>(src)) assign_image(dest, numpy_image<int8_t>(src)); 259 else if (is_image<int16_t>(src)) assign_image(dest, numpy_image<int16_t>(src)); 260 else if (is_image<int32_t>(src)) assign_image(dest, numpy_image<int32_t>(src)); 261 else if (is_image<int64_t>(src)) assign_image(dest, numpy_image<int64_t>(src)); 262 else if (is_image<float>(src)) assign_image(dest, numpy_image<float>(src)); 263 else if (is_image<double>(src)) assign_image(dest, numpy_image<double>(src)); 264 else if (is_image<rgb_pixel>(src)) assign_image(dest, numpy_image<rgb_pixel>(src)); 265 else DLIB_CASSERT(false, "Unsupported pixel type used in assign_image()."); 266 } 267 268 // ---------------------------------------------------------------------------------------- 269 // ---------------------------------------------------------------------------------------- 270 // BORING IMPLEMENTATION STUFF 271 // ---------------------------------------------------------------------------------------- 272 // ---------------------------------------------------------------------------------------- 273 274 template <typename pixel_type> num_rows(const numpy_image<pixel_type> & img)275 long num_rows(const numpy_image<pixel_type>& img) 276 { 277 if (img.size()==0) 278 return 0; 279 280 assert_correct_num_channels_in_image<pixel_type>(img); 281 return img.shape(0); 282 } 283 284 template <typename pixel_type> num_columns(const numpy_image<pixel_type> & img)285 long num_columns(const numpy_image<pixel_type>& img) 286 { 287 if (img.size()==0) 288 return 0; 289 290 assert_correct_num_channels_in_image<pixel_type>(img); 291 return img.shape(1); 292 } 293 294 template <typename pixel_type> set_image_size(numpy_image<pixel_type> & img,size_t rows,size_t cols)295 void set_image_size(numpy_image<pixel_type>& img, size_t rows, size_t cols) 296 { 297 img.set_size(rows, cols); 298 } 299 300 template <typename pixel_type> image_data(numpy_image<pixel_type> & img)301 void* image_data(numpy_image<pixel_type>& img) 302 { 303 if (img.size()==0) 304 return 0; 305 306 assert_is_image<pixel_type>(img); 307 return img.mutable_data(0); 308 } 309 310 template <typename pixel_type> image_data(const numpy_image<pixel_type> & img)311 const void* image_data (const numpy_image<pixel_type>& img) 312 { 313 if (img.size()==0) 314 return 0; 315 316 assert_is_image<pixel_type>(img); 317 return img.data(0); 318 } 319 320 template <typename pixel_type> width_step(const numpy_image<pixel_type> & img)321 long width_step (const numpy_image<pixel_type>& img) 322 { 323 if (img.size()==0) 324 return 0; 325 326 assert_correct_num_channels_in_image<pixel_type>(img); 327 using basic_pixel_type = typename pixel_traits<pixel_type>::basic_pixel_type; 328 if (img.ndim()==3 && img.strides(2) != sizeof(basic_pixel_type)) 329 throw dlib::error("The stride of the 3rd dimension (the channel dimension) of the numpy array must be " + std::to_string(sizeof(basic_pixel_type))); 330 if (img.strides(1) != sizeof(pixel_type)) 331 throw dlib::error("The stride of the 2nd dimension (the columns dimension) of the numpy array must be " + std::to_string(sizeof(pixel_type))); 332 333 return img.strides(0); 334 } 335 336 template <typename pixel_type> swap(numpy_image<pixel_type> & a,numpy_image<pixel_type> & b)337 void swap(numpy_image<pixel_type>& a, numpy_image<pixel_type>& b) 338 { 339 std::swap(a,b); 340 } 341 342 343 template <typename T> 344 struct image_traits<numpy_image<T>> 345 { 346 typedef T pixel_type; 347 }; 348 } 349 350 // ---------------------------------------------------------------------------------------- 351 352 namespace pybind11 353 { 354 namespace detail 355 { 356 template <typename pixel_type> struct handle_type_name<dlib::numpy_image<pixel_type>> 357 { 358 using basic_pixel_type = typename dlib::pixel_traits<pixel_type>::basic_pixel_type; 359 360 template <size_t channels> 361 static PYBIND11_DESCR getname(typename std::enable_if<channels==1,int>::type) { 362 return _("numpy.ndarray[(rows,cols),") + npy_format_descriptor<basic_pixel_type>::name() + _("]"); 363 } 364 template <size_t channels> 365 static PYBIND11_DESCR getname(typename std::enable_if<channels!=1,int>::type) { 366 if (channels == 2) 367 return _("numpy.ndarray[(rows,cols,2),") + npy_format_descriptor<basic_pixel_type>::name() + _("]"); 368 else if (channels == 3) 369 return _("numpy.ndarray[(rows,cols,3),") + npy_format_descriptor<basic_pixel_type>::name() + _("]"); 370 else if (channels == 4) 371 return _("numpy.ndarray[(rows,cols,4),") + npy_format_descriptor<basic_pixel_type>::name() + _("]"); 372 } 373 374 static PYBIND11_DESCR name() { 375 constexpr size_t channels = dlib::pixel_traits<pixel_type>::num; 376 // The reason we have to call getname() in this wonky way is because 377 // pybind11 uses a type that records the length of the returned string in 378 // the type. So we have to do this overloading to make the return type 379 // from name() consistent. In C++17 this would be a lot cleaner with 380 // constexpr if, but can't use C++17 yet because of lack of wide support :( 381 return getname<channels>(0); 382 } 383 }; 384 385 template <typename pixel_type> 386 struct pyobject_caster<dlib::numpy_image<pixel_type>> { 387 using type = dlib::numpy_image<pixel_type>; 388 389 bool load(handle src, bool convert) { 390 // If passed a tuple where the first element of the tuple is a valid 391 // numpy_image then bind the numpy_image to that element of the tuple. 392 // We do this because there is a pattern of returning an image and some 393 // associated metadata. This allows the returned tuple from such functions 394 // to also be treated as an image without needing to unpack the first 395 // argument. 396 if (PyTuple_Check(src.ptr()) && PyTuple_Size(src.ptr()) >= 1) 397 src = reinterpret_borrow<py::tuple>(src)[0]; 398 399 if (!type::check_(src)) 400 return false; 401 // stash the output of ensure into a temp variable since assigning it to 402 // value (the member variable created by the PYBIND11_TYPE_CASTER) 403 // apparently causes the return bool value to be ignored? 404 auto temp = type::ensure(src); 405 if (!dlib::is_image<pixel_type>(temp)) 406 return false; 407 value = temp; 408 return static_cast<bool>(value); 409 } 410 411 static handle cast(const handle &src, return_value_policy /* policy */, handle /* parent */) { 412 return src.inc_ref(); 413 } 414 PYBIND11_TYPE_CASTER(type, handle_type_name<type>::name()); 415 }; 416 } 417 } 418 419 420 // ---------------------------------------------------------------------------------------- 421 422 #endif // DLIB_PYTHON_NuMPY_IMAGE_Hh_ 423 424