1 #ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
2 $CXXX $CXXFLAGS $0 -o $0.$X `pkg-config --cflags --libs cudart-11.0`&&$0.$X&&rm $0.$X;exit
3 #endif
4 // © Alfredo A. Correa 2019-2020
5
6 #ifndef BOOST_MULTI_MEMORY_ADAPTORS_CUDA_MANAGED_PTR_HPP
7 #define BOOST_MULTI_MEMORY_ADAPTORS_CUDA_MANAGED_PTR_HPP
8
9 #include<cstddef> // nullptr_t
10 #include<iterator> // random_access_iterator_tag
11
12 #include<type_traits> // is_const
13
14 #include "../../cuda/ptr.hpp"
15
16 #include "../../../../detail/memory.hpp"
17
18 #include<cuda_runtime.h> // cudaDeviceSynchronize
19
20 #ifndef _DISABLE_CUDA_SLOW
21 #ifdef NDEBUG
22 #define SLOW deprecated("because it implies a slow element access to GPU memory")
23 #else
24 #define SLOW
25 #endif
26 #else
27 #define SLOW
28 #endif
29
30 #ifndef HD
31 #ifdef __CUDA_ARCH__
32 #define HD __host__ __device__
33 #else
34 #define HD
35 #endif
36 #endif
37
38 namespace boost{
39 namespace serialization{
40 template<class T> class array_wrapper;
41 template<class T, class S> const array_wrapper<T> make_array(T* t, S s);
42 }}
43
44 namespace boost{namespace multi{
45 namespace memory{namespace cuda{
46
47 namespace managed{
48
49 template<typename T, typename Ptr = T*> struct ptr;
50
51 template<typename RawPtr>
52 struct ptr<void const, RawPtr>{
53 using T = void const;
54 using raw_pointer = RawPtr;
55 raw_pointer rp_;
56 template<typename, typename> friend struct ptr;
57 template<class TT> friend ptr<TT> const_pointer_cast(ptr<TT const> const&);
ptrboost::multi::memory::cuda::managed::ptr58 ptr(raw_pointer rp) : rp_{rp}{}
59 public:
60 ptr() = default;
61 ptr(ptr const&) = default;
ptrboost::multi::memory::cuda::managed::ptr62 ptr(std::nullptr_t n) : rp_{n}{}
63 template<class Other, typename = decltype(raw_pointer{std::declval<Other const&>().rp_})>
ptrboost::multi::memory::cuda::managed::ptr64 ptr(Other const& o) : rp_{o.rp_}{}
65 ptr& operator=(ptr const&) = default;
66
67 using pointer = ptr<T>;
68 using element_type = typename std::pointer_traits<raw_pointer>::element_type;
69 using difference_type = void;//typename std::pointer_traits<impl_t>::difference_type;
operator boolboost::multi::memory::cuda::managed::ptr70 explicit operator bool() const{return rp_;}
71 // explicit operator raw_pointer&()&{return rp_;}
operator ==boost::multi::memory::cuda::managed::ptr72 bool operator==(ptr const& other) const{return rp_==other.rp_;}
operator !=boost::multi::memory::cuda::managed::ptr73 bool operator!=(ptr const& other) const{return rp_!=other.rp_;}
to_address(ptr const & p)74 friend ptr to_address(ptr const& p){return p;}
75 void operator*() const = delete;
76 template<class U> using rebind = ptr<U, typename std::pointer_traits<raw_pointer>::template rebind<U>>;
raw_pointer_cast(ptr const & self)77 friend raw_pointer raw_pointer_cast(ptr const& self){return self.rp_;}
78 };
79
80 template<typename RawPtr>
81 struct ptr<void, RawPtr>{
82 using pointer = ptr;
83 using element_type = void;
84 using difference_type = typename std::pointer_traits<RawPtr>::difference_type;
85 protected:
86 using raw_pointer = RawPtr;
87 raw_pointer rp_;
88 private:
ptrboost::multi::memory::cuda::managed::ptr89 ptr(ptr<void const> const& p) : rp_{const_cast<void*>(p.rp_)}{}
90 template<class TT> friend ptr<TT> const_pointer_cast(ptr<TT const> const&);
91 template<class, class> friend struct ptr;
92 template<class TT, class DP> friend class allocator;
93 public:
ptrboost::multi::memory::cuda::managed::ptr94 template<class Other> ptr(ptr<Other> const& p) : rp_{p.rp_}{}
ptrboost::multi::memory::cuda::managed::ptr95 explicit ptr(raw_pointer rp) : rp_{rp}{}
96 ptr() = default;
97 ptr(ptr const& p) = default;
ptrboost::multi::memory::cuda::managed::ptr98 ptr(std::nullptr_t n) : rp_{n}{}
99 template<class Other, typename = decltype(raw_pointer{std::declval<Other const&>().impl_})>
ptrboost::multi::memory::cuda::managed::ptr100 ptr(Other const& o) : rp_{o.rp_}{}
101 ptr& operator=(ptr const&) = default;
operator ==boost::multi::memory::cuda::managed::ptr102 bool operator==(ptr const& other) const{return rp_==other.rp_;}
operator !=boost::multi::memory::cuda::managed::ptr103 bool operator!=(ptr const& other) const{return rp_!=other.rp_;}
operator cuda::ptr<void>boost::multi::memory::cuda::managed::ptr104 operator cuda::ptr<void>(){return {rp_};}
105 template<class U> using rebind = ptr<U, typename std::pointer_traits<raw_pointer>::template rebind<U>>;
106
operator boolboost::multi::memory::cuda::managed::ptr107 explicit operator bool() const{return rp_;}
operator raw_pointer&boost::multi::memory::cuda::managed::ptr108 explicit operator raw_pointer&()&{return rp_;}
to_address(ptr const & p)109 friend ptr to_address(ptr const& p){return p;}
110 void operator*() = delete;
raw_pointer_cast(ptr const & self)111 friend raw_pointer raw_pointer_cast(ptr const& self){return self.rp_;}
112 };
113
114 template<class T, class PrefetchDevice = std::integral_constant<int, -99> > class allocator;
115
116 template<typename T, typename RawPtr>
117 struct ptr : cuda::ptr<T, RawPtr>{
118 using raw_pointer = RawPtr;
119 // raw_pointer rp_;
120 protected:
121 friend struct cuda::ptr<T, RawPtr>; // to allow automatic conversions
122 template<class TT, class DP> friend class allocator;
123 template<typename, typename> friend struct ptr;
124 // template<class TT, typename = typename std::enable_if<not std::is_const<TT>{}>::type>
125 // ptr(ptr<TT const> const& p) : rp_{const_cast<T*>(p.impl_)}{}
126 template<class TT> friend ptr<TT> const_pointer_cast(ptr<TT const> const&);
127 public:
128 template<class U> using rebind = ptr<U, typename std::pointer_traits<RawPtr>::template rebind<U>>;
129 // explicit ptr(cuda::ptr<T, RawPtr> const& other) : rp_{other.rp_}{}
130 template<class Other, typename = std::enable_if_t<std::is_convertible<std::decay_t<decltype(std::declval<ptr<Other>>().rp_)>, raw_pointer>{}>>
ptrboost::multi::memory::cuda::managed::ptr131 /*explicit(false)*/ constexpr ptr(ptr<Other> const& o) : cuda::ptr<T, RawPtr>{static_cast<raw_pointer>(o.rp_)}{}
132 template<class Other, typename = std::enable_if_t<not std::is_convertible<std::decay_t<decltype(std::declval<ptr<Other>>().rp_)>, raw_pointer>{}>, typename = decltype(static_cast<raw_pointer>(std::declval<ptr<Other>>().rp_))>
133 explicit/*(true)*/ constexpr ptr(ptr<Other> const& o, void** = 0) : cuda::ptr<T, RawPtr>{static_cast<raw_pointer>(o.rp_)}{}
ptrboost::multi::memory::cuda::managed::ptr134 explicit constexpr ptr(void* vp) : cuda::ptr<T, RawPtr>{static_cast<raw_pointer>(vp)}{}
135 // template<class Other, typename = std::enable_if_t<std::is_convertible<std::decay_t<decltype(std::declval<ptr<Other>>().rp_)>, raw_pointer>{}>>
136 // ptr(ptr<Other> const& o) HD : rp_{static_cast<raw_pointer>(o.rp_)}{}
137 // template<class Other, typename = std::enable_if_t<not std::is_convertible<std::decay_t<decltype(std::declval<ptr<Other>>().rp_)>, raw_pointer>{}>>
138 // explicit ptr(ptr<Other> const& o, void** = 0) HD : rp_{static_cast<raw_pointer>(o.rp_)}{}
ptrboost::multi::memory::cuda::managed::ptr139 explicit ptr(cuda::ptr<T, raw_pointer> const& other) : ptr{other.rp_}{
140 assert(other.rp_!=nullptr or Cuda::pointer::type(other.rp_) == cudaMemoryTypeManaged);
141 }
ptrboost::multi::memory::cuda::managed::ptr142 explicit constexpr ptr(raw_pointer p) : cuda::ptr<T, RawPtr>{p}{}//Cuda::pointer::is_device(p);}
143 ptr() = default;
144 ptr(ptr const&) = default;
ptrboost::multi::memory::cuda::managed::ptr145 constexpr ptr(std::nullptr_t n) : cuda::ptr<T, RawPtr>{n}{}
146 ptr& operator=(ptr const&) = default;
operator ==boost::multi::memory::cuda::managed::ptr147 constexpr bool operator==(ptr const& other) const{return this->rp_==other.rp_;}
operator !=boost::multi::memory::cuda::managed::ptr148 constexpr bool operator!=(ptr const& other) const{return this->rp_!=other.rp_;}
149
150 using element_type = typename std::pointer_traits<raw_pointer>::element_type;
151 using difference_type = typename std::pointer_traits<raw_pointer>::difference_type;
152 using value_type = T;
153 using pointer = ptr<T>;
154 using iterator_category = typename std::iterator_traits<raw_pointer>::iterator_category; // using iterator_concept = typename std::iterator_traits<impl_t>::iterator_concept;
operator boolboost::multi::memory::cuda::managed::ptr155 explicit constexpr operator bool() const{return this->rp_;}
156 // bool operator not() const{return !rp_;}
operator raw_pointerboost::multi::memory::cuda::managed::ptr157 constexpr operator raw_pointer()const&{return this->rp_;} // do not =delete
operator ptr<void>boost::multi::memory::cuda::managed::ptr158 constexpr operator ptr<void>() const{return ptr<void>{this->rp_};}
159 // template<class PM>
160 // decltype(auto) operator->*(PM pm) const{return *ptr<std::decay_t<decltype(rp_->*pm)>, decltype(&(rp_->*pm))>{&(rp_->*pm)};}
161 explicit constexpr operator typename std::pointer_traits<raw_pointer>::template rebind<void>() const{return typename std::pointer_traits<raw_pointer>::template rebind<void>{this->rp_};}
162 explicit operator typename std::pointer_traits<raw_pointer>::template rebind<void const>() const{return typename std::pointer_traits<raw_pointer>::template rebind<void const>{this->rp_};}
163 constexpr ptr& operator++(){++(this->rp_); return *this;} // remove
operator --boost::multi::memory::cuda::managed::ptr164 constexpr ptr& operator--(){--(this->rp_); return *this;} // remove
operator ++boost::multi::memory::cuda::managed::ptr165 ptr operator++(int){auto tmp = *this; ++(*this); return tmp;} // remove
operator --boost::multi::memory::cuda::managed::ptr166 ptr operator--(int){auto tmp = *this; --(*this); return tmp;} // remove
operator +=boost::multi::memory::cuda::managed::ptr167 constexpr ptr& operator+=(typename ptr::difference_type n){(this->rp_)+=n; return *this;} // remove
operator -=boost::multi::memory::cuda::managed::ptr168 constexpr ptr& operator-=(typename ptr::difference_type n) HD{(this->rp_)-=n; return *this;} // remove
operator +boost::multi::memory::cuda::managed::ptr169 constexpr ptr operator+(typename ptr::difference_type n) const{return ptr{(this->rp_) + n};} // remove
operator -boost::multi::memory::cuda::managed::ptr170 constexpr ptr operator-(typename ptr::difference_type n) const{return (*this) + (-n);} // remove
171 using reference = typename std::pointer_traits<raw_pointer>::element_type&;//ref<element_type>;
172 // [[SLOW]]
173 // [[deprecated]]
operator *boost::multi::memory::cuda::managed::ptr174 constexpr reference operator*() const{
175 // cudaDeviceSynchronize();
176 return *(this->rp_);
177 }
operator []boost::multi::memory::cuda::managed::ptr178 constexpr reference operator[](difference_type n){return *((*this)+n);}
to_address(ptr const & p)179 friend inline ptr to_address(ptr const& p){return p;}
operator -boost::multi::memory::cuda::managed::ptr180 constexpr typename ptr::difference_type operator-(ptr const& other) const{return (this->rp_)-other.rp_;}
raw_pointer_castboost::multi::memory::cuda::managed::ptr181 constexpr raw_pointer raw_pointer_cast() const&{return this->rp_;} // remove
raw_pointer_cast(ptr const & self)182 friend raw_pointer raw_pointer_cast(ptr const& self){return self.rp_;}
cuda_pointer_cast(ptr const & self)183 friend cuda::ptr<T, RawPtr> cuda_pointer_cast(ptr const& self){return cuda::ptr<T, RawPtr>{self.rp_};}
184 // constexpr operator cuda::ptr<T, RawPtr>() const{return cuda::ptr<T, RawPtr>{this->rp_};}
get_allocator(ptr const &)185 friend constexpr allocator<std::decay_t<T>> get_allocator(ptr const&){return {};} // do not =delete
186 using default_allocator_type = allocator<std::decay_t<T>>;
default_allocatorboost::multi::memory::cuda::managed::ptr187 default_allocator_type default_allocator() const{return {};}
188
189 template<class T1, class... A1, class Size, class T2, class... A2>//, std::enable_if_t<std::is_trivially_assignable<T2&, T1>{}, int> =0>
copy_nboost::multi::memory::cuda::managed::ptr190 static auto copy_n(
191 managed::ptr<T1, A1...> first, Size count,
192 managed::ptr<T2, A2...> result
193 ){
194 return adl_copy_n(cuda::ptr<T1>(first), count, cuda::ptr<T2>(result)), result + count;
195 }
196 public:
default_allocator_of(ptr const &)197 friend allocator<std::decay_t<T>> default_allocator_of(ptr const&){return {};}
198 };
199
make_array(ptr<T> t,S s)200 template<class T, class S> const boost::serialization::array_wrapper<T> make_array(ptr<T> t, S s){
201 using boost::serialization::make_array;
202 return make_array(raw_pointer_cast(t), s);
203 }
204
205
206 }
207
208 }}
209 }}
210
211 #undef SLOW
212
213 #if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__
214
215 #include "../../cuda/managed/clib.hpp" // cuda::malloc
216 #include "../../cuda/managed/malloc.hpp"
217
218 #include<memory>
219 #include<cstring>
220 #include<iostream>
221
222 namespace multi = boost::multi;
223 namespace cuda = multi::memory::cuda;
224
add_one(double & d)225 void add_one(double& d){d += 1.;}
226 template<class T>
add_one(T && t)227 void add_one(T&& t){std::forward<T>(t) += 1.;}
228
229 // * Functions with a __global__ qualifier, which run on the device but are called by the host, cannot use pass by reference.
230 //__global__ void set_5(cuda::ptr<double> const& p){
231 //__global__ void set_5(cuda::ptr<double> p){*p = 5.;}
232 //__global__ void check_5(cuda::ptr<double> p){assert(*p == 5.);}
233
g()234 double const* g(){double* p{nullptr}; return p;}
235
f()236 cuda::managed::ptr<double const> f(){
237 return cuda::managed::ptr<double>{nullptr};
238 }
239
ff()240 cuda::managed::ptr<double> ff(){
241 return cuda::managed::ptr<double>{cuda::ptr<double>{nullptr}};
242 }
243
full_overload(double *)244 std::string full_overload(double*){return "cpu";}
full_overload(cuda::ptr<double>)245 std::string full_overload(cuda::ptr<double>){return "gpu";}
full_overload(cuda::managed::ptr<double>)246 std::string full_overload(cuda::managed::ptr<double>){return "mng";}
247
cpugpu_overload(double *)248 std::string cpugpu_overload(double*){return "cpu";}
cpugpu_overload(cuda::ptr<double>)249 std::string cpugpu_overload(cuda::ptr<double>){return "gpu";}
250
cpuonly_overload(double *)251 std::string cpuonly_overload(double*){return "cpu";}
252
gpuonly_overload(cuda::ptr<double>)253 std::string gpuonly_overload(cuda::ptr<double>){return "gpu";}
254
255 template<class T> void what(T&&) = delete;
256
main()257 int main(){
258
259
260 f();
261 using T = double; static_assert( sizeof(cuda::managed::ptr<T>) == sizeof(T*) , "!");
262 std::size_t const n = 100;
263 {
264 auto p = static_cast<cuda::managed::ptr<T>>(cuda::managed::malloc(n*sizeof(T)));
265 // cuda::managed::ptr<void> vp = p;
266 // T* rp = p;
267 // void* vrp = p;
268 #pragma GCC diagnostic push
269 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
270 *p = 99.;
271 if(*p != 99.) assert(0);
272 if(*p == 11.) assert(0);
273 #pragma GCC diagnostic pop
274 cuda::managed::free(p);
275 }
276 {
277 double d = 1.;
278 assert( full_overload(&d) == "cpu" );
279 assert( cpugpu_overload(&d) == "cpu" );
280 assert( cpugpu_overload(&d) == "cpu" );
281
282 cuda::ptr<double> p = nullptr;
283 assert( full_overload(p) == "gpu" );
284 assert( cpugpu_overload(p) == "gpu" );
285 assert( gpuonly_overload(p) == "gpu" );
286
287 cuda::managed::ptr<double> pm = nullptr;
288 assert( full_overload(pm) == "mng" );
289 assert( cpugpu_overload(pm) == "gpu" );
290 assert( cpuonly_overload(pm) == "cpu" );
291 assert( gpuonly_overload(pm) == "gpu" );
292 }
293 {
294 auto p = static_cast<cuda::managed::ptr<T>>(cuda::managed::malloc(n*sizeof(T)));
295 #pragma GCC diagnostic push
296 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
297 double* ppp = p; *ppp = 3.14;
298 assert( *p == 3.14 );
299 #pragma GCC diagnostic pop
300 // cuda::managed::ptr<T> P = nullptr;
301 }
302 {
303 cuda::managed::ptr<double> p = nullptr;
304 cuda::managed::ptr<double const> pc = nullptr;
305 assert( p == pc );
306 pc = static_cast<cuda::managed::ptr<double const>>(p);
307 // double* dp = cuda::managed::ptr<double>{nullptr};
308 auto f = [](double const*){};
309 f(p);
310 // cuda::ptr<double> pp = p;
311 // std::reinterpret_pointer_cast<double*>(pp);
312 // cuda::managed::ptr<double> ppp{pp};
313 }
314 {
315 static_assert(std::is_convertible<cuda::managed::ptr<double>, double*>{});
316 }
317 {
318 auto p = static_cast<cuda::managed::ptr<T>>(cuda::managed::malloc(n*sizeof(T)));
319 cuda::ptr<T> cp = p;
320 cuda::managed::ptr<T> mcp{cp};
321 }
322 {
323 static_assert(std::is_same<std::pointer_traits<cuda::managed::ptr<double>>::rebind<double const>, cuda::managed::ptr<double const>>{}, "!");
324 }
325 std::cout << "Finish" << std::endl;
326 }
327 #endif
328 #endif
329
330
331