1 #ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*-
2 $CXXX $CXXFLAGS $0 -o $0.$X `pkg-config --cflags --libs cudart-11.0`&&$0.$X&&rm $0.$X;exit
3 #endif
4 // © Alfredo A. Correa 2019-2020
5 
6 #ifndef BOOST_MULTI_MEMORY_ADAPTORS_CUDA_MANAGED_PTR_HPP
7 #define BOOST_MULTI_MEMORY_ADAPTORS_CUDA_MANAGED_PTR_HPP
8 
9 #include<cstddef> // nullptr_t
10 #include<iterator> // random_access_iterator_tag
11 
12 #include<type_traits> // is_const
13 
14 #include "../../cuda/ptr.hpp"
15 
16 #include "../../../../detail/memory.hpp"
17 
18 #include<cuda_runtime.h> // cudaDeviceSynchronize
19 
20 #ifndef _DISABLE_CUDA_SLOW
21 #ifdef NDEBUG
22 #define SLOW deprecated("because it implies a slow element access to GPU memory")
23 #else
24 #define SLOW
25 #endif
26 #else
27 #define SLOW
28 #endif
29 
30 #ifndef HD
31 #ifdef __CUDA_ARCH__
32 #define HD __host__ __device__
33 #else
34 #define HD
35 #endif
36 #endif
37 
38 namespace boost{
39 namespace serialization{
40 	template<class T> class array_wrapper;
41 	template<class T, class S> const array_wrapper<T> make_array(T* t, S s);
42 }}
43 
44 namespace boost{namespace multi{
45 namespace memory{namespace cuda{
46 
47 namespace managed{
48 
49 template<typename T, typename Ptr = T*> struct ptr;
50 
51 template<typename RawPtr>
52 struct ptr<void const, RawPtr>{
53 	using T = void const;
54 	using raw_pointer = RawPtr;
55 	raw_pointer rp_;
56 	template<typename, typename> friend struct ptr;
57 	template<class TT> friend ptr<TT> const_pointer_cast(ptr<TT const> const&);
ptrboost::multi::memory::cuda::managed::ptr58 	ptr(raw_pointer rp) : rp_{rp}{}
59 public:
60 	ptr() = default;
61 	ptr(ptr const&) = default;
ptrboost::multi::memory::cuda::managed::ptr62 	ptr(std::nullptr_t n) : rp_{n}{}
63 	template<class Other, typename = decltype(raw_pointer{std::declval<Other const&>().rp_})>
ptrboost::multi::memory::cuda::managed::ptr64 	ptr(Other const& o) : rp_{o.rp_}{}
65 	ptr& operator=(ptr const&) = default;
66 
67 	using pointer = ptr<T>;
68 	using element_type = typename std::pointer_traits<raw_pointer>::element_type;
69 	using difference_type = void;//typename std::pointer_traits<impl_t>::difference_type;
operator boolboost::multi::memory::cuda::managed::ptr70 	explicit operator bool() const{return rp_;}
71 //	explicit operator raw_pointer&()&{return rp_;}
operator ==boost::multi::memory::cuda::managed::ptr72 	bool operator==(ptr const& other) const{return rp_==other.rp_;}
operator !=boost::multi::memory::cuda::managed::ptr73 	bool operator!=(ptr const& other) const{return rp_!=other.rp_;}
to_address(ptr const & p)74 	friend ptr to_address(ptr const& p){return p;}
75 	void operator*() const = delete;
76 	template<class U> using rebind = ptr<U, typename std::pointer_traits<raw_pointer>::template rebind<U>>;
raw_pointer_cast(ptr const & self)77 	friend raw_pointer raw_pointer_cast(ptr const& self){return self.rp_;}
78 };
79 
80 template<typename RawPtr>
81 struct ptr<void, RawPtr>{
82 	using pointer = ptr;
83 	using element_type    = void;
84 	using difference_type = typename std::pointer_traits<RawPtr>::difference_type;
85 protected:
86 	using raw_pointer = RawPtr;
87 	raw_pointer rp_;
88 private:
ptrboost::multi::memory::cuda::managed::ptr89 	ptr(ptr<void const> const& p) : rp_{const_cast<void*>(p.rp_)}{}
90 	template<class TT> friend ptr<TT> const_pointer_cast(ptr<TT const> const&);
91 	template<class, class> friend struct ptr;
92 	template<class TT, class DP> friend class allocator;
93 public:
ptrboost::multi::memory::cuda::managed::ptr94 	template<class Other> ptr(ptr<Other> const& p) : rp_{p.rp_}{}
ptrboost::multi::memory::cuda::managed::ptr95 	explicit ptr(raw_pointer rp) : rp_{rp}{}
96 	ptr() = default;
97 	ptr(ptr const& p) = default;
ptrboost::multi::memory::cuda::managed::ptr98 	ptr(std::nullptr_t n) : rp_{n}{}
99 	template<class Other, typename = decltype(raw_pointer{std::declval<Other const&>().impl_})>
ptrboost::multi::memory::cuda::managed::ptr100 	ptr(Other const& o) : rp_{o.rp_}{}
101 	ptr& operator=(ptr const&) = default;
operator ==boost::multi::memory::cuda::managed::ptr102 	bool operator==(ptr const& other) const{return rp_==other.rp_;}
operator !=boost::multi::memory::cuda::managed::ptr103 	bool operator!=(ptr const& other) const{return rp_!=other.rp_;}
operator cuda::ptr<void>boost::multi::memory::cuda::managed::ptr104 	operator cuda::ptr<void>(){return {rp_};}
105 	template<class U> using rebind = ptr<U, typename std::pointer_traits<raw_pointer>::template rebind<U>>;
106 
operator boolboost::multi::memory::cuda::managed::ptr107 	explicit operator bool() const{return rp_;}
operator raw_pointer&boost::multi::memory::cuda::managed::ptr108 	explicit operator raw_pointer&()&{return rp_;}
to_address(ptr const & p)109 	friend ptr to_address(ptr const& p){return p;}
110 	void operator*() = delete;
raw_pointer_cast(ptr const & self)111 	friend raw_pointer raw_pointer_cast(ptr const& self){return self.rp_;}
112 };
113 
114 template<class T, class PrefetchDevice = std::integral_constant<int, -99> > class allocator;
115 
116 template<typename T, typename RawPtr>
117 struct ptr : cuda::ptr<T, RawPtr>{
118 	using raw_pointer = RawPtr;
119 //	raw_pointer rp_;
120 protected:
121 	friend struct cuda::ptr<T, RawPtr>; // to allow automatic conversions
122 	template<class TT, class DP> friend class allocator;
123 	template<typename, typename> friend struct ptr;
124 //	template<class TT, typename = typename std::enable_if<not std::is_const<TT>{}>::type>
125 //	ptr(ptr<TT const> const& p) : rp_{const_cast<T*>(p.impl_)}{}
126 	template<class TT> friend ptr<TT> const_pointer_cast(ptr<TT const> const&);
127 public:
128 	template<class U> using rebind = ptr<U, typename std::pointer_traits<RawPtr>::template rebind<U>>;
129 //	explicit ptr(cuda::ptr<T, RawPtr> const& other) : rp_{other.rp_}{}
130 	template<class Other, typename = std::enable_if_t<std::is_convertible<std::decay_t<decltype(std::declval<ptr<Other>>().rp_)>, raw_pointer>{}>>
ptrboost::multi::memory::cuda::managed::ptr131 	/*explicit(false)*/ constexpr ptr(ptr<Other> const& o) : cuda::ptr<T, RawPtr>{static_cast<raw_pointer>(o.rp_)}{}
132 	template<class Other, typename = std::enable_if_t<not std::is_convertible<std::decay_t<decltype(std::declval<ptr<Other>>().rp_)>, raw_pointer>{}>, typename = decltype(static_cast<raw_pointer>(std::declval<ptr<Other>>().rp_))>
133 	explicit/*(true)*/ constexpr ptr(ptr<Other> const& o, void** = 0) : cuda::ptr<T, RawPtr>{static_cast<raw_pointer>(o.rp_)}{}
ptrboost::multi::memory::cuda::managed::ptr134 	explicit constexpr ptr(void* vp) : cuda::ptr<T, RawPtr>{static_cast<raw_pointer>(vp)}{}
135 //	template<class Other, typename = std::enable_if_t<std::is_convertible<std::decay_t<decltype(std::declval<ptr<Other>>().rp_)>, raw_pointer>{}>>
136 //	ptr(ptr<Other> const& o) HD : rp_{static_cast<raw_pointer>(o.rp_)}{}
137 //	template<class Other, typename = std::enable_if_t<not std::is_convertible<std::decay_t<decltype(std::declval<ptr<Other>>().rp_)>, raw_pointer>{}>>
138 //	explicit ptr(ptr<Other> const& o, void** = 0) HD : rp_{static_cast<raw_pointer>(o.rp_)}{}
ptrboost::multi::memory::cuda::managed::ptr139 	explicit ptr(cuda::ptr<T, raw_pointer> const& other) : ptr{other.rp_}{
140 		assert(other.rp_!=nullptr or Cuda::pointer::type(other.rp_) == cudaMemoryTypeManaged);
141 	}
ptrboost::multi::memory::cuda::managed::ptr142 	explicit constexpr ptr(raw_pointer p) : cuda::ptr<T, RawPtr>{p}{}//Cuda::pointer::is_device(p);}
143 	ptr() = default;
144 	ptr(ptr const&) = default;
ptrboost::multi::memory::cuda::managed::ptr145 	constexpr ptr(std::nullptr_t n) : cuda::ptr<T, RawPtr>{n}{}
146 	ptr& operator=(ptr const&) = default;
operator ==boost::multi::memory::cuda::managed::ptr147 	constexpr bool operator==(ptr const& other) const{return this->rp_==other.rp_;}
operator !=boost::multi::memory::cuda::managed::ptr148 	constexpr bool operator!=(ptr const& other) const{return this->rp_!=other.rp_;}
149 
150 	using element_type = typename std::pointer_traits<raw_pointer>::element_type;
151 	using difference_type = typename std::pointer_traits<raw_pointer>::difference_type;
152 	using value_type = T;
153 	using pointer = ptr<T>;
154 	using iterator_category = typename std::iterator_traits<raw_pointer>::iterator_category; //	using iterator_concept  = typename std::iterator_traits<impl_t>::iterator_concept;
operator boolboost::multi::memory::cuda::managed::ptr155 	explicit constexpr operator bool() const{return this->rp_;}
156 //	bool operator not() const{return !rp_;}
operator raw_pointerboost::multi::memory::cuda::managed::ptr157 	constexpr operator raw_pointer()const&{return this->rp_;} // do not =delete
operator ptr<void>boost::multi::memory::cuda::managed::ptr158 	constexpr operator ptr<void>() const{return ptr<void>{this->rp_};}
159 //	template<class PM>
160 //	decltype(auto) operator->*(PM pm) const{return *ptr<std::decay_t<decltype(rp_->*pm)>, decltype(&(rp_->*pm))>{&(rp_->*pm)};}
161 	explicit constexpr operator typename std::pointer_traits<raw_pointer>::template rebind<void>() const{return typename std::pointer_traits<raw_pointer>::template rebind<void>{this->rp_};}
162 	explicit operator typename std::pointer_traits<raw_pointer>::template rebind<void const>() const{return typename std::pointer_traits<raw_pointer>::template rebind<void const>{this->rp_};}
163 	constexpr ptr& operator++(){++(this->rp_); return *this;} // remove
operator --boost::multi::memory::cuda::managed::ptr164 	constexpr ptr& operator--(){--(this->rp_); return *this;} // remove
operator ++boost::multi::memory::cuda::managed::ptr165 	ptr  operator++(int){auto tmp = *this; ++(*this); return tmp;} // remove
operator --boost::multi::memory::cuda::managed::ptr166 	ptr  operator--(int){auto tmp = *this; --(*this); return tmp;} // remove
operator +=boost::multi::memory::cuda::managed::ptr167 	constexpr ptr& operator+=(typename ptr::difference_type n){(this->rp_)+=n; return *this;} // remove
operator -=boost::multi::memory::cuda::managed::ptr168 	constexpr ptr& operator-=(typename ptr::difference_type n) HD{(this->rp_)-=n; return *this;} // remove
operator +boost::multi::memory::cuda::managed::ptr169 	constexpr ptr operator+(typename ptr::difference_type n) const{return ptr{(this->rp_) + n};} // remove
operator -boost::multi::memory::cuda::managed::ptr170 	constexpr ptr operator-(typename ptr::difference_type n) const{return (*this) + (-n);} // remove
171 	using reference = typename std::pointer_traits<raw_pointer>::element_type&;//ref<element_type>;
172 //	[[SLOW]]
173 //	[[deprecated]]
operator *boost::multi::memory::cuda::managed::ptr174 	constexpr reference operator*() const{
175 //		cudaDeviceSynchronize();
176 		return *(this->rp_);
177 	}
operator []boost::multi::memory::cuda::managed::ptr178 	constexpr reference operator[](difference_type n){return *((*this)+n);}
to_address(ptr const & p)179 	friend inline ptr to_address(ptr const& p){return p;}
operator -boost::multi::memory::cuda::managed::ptr180 	constexpr typename ptr::difference_type operator-(ptr const& other) const{return (this->rp_)-other.rp_;}
raw_pointer_castboost::multi::memory::cuda::managed::ptr181 	constexpr raw_pointer raw_pointer_cast() const&{return this->rp_;} // remove
raw_pointer_cast(ptr const & self)182 	friend raw_pointer raw_pointer_cast(ptr const& self){return self.rp_;}
cuda_pointer_cast(ptr const & self)183 	friend cuda::ptr<T, RawPtr> cuda_pointer_cast(ptr const& self){return cuda::ptr<T, RawPtr>{self.rp_};}
184 //	constexpr operator cuda::ptr<T, RawPtr>() const{return cuda::ptr<T, RawPtr>{this->rp_};}
get_allocator(ptr const &)185 	friend constexpr allocator<std::decay_t<T>> get_allocator(ptr const&){return {};} // do not =delete
186 	using default_allocator_type = allocator<std::decay_t<T>>;
default_allocatorboost::multi::memory::cuda::managed::ptr187 	default_allocator_type default_allocator() const{return {};}
188 
189 	template<class T1, class... A1, class Size, class T2, class... A2>//, std::enable_if_t<std::is_trivially_assignable<T2&, T1>{}, int> =0>
copy_nboost::multi::memory::cuda::managed::ptr190 	static auto copy_n(
191 		managed::ptr<T1, A1...> first, Size count,
192 		managed::ptr<T2, A2...> result
193 	){
194 		return adl_copy_n(cuda::ptr<T1>(first), count, cuda::ptr<T2>(result)), result + count;
195 	}
196 public:
default_allocator_of(ptr const &)197 	friend allocator<std::decay_t<T>> default_allocator_of(ptr const&){return {};}
198 };
199 
make_array(ptr<T> t,S s)200 template<class T, class S> const boost::serialization::array_wrapper<T> make_array(ptr<T> t, S s){
201 	using boost::serialization::make_array;
202 	return make_array(raw_pointer_cast(t), s);
203 }
204 
205 
206 }
207 
208 }}
209 }}
210 
211 #undef SLOW
212 
213 #if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__
214 
215 #include "../../cuda/managed/clib.hpp" // cuda::malloc
216 #include "../../cuda/managed/malloc.hpp"
217 
218 #include<memory>
219 #include<cstring>
220 #include<iostream>
221 
222 namespace multi = boost::multi;
223 namespace cuda = multi::memory::cuda;
224 
add_one(double & d)225 void add_one(double& d){d += 1.;}
226 template<class T>
add_one(T && t)227 void add_one(T&& t){std::forward<T>(t) += 1.;}
228 
229 // * Functions with a __global__ qualifier, which run on the device but are called by the host, cannot use pass by reference.
230 //__global__ void set_5(cuda::ptr<double> const& p){
231 //__global__ void set_5(cuda::ptr<double> p){*p = 5.;}
232 //__global__ void check_5(cuda::ptr<double> p){assert(*p == 5.);}
233 
g()234 double const* g(){double* p{nullptr}; return p;}
235 
f()236 cuda::managed::ptr<double const> f(){
237 	return cuda::managed::ptr<double>{nullptr};
238 }
239 
ff()240 cuda::managed::ptr<double> ff(){
241 	return cuda::managed::ptr<double>{cuda::ptr<double>{nullptr}};
242 }
243 
full_overload(double *)244 std::string full_overload(double*){return "cpu";}
full_overload(cuda::ptr<double>)245 std::string full_overload(cuda::ptr<double>){return "gpu";}
full_overload(cuda::managed::ptr<double>)246 std::string full_overload(cuda::managed::ptr<double>){return "mng";}
247 
cpugpu_overload(double *)248 std::string cpugpu_overload(double*){return "cpu";}
cpugpu_overload(cuda::ptr<double>)249 std::string cpugpu_overload(cuda::ptr<double>){return "gpu";}
250 
cpuonly_overload(double *)251 std::string cpuonly_overload(double*){return "cpu";}
252 
gpuonly_overload(cuda::ptr<double>)253 std::string gpuonly_overload(cuda::ptr<double>){return "gpu";}
254 
255 template<class T> void what(T&&) = delete;
256 
main()257 int main(){
258 
259 
260 	f();
261 	using T = double; static_assert( sizeof(cuda::managed::ptr<T>) == sizeof(T*) , "!");
262 	std::size_t const n = 100;
263 	{
264 		auto p = static_cast<cuda::managed::ptr<T>>(cuda::managed::malloc(n*sizeof(T)));
265 	//	cuda::managed::ptr<void> vp = p;
266 	//	T* rp = p;
267 	//	void* vrp = p;
268 #pragma GCC diagnostic push
269 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
270 		*p = 99.;
271 		if(*p != 99.) assert(0);
272 		if(*p == 11.) assert(0);
273 #pragma GCC diagnostic pop
274 		cuda::managed::free(p);
275 	}
276 	{
277 		double d = 1.;
278 		assert( full_overload(&d) == "cpu" );
279 		assert( cpugpu_overload(&d) == "cpu" );
280 		assert( cpugpu_overload(&d) == "cpu" );
281 
282 		cuda::ptr<double> p = nullptr;
283 		assert( full_overload(p) == "gpu" );
284 		assert( cpugpu_overload(p) == "gpu" );
285 		assert( gpuonly_overload(p) == "gpu" );
286 
287 		cuda::managed::ptr<double> pm = nullptr;
288 		assert( full_overload(pm) == "mng" );
289 		assert( cpugpu_overload(pm) == "gpu" );
290 		assert( cpuonly_overload(pm) == "cpu" );
291 		assert( gpuonly_overload(pm) == "gpu" );
292 	}
293 	{
294 		auto p = static_cast<cuda::managed::ptr<T>>(cuda::managed::malloc(n*sizeof(T)));
295 #pragma GCC diagnostic push
296 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
297 		double* ppp = p; *ppp = 3.14;
298 		assert( *p == 3.14 );
299 #pragma GCC diagnostic pop
300 	//	cuda::managed::ptr<T> P = nullptr;
301 	}
302 	{
303 		cuda::managed::ptr<double> p = nullptr;
304 		cuda::managed::ptr<double const> pc = nullptr;
305 		assert( p == pc );
306 		pc = static_cast<cuda::managed::ptr<double const>>(p);
307 	//	double* dp = cuda::managed::ptr<double>{nullptr};
308 		auto f = [](double const*){};
309 		f(p);
310 	//	cuda::ptr<double> pp = p;
311 //		std::reinterpret_pointer_cast<double*>(pp);
312 	//	cuda::managed::ptr<double> ppp{pp};
313 	}
314 	{
315 		static_assert(std::is_convertible<cuda::managed::ptr<double>, double*>{});
316 	}
317 	{
318 		auto p = static_cast<cuda::managed::ptr<T>>(cuda::managed::malloc(n*sizeof(T)));
319 		cuda::ptr<T> cp = p;
320 		cuda::managed::ptr<T> mcp{cp};
321 	}
322 	{
323 		static_assert(std::is_same<std::pointer_traits<cuda::managed::ptr<double>>::rebind<double const>, cuda::managed::ptr<double const>>{}, "!");
324 	}
325 	std::cout << "Finish" << std::endl;
326 }
327 #endif
328 #endif
329 
330 
331