/*
 *Copyright (c) 2018 Intel Corporation.
 *
 *Permission is hereby granted, free of charge, to any person obtaining a copy
 *of this software and associated documentation files (the "Software"), to deal
 *in the Software without restriction, including without limitation the rights
 *to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 *copies of the Software, and to permit persons to whom the Software is
 *furnished to do so, subject to the following conditions:
 *
 *The above copyright notice and this permission notice shall be included in
 *all copies or substantial portions of the Software.
 *
 *THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 *AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 *LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 *OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 *THE SOFTWARE.
 *
 */


#ifndef IDEEP_ALLOCATOR_HPP
#define IDEEP_ALLOCATOR_HPP

#include <cerrno>
#include <cstdlib>
#include <functional>
#include <list>
#include <memory>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>

#ifdef _WIN32
#include <malloc.h>
#endif

namespace ideep {

#ifdef _TENSOR_MEM_ALIGNMENT_
#define SYS_MEMORY_ALIGNMENT _TENSOR_MEM_ALIGNMENT_
#else
#define SYS_MEMORY_ALIGNMENT 4096
#endif

namespace utils {

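// Default allocator: a thin wrapper over the platform's aligned
// allocation routines (_aligned_malloc/_aligned_free on Windows,
// posix_memalign/::free elsewhere). Every allocation is aligned to
// SYS_MEMORY_ALIGNMENT; the computation_t template parameter is only a
// tag and does not affect behavior here.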
class allocator {
public:
  constexpr static size_t tensor_memalignment = SYS_MEMORY_ALIGNMENT;
  allocator() = default;

  template<class computation_t = void>
  static char *malloc(size_t size) {
    void *ptr;
#ifdef _WIN32
    ptr = _aligned_malloc(size, tensor_memalignment);
    int rc = ptr ? 0 : errno;
#else
    int rc = ::posix_memalign(&ptr, tensor_memalignment, size);
#endif /* _WIN32 */
    return (rc == 0) ? static_cast<char *>(ptr) : nullptr;
  }

  template<class computation_t = void>
  static void free(void *p) {
#ifdef _WIN32
    _aligned_free(p);
#else
    ::free(p);
#endif /* _WIN32 */
  }

  // Raw-storage helper whose operator new/new[] and delete/delete[]
  // route through the aligned malloc/free above.
  template<class computation_t = void>
  struct byte {
  public:
    static void *operator new(size_t sz) {
      return static_cast<void *>(malloc<computation_t>(sz));
    }

    static void *operator new[](size_t sz) {
      return static_cast<void *>(malloc<computation_t>(sz));
    }

    static void operator delete(void *p) { free<computation_t>(p); }
    static void operator delete[](void *p) { free<computation_t>(p); }

  private:
    char q;
  };
};

// Default scratch allocator implementation: a size-keyed memory pool
// maintained per computation type. Freed blocks are cached and handed
// back to later requests of the same size instead of being returned to
// the system allocator.
class scratch_allocator {
public:
  // Offset a pointer by `offset` bytes and cast the result to t*.
  #define GET_PTR(t, p, offset) \
      (reinterpret_cast<t*>(reinterpret_cast<size_t>(p) + \
      static_cast<size_t>(offset)))

  static bool is_enabled() {
    static bool enabled = true;
    static bool checked = false;

    // Evaluated once on the first call; the setting cannot be changed
    // afterwards. Setting DISABLE_MEM_CACHE_OPT to a non-zero value
    // disables the cache and falls back to the plain allocator.
    if (!checked) {
      char *env = getenv("DISABLE_MEM_CACHE_OPT");
      if (env && *env != '0')
        enabled = false;
      checked = true;
    }
    return enabled;
  }

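  // Memory pool backing the scratch allocator. Each block is allocated
  // with `alignment_` extra bytes so a header_t (size and sequence
  // number) can be stored in front of the pointer handed to the caller;
  // freed blocks are kept in free lists hashed by size for reuse.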
  class mpool {
  public:
    mpool() : alloc_size_(0), free_size_(0),
        alignment_(SYS_MEMORY_ALIGNMENT), seq_(0) {}

    ~mpool() {
      std::lock_guard<std::mutex> lock(mutex_);
      for (int i = 0; i < MAX_ENTRY; ++i) {
        std::list<header_t *>& l = free_hashline_[i];
        for (auto& h: l) {
#ifdef _WIN32
          _aligned_free(h);
#else
          ::free(h);
#endif /* _WIN32 */
        }
      }
    }

    void *malloc(size_t size) {
      std::lock_guard<std::mutex> lock(mutex_);
      void *ptr;
      int idx = to_index(size);

      // Serve the request from the cache if a block of exactly this
      // size is available.
      if (!free_hashline_[idx].empty()) {
        header_t *head = nullptr;
        std::list<header_t *> &list = free_hashline_[idx];
        typename std::list<header_t *>::iterator it;
        for (it = list.begin(); it != list.end(); ++it) {
          if ((*it)->size_ == size) {
            head = *it;
            break;
          }
        }
        if (head) {
          list.erase(it);
          ptr = static_cast<void *>(head);
          free_size_ -= size;
          return GET_PTR(void, ptr, alignment_);
        }
      }

      // No cached memory: allocate a fresh block with room for the
      // bookkeeping header ahead of the pointer returned to the caller.
      size_t len = size + alignment_;
#ifdef _WIN32
      ptr = _aligned_malloc(len, alignment_);
      if (ptr == nullptr)
        throw std::invalid_argument("Out of memory");
#else
      int rc = ::posix_memalign(&ptr, alignment_, len);
      if (rc != 0)
        throw std::invalid_argument("Out of memory");
#endif /* _WIN32 */
      header_t *head = static_cast<header_t *>(ptr);
      head->size_ = size;
      head->seq_ = seq_++;
      alloc_size_ += size;
      return GET_PTR(void, ptr, alignment_);
    }

    void free(void *ptr) {
      std::lock_guard<std::mutex> lock(mutex_);
      // Step back to the header in front of the user pointer and return
      // the block to its size-keyed free list for later reuse.
      header_t *head = GET_PTR(header_t, ptr, -alignment_);
      int idx = to_index(head->size_);
      free_hashline_[idx].push_back(head);
      free_size_ += head->size_;
    }

  private:
    // Hash the allocation size into one of the MAX_ENTRY free lists.
    inline int to_index(size_t size) {
      std::ostringstream os;
      os << std::hex << "L" << size << "_";
      size_t hash = std::hash<std::string>{}(os.str());
      return hash % MAX_ENTRY;
    }

    typedef struct {
      size_t size_;
      int seq_;
    } header_t;

    static constexpr int MAX_ENTRY = 512;

    size_t alloc_size_;
    size_t free_size_;
    const size_t alignment_;
    std::list<header_t *> free_hashline_[MAX_ENTRY];
    std::mutex mutex_;
    int seq_;
  };

  scratch_allocator() = default;

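  // One pool instance is created lazily per computation type; all
  // callers instantiated with the same computation_t share it.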
  template<class computation_t = void>
  static inline mpool *get_mpool(void) {
    static std::shared_ptr<mpool> mpool_(new mpool());
    return mpool_.get();
  }

  template<class computation_t = void>
  static char *malloc(size_t size) {
    if (!is_enabled())
      return static_cast<char *>(allocator::malloc(size));
    else
      return static_cast<char *>(get_mpool<computation_t>()->malloc(size));
  }

  template<class computation_t = void>
  static void free(void *p) {
    if (!is_enabled())
      allocator::free(p);
    else
      get_mpool<computation_t>()->free(p);
  }

  // Raw-storage helper mirroring allocator::byte, but backed by the
  // per-computation memory pool.
  template<class computation_t = void>
  struct byte {
  public:
    static void *operator new(size_t sz) {
      return static_cast<void *>(malloc<computation_t>(sz));
    }

    static void *operator new[](size_t sz) {
      return static_cast<void *>(malloc<computation_t>(sz));
    }

    static void operator delete(void *p) { free<computation_t>(p); }
    static void operator delete[](void *p) {
      free<computation_t>(p);
    }

  private:
    char q;
  };
};

}  // namespace utils
}  // namespace ideep

#define SCRATCH_ALLOCATOR(computation_t) \
    ideep::utils::scratch_allocator, ideep::computation_t
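// Convenience macro: expands to the scratch allocator paired with the
// given computation type, for use where an (allocator, computation)
// pair of template arguments is expected, e.g.
// SCRATCH_ALLOCATOR(convolution_forward). The exact spelling of call
// sites depends on the computation templates that consume it.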

#endif