/*
 * Copyright (c) 2018 Intel Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */

#ifndef IDEEP_ALLOCATOR_HPP
#define IDEEP_ALLOCATOR_HPP

#include <cerrno>
#include <cstdlib>
#include <functional>
#include <list>
#include <memory>
#include <mutex>
#include <stdexcept>
#include <string>

namespace ideep {

// Alignment applied to every tensor allocation. Overridable at build time
// via -D_TENSOR_MEM_ALIGNMENT_=<bytes>; defaults to one 4 KiB page.
#ifdef _TENSOR_MEM_ALIGNMENT_
#define SYS_MEMORY_ALIGNMENT _TENSOR_MEM_ALIGNMENT_
#else
#define SYS_MEMORY_ALIGNMENT 4096
#endif

namespace utils {

// Plain aligned allocator: every request goes straight to the system
// allocator (_aligned_malloc on Windows, posix_memalign elsewhere).
class allocator {
public:
  constexpr static size_t tensor_memalignment = SYS_MEMORY_ALIGNMENT;
  allocator() = default;

  // Allocate `size` bytes aligned to tensor_memalignment.
  // Returns nullptr on failure (no exception), matching callers that
  // check the pointer.
  template<class computation_t = void>
  static char *malloc(size_t size) {
    void *ptr = nullptr;
#ifdef _WIN32
    ptr = _aligned_malloc(size, tensor_memalignment);
    int rc = ptr ? 0 : errno;
#else
    int rc = ::posix_memalign(&ptr, tensor_memalignment, size);
#endif /* _WIN32 */
    return (rc == 0) ? static_cast<char *>(ptr) : nullptr;
  }

  // Release memory obtained from malloc() above. Must use the matching
  // system routine: _aligned_malloc pairs only with _aligned_free.
  template<class computation_t = void>
  static void free(void *p) {
#ifdef _WIN32
    _aligned_free(p);
#else
    ::free(p);
#endif /* _WIN32 */
  }

  // Byte-sized helper type whose operator new/delete route through this
  // allocator, so `new byte<T>[n]` yields tensor-aligned storage.
  template<class computation_t = void>
  struct byte {
  public:
    static void *operator new(size_t sz) {
      return static_cast<void *>(malloc<computation_t>(sz));
    }

    static void *operator new[](size_t sz) {
      return static_cast<void *>(malloc<computation_t>(sz));
    }

    static void operator delete(void *p) { free<computation_t>(p); }
    static void operator delete[](void *p) { free<computation_t>(p); }

  private:
    char q;
  };
};

// Default scratch-allocator implementation (one pool per computation type).
// Freed blocks are cached in per-size free lists and handed back to later
// allocations of the exact same size; memory is returned to the system only
// when the pool is destroyed.
class scratch_allocator {
public:
// Compute a t* at byte offset `offset` from p. `offset` may be negative:
// the static_cast<size_t> wrap-around in unsigned arithmetic still yields
// the correct address.
#define GET_PTR(t, p, offset) \
    (reinterpret_cast<t*>(reinterpret_cast<size_t>(p) + \
        static_cast<size_t>(offset)))

  // Memory caching is enabled by default; setting DISABLE_MEM_CACHE_OPT to
  // any value other than '0' disables it. The environment is read exactly
  // once (thread-safe local-static initialization) and the result can not
  // be changed afterwards.
  static bool is_enabled() {
    static const bool enabled = []() {
      const char *env = std::getenv("DISABLE_MEM_CACHE_OPT");
      return !(env && *env != '0');
    }();
    return enabled;
  }

  class mpool {
  public:
    mpool() : alloc_size_(0), free_size_(0),
        alignment_(SYS_MEMORY_ALIGNMENT), seq_(0) {}

    ~mpool() {
      std::lock_guard<std::mutex> lock(mutex_);
      for (int i = 0; i < MAX_ENTRY; ++i) {
        for (auto *h : free_hashline_[i]) {
          // Release with the routine matching the allocation in malloc()
          // below: _aligned_malloc memory must go through _aligned_free.
#ifdef _WIN32
          _aligned_free(h);
#else
          ::free(h);
#endif /* _WIN32 */
        }
      }
    }

    // Return a cached block of exactly `size` bytes when one is available,
    // otherwise carve a fresh chunk. The pointer handed out is offset
    // `alignment_` bytes past a hidden header_t bookkeeping record, so the
    // payload itself stays aligned.
    // Throws std::invalid_argument when the system is out of memory.
    void *malloc(size_t size) {
      std::lock_guard<std::mutex> lock(mutex_);
      int idx = to_index(size);

      // Cache hit: reuse the first free block with an exact size match.
      std::list<header_t *> &bucket = free_hashline_[idx];
      for (auto it = bucket.begin(); it != bucket.end(); ++it) {
        if ((*it)->size_ == size) {
          header_t *head = *it;
          bucket.erase(it);
          free_size_ -= size;
          return GET_PTR(void, head, alignment_);
        }
      }

      // Cache miss: allocate header + payload as one chunk. The full
      // length `len` (not just `size`) must be requested on every
      // platform, otherwise the alignment_ offset overruns the buffer.
      size_t len = size + alignment_;
      void *ptr = nullptr;
#ifdef _WIN32
      ptr = _aligned_malloc(len, alignment_);
      if (ptr == nullptr)
        throw std::invalid_argument("Out of memory");
#else
      int rc = ::posix_memalign(&ptr, alignment_, len);
      if (rc != 0)
        throw std::invalid_argument("Out of memory");
#endif /* _WIN32 */
      header_t *head = static_cast<header_t *>(ptr);
      head->size_ = size;
      head->seq_ = seq_++;
      alloc_size_ += size;
      return GET_PTR(void, ptr, alignment_);
    }

    // Recycle a block previously returned by malloc(). The block only goes
    // back to the free list; nothing is released to the system here.
    void free(void *ptr) {
      std::lock_guard<std::mutex> lock(mutex_);
      header_t *head = GET_PTR(header_t, ptr, -alignment_);
      free_hashline_[to_index(head->size_)].push_back(head);
      free_size_ += head->size_;
    }

  private:
    // Map a request size onto a free-list bucket. Hashes the size value
    // directly — same bucketing role as before, without constructing an
    // ostringstream/string on every allocation.
    inline int to_index(size_t size) {
      return static_cast<int>(std::hash<size_t>{}(size) % MAX_ENTRY);
    }

    // Bookkeeping record stored in the `alignment_` bytes that precede
    // every pointer handed out by this pool.
    typedef struct {
      size_t size_;  // payload size the caller requested
      int seq_;      // allocation sequence number (diagnostics only)
    } header_t;

    static constexpr int MAX_ENTRY = 512;

    size_t alloc_size_;  // total bytes ever carved from the system
    size_t free_size_;   // bytes currently parked in the free lists
    const size_t alignment_;
    std::list<header_t *> free_hashline_[MAX_ENTRY];
    std::mutex mutex_;   // guards all pool state above
    int seq_;
  };

  scratch_allocator() = default;

  // One process-wide pool per computation type (distinct template
  // instantiations get distinct pools).
  template<class computation_t = void>
  static inline mpool *get_mpool(void) {
    static std::shared_ptr<mpool> mpool_(new mpool());
    return mpool_.get();
  }

  // Allocate through the pool, or fall back to the plain allocator when
  // caching is disabled via the environment.
  template<class computation_t = void>
  static char *malloc(size_t size) {
    if (!is_enabled())
      return static_cast<char *>(allocator::malloc(size));
    else
      return static_cast<char *>(get_mpool<computation_t>()->malloc(size));
  }

  template<class computation_t = void>
  static void free(void *p) {
    if (!is_enabled())
      allocator::free(p);
    else
      get_mpool<computation_t>()->free(p);
  }

  // Byte-sized helper type whose operator new/delete route through the
  // scratch allocator (mirrors allocator::byte).
  template<class computation_t = void>
  struct byte {
  public:
    static void *operator new(size_t sz) {
      return static_cast<void *>(malloc<computation_t>(sz));
    }

    static void *operator new[](size_t sz) {
      return static_cast<void *>(malloc<computation_t>(sz));
    }

    static void operator delete(void *p) { free<computation_t>(p); }
    static void operator delete[](void *p) { free<computation_t>(p); }

  private:
    char q;
  };
};
}  // namespace utils
}  // namespace ideep

#define SCRATCH_ALLOCATOR(computation_t) \
    ideep::utils::scratch_allocator, ideep::computation_t

#endif