1 /* 2 Copyright 2017 Skytechnology sp. z o.o. 3 4 This file is part of LizardFS. 5 6 LizardFS is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation, version 3. 9 10 LizardFS is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with LizardFS. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "common/platform.h" 20 21 #include <atomic> 22 23 #include "common/attributes.h" 24 #include "common/shared_mutex.h" 25 #include "common/time_utils.h" 26 #include "mount/lizard_client_context.h" 27 #include "protocol/directory_entry.h" 28 29 #include <boost/intrusive/list.hpp> 30 #include <boost/intrusive/set.hpp> 31 32 /*! \brief Cache for directory entries 33 * 34 * Implementation of directory cache with following properties 35 * - fast lookup by parent inode + entry name 36 * - fast lookup by parent inode + entry index 37 * - fast lookup by inode 38 * - fast removal of oldest entries 39 * 40 * \warning Only explicitly specified methods are thread safe. 41 */ 42 class DirEntryCache { 43 public: 44 struct DirEntry { DirEntryDirEntry45 DirEntry(const LizardClient::Context &ctx, uint32_t parent_inode, uint32_t inode, 46 std::size_t index, std::string name, Attributes attr, uint64_t ts) 47 : uid(ctx.uid), 48 gid(ctx.gid), 49 parent_inode(parent_inode), 50 inode(inode), 51 index(index), 52 timestamp(ts), 53 name(name), 54 attr(attr) { 55 } 56 toStringDirEntry57 std::string toString() const { 58 return "Entry " + std::to_string(inode) + ": ctx=(" + std::to_string(uid) + 59 "," + std::to_string(gid) + ") parent_inode=" + 60 std::to_string(parent_inode) + ", index=" + std::to_string(index) + 61 ", timestamp=" + std::to_string(timestamp) + ", name=" + name; 62 } 63 64 uint32_t uid; 65 uint32_t gid; 66 uint32_t parent_inode; 67 uint32_t inode; 68 std::size_t index; 69 uint64_t timestamp; 70 std::string name; 71 Attributes attr; 72 73 boost::intrusive::set_member_hook<> 74 lookup_hook; /*!< For lookups (parent inode, name) */ 75 boost::intrusive::set_member_hook<> 76 index_hook; /*!< For getdir (parent inode, index) */ 77 boost::intrusive::set_member_hook<> 78 inode_hook; /*!< For extracting inode's attributes */ 79 boost::intrusive::list_member_hook<> fifo_hook; /*!< For removing oldest entries */ 80 }; 81 82 protected: 83 struct LookupCompare { operatorLookupCompare84 bool operator()(const DirEntry &e1, const DirEntry &e2) const { 85 return std::make_tuple(e1.parent_inode, e1.uid, e1.gid, e1.name) < 86 std::make_tuple(e2.parent_inode, e2.uid, e2.gid, e2.name); 87 } 88 operatorLookupCompare89 bool operator()(const DirEntry &e, 90 const std::tuple<uint32_t, uint32_t, uint32_t, std::string> 91 &lookup_info) const { 92 return std::make_tuple(e.parent_inode, e.uid, e.gid, e.name) < lookup_info; 93 } 94 operatorLookupCompare95 bool operator()( 96 const std::tuple<uint32_t, uint32_t, uint32_t, std::string> &lookup_info, 97 const DirEntry &e) const { 98 return lookup_info < std::make_tuple(e.parent_inode, e.uid, e.gid, e.name); 99 } 100 }; 101 102 struct IndexCompare { operatorIndexCompare103 bool operator()(const DirEntry &e1, const DirEntry &e2) const { 104 return std::make_tuple(e1.parent_inode, e1.uid, e1.gid, e1.index) < 105 std::make_tuple(e2.parent_inode, e2.uid, e2.gid, e2.index); 106 } 107 operatorIndexCompare108 bool operator()(const DirEntry &e, 109 const std::tuple<uint32_t, uint32_t, uint32_t, std::size_t> 110 &index_info) const { 111 return std::make_tuple(e.parent_inode, e.uid, e.gid, e.index) < index_info; 112 } 113 operatorIndexCompare114 bool operator()( 115 const std::tuple<uint32_t, uint32_t, uint32_t, std::size_t> &index_info, 116 const DirEntry &e) const { 117 return index_info < std::make_tuple(e.parent_inode, e.uid, e.gid, e.index); 118 } 119 }; 120 121 struct InodeCompare { operatorInodeCompare122 bool operator()(const DirEntry &e1, const DirEntry &e2) const { 123 return e1.inode < e2.inode; 124 } 125 operatorInodeCompare126 bool operator()(const DirEntry &e, const uint32_t &inode) const { 127 return e.inode < inode; 128 } 129 operatorInodeCompare130 bool operator()(const uint32_t &inode, const DirEntry &e) const { 131 return inode < e.inode; 132 } 133 }; 134 135 public: 136 typedef boost::intrusive::set< 137 DirEntry, 138 boost::intrusive::member_hook<DirEntry, boost::intrusive::set_member_hook<>, 139 &DirEntry::lookup_hook>, 140 boost::intrusive::compare<LookupCompare>, 141 boost::intrusive::constant_time_size<true>> 142 LookupSet; 143 144 typedef boost::intrusive::set< 145 DirEntry, 146 boost::intrusive::member_hook<DirEntry, boost::intrusive::set_member_hook<>, 147 &DirEntry::index_hook>, 148 boost::intrusive::compare<IndexCompare>, boost::intrusive::constant_time_size<true>> 149 IndexSet; 150 151 typedef boost::intrusive::multiset< 152 DirEntry, 153 boost::intrusive::member_hook<DirEntry, boost::intrusive::set_member_hook<>, 154 &DirEntry::inode_hook>, 155 boost::intrusive::compare<InodeCompare>, boost::intrusive::constant_time_size<true>> 156 InodeMultiset; 157 158 typedef boost::intrusive::list< 159 DirEntry, 160 boost::intrusive::member_hook<DirEntry, boost::intrusive::list_member_hook<>, 161 &DirEntry::fifo_hook>, 162 boost::intrusive::constant_time_size<true>, boost::intrusive::cache_last<true>> 163 FifoList; 164 165 typedef shared_mutex SharedMutex; 166 167 /*! \brief Constructor. 168 * 169 * \param timeout cache entry expiration timeout (us). 170 */ 171 DirEntryCache(uint64_t timeout = kDefaultTimeout_us) timer_()172 : timer_(), current_time_(0), timeout_(timeout) { 173 } 174 ~DirEntryCache()175 ~DirEntryCache() { 176 auto it = fifo_list_.begin(); 177 while (it != fifo_list_.end()) { 178 auto next_it = std::next(it); 179 erase(std::addressof(*it)); 180 it = next_it; 181 } 182 } 183 184 /*! \brief Set cache entry expiration timeout (us). 185 * 186 * \param timeout entry expiration timeout (us). 187 */ setTimeout(uint64_t timeout)188 void setTimeout(uint64_t timeout) { 189 timeout_ = timeout; 190 } 191 192 /*! \brief Check if entry is valid (not expired). */ isValid(const IndexSet::iterator & index_it)193 bool isValid(const IndexSet::iterator &index_it) const { 194 return index_it != index_set_.end() && !expired(*index_it, current_time_); 195 } 196 197 /*! \brief Check if entry is valid (not expired). */ isValid(const IndexSet::const_iterator & index_it)198 bool isValid(const IndexSet::const_iterator &index_it) const { 199 return index_it != index_set_.end() && !expired(*index_it, current_time_); 200 } 201 202 /*! \brief Find directory entry in cache. 203 * 204 * \param ctx Process credentials. 205 * \param parent_inode Parent inode. 206 * \param name Directory entry name. 207 * 208 * \return Iterator to found entry. 209 */ find(const LizardClient::Context & ctx,uint32_t parent_inode,const std::string & name)210 LookupSet::iterator find(const LizardClient::Context &ctx, uint32_t parent_inode, 211 const std::string &name) { 212 return lookup_set_.find(std::make_tuple(parent_inode, ctx.uid, ctx.gid, name), 213 LookupCompare()); 214 } 215 216 /*! \brief Find directory entry in cache. 217 * 218 * \param ctx Process credentials. 219 * \param parent_inode Parent inode. 220 * \param index Entry index in directory. 221 * 222 * \return Iterator to found entry. 223 */ find(const LizardClient::Context & ctx,uint32_t parent_inode,size_t index)224 IndexSet::iterator find(const LizardClient::Context &ctx, uint32_t parent_inode, 225 size_t index) { 226 return index_set_.find(std::make_tuple(parent_inode, ctx.uid, ctx.gid, index), 227 IndexCompare()); 228 } 229 230 /*! \brief Find directory entry in cache. 231 * 232 * \param ctx Process credentials. 233 * \param inode Node inode. 234 * 235 * \return Iterator to found entry. 236 */ find(const LizardClient::Context & ctx,uint32_t inode)237 InodeMultiset::iterator find(const LizardClient::Context &ctx, uint32_t inode) { 238 auto it = inode_multiset_.lower_bound(inode, InodeCompare()); 239 240 while (it != inode_multiset_.end() && it->inode == inode) { 241 if (it->uid == ctx.uid && it->gid == ctx.gid) { 242 return it; 243 } 244 it++; 245 } 246 return inode_multiset_.end(); 247 } 248 249 /*! \brief Get attributes of an inode. 250 * 251 * \warning This function takes read (shared) lock. 252 * 253 * \param ctx Process credentials. 254 * \param inode Node index (inode). 255 * \param attr Output: attributes of found inode. 256 * 257 * \return True if inode has been found in cache, false otherwise. 258 */ lookup(const LizardClient::Context & ctx,uint32_t inode,Attributes & attr)259 bool lookup(const LizardClient::Context &ctx, uint32_t inode, Attributes &attr) { 260 shared_lock<SharedMutex> guard(rwlock_); 261 updateTime(); 262 auto it = find(ctx, inode); 263 if (it == inode_multiset_.end() || expired(*it, current_time_) || it->inode == 0) { 264 return false; 265 } 266 attr = it->attr; 267 return true; 268 } 269 270 /*! \brief Get attributes of directory entry. 271 * 272 * \warning This function takes read (shared) lock. 273 * 274 * \param ctx Process credentials. 275 * \param parent_inode Parent node index (inode). 276 * \param name Name of directory entry to find. 277 * \param inode Output: inode of directory entry. 278 * \param attr Output: attributes of found directory entry. 279 * 280 * \return True if inode has been found in cache, false otherwise. 281 */ lookup(const LizardClient::Context & ctx,uint32_t parent_inode,const std::string & name,uint32_t & inode,Attributes & attr)282 bool lookup(const LizardClient::Context &ctx, uint32_t parent_inode, 283 const std::string &name, uint32_t &inode, Attributes &attr) { 284 shared_lock<SharedMutex> guard(rwlock_); 285 updateTime(); 286 auto it = find(ctx, parent_inode, name); 287 if (it == lookup_set_.end() || expired(*it, current_time_) || it->inode == 0) { 288 return false; 289 } 290 inode = it->inode; 291 attr = it->attr; 292 return true; 293 } 294 295 /*! \brief Add directory entry information to cache. 296 * 297 * \param ctx Process credentials. 298 * \param parent_inode Parent node index (inode). 299 * \param inode Inode of directory entry. 300 * \param index Position of entry in directory listing. 301 * \param name Name of directory entry. 302 * \param attr attributes of found directory entry. 303 * \param timestamp Time when data has been obtained (used for entry timeout). 304 */ insert(const LizardClient::Context & ctx,uint32_t parent_inode,uint32_t inode,size_t index,const std::string name,const Attributes & attr,uint64_t timestamp)305 void insert(const LizardClient::Context &ctx, uint32_t parent_inode, uint32_t inode, 306 size_t index, const std::string name, const Attributes &attr, 307 uint64_t timestamp) { 308 // Avoid inserting stale data 309 if (timestamp + timeout_ <= current_time_) { 310 return; 311 } 312 removeExpired(1, timestamp); 313 auto lookup_it = find(ctx, parent_inode, name); 314 if (lookup_it != lookup_set_.end()) { 315 erase(std::addressof(*lookup_it)); 316 } 317 auto index_it = find(ctx, parent_inode, index); 318 if (index_it != index_set_.end()) { 319 erase(std::addressof(*index_it)); 320 } 321 addEntry(ctx, parent_inode, inode, index, name, attr, timestamp); 322 } 323 324 /*! \brief Add data to cache from container. 325 * 326 * \param ctx Process credentials. 327 * \param parent_inode Parent node index (inode). 328 * \param first_index Position of entry in directory listing. 329 * \param container Container with data to add to cache. 330 * \param timestamp Time when data has been obtained (used for entry timeout). 331 */ 332 template <typename Container> insertSubsequent(const LizardClient::Context & ctx,uint32_t parent_inode,std::size_t first_index,const Container & container,uint64_t timestamp)333 void insertSubsequent(const LizardClient::Context &ctx, uint32_t parent_inode, 334 std::size_t first_index, const Container &container, 335 uint64_t timestamp) { 336 // Avoid inserting stale data 337 if (timestamp + timeout_ <= current_time_) { 338 return; 339 } 340 removeExpired(container.size(), timestamp); 341 auto it = index_set_.lower_bound( 342 std::make_tuple(parent_inode, ctx.uid, ctx.gid, first_index), 343 IndexCompare()); 344 std::size_t current_index = first_index; 345 for (const DirectoryEntry &de : container) { 346 auto lookup_it = find(ctx, parent_inode, de.name); 347 if (it == index_set_.end() || 348 std::make_tuple(parent_inode, ctx.uid, ctx.gid) != 349 std::make_tuple(it->parent_inode, it->uid, it->gid) || 350 it->index != current_index) { 351 if (lookup_it != lookup_end()) { 352 erase(std::addressof(*lookup_it)); 353 } 354 it = addEntry(ctx, parent_inode, de.inode, current_index, de.name, 355 de.attributes, timestamp); 356 } else { 357 if (lookup_it != lookup_end() && it != index_set_.iterator_to(*lookup_it)) { 358 erase(std::addressof(*lookup_it)); 359 } 360 overwriteEntry(*it, de, timestamp); 361 } 362 ++it; 363 ++current_index; 364 } 365 } 366 367 /*! \brief Remove data from cache matching specified criteria. 368 * 369 * \param ctx Process credentials. 370 * \param parent_inode Parent node inode. 371 * \param first_index Directory index of first entry to remove. 372 */ 373 void invalidate(const LizardClient::Context &ctx, uint32_t parent_inode, 374 size_t first_index = 0) { 375 auto it = index_set_.lower_bound( 376 std::make_tuple(parent_inode, ctx.uid, ctx.gid, first_index), 377 IndexCompare()); 378 while (it != index_set_.end() && 379 std::make_tuple(parent_inode, ctx.uid, ctx.gid) == 380 std::make_tuple(it->parent_inode, it->uid, it->gid)) { 381 assert(it->index >= first_index); 382 383 DirEntry *entry = std::addressof(*it); 384 ++it; 385 erase(entry); 386 } 387 } 388 389 /*! \brief Remove data from cache matching specified criteria. 390 * 391 * \warning This function takes write (unique) lock. 392 * 393 * \param inode Inode. 394 */ lockAndInvalidateInode(uint32_t inode)395 void lockAndInvalidateInode(uint32_t inode) { 396 std::unique_lock<SharedMutex> guard(rwlock_); 397 auto it = inode_multiset_.find(inode, InodeCompare()); 398 while (it != inode_multiset_.end() && it->inode == inode) { 399 DirEntry *entry = std::addressof(*it); 400 ++it; 401 erase(entry); 402 } 403 } 404 405 /*! \brief Remove data from cache matching specified criteria. 406 * 407 * \warning This function takes write (unique) lock. 408 * 409 * \param parent_inode Parent inode. 410 */ lockAndInvalidateParent(uint32_t parent_inode)411 void lockAndInvalidateParent(uint32_t parent_inode) { 412 std::unique_lock<SharedMutex> guard(rwlock_); 413 auto it = index_set_.lower_bound(std::make_tuple(parent_inode, 0, 0, 0), 414 IndexCompare()); 415 while (it != index_set_.end() && it->parent_inode == parent_inode) { 416 DirEntry *entry = std::addressof(*it); 417 ++it; 418 erase(entry); 419 } 420 } 421 422 /*! \brief Remove data from cache matching specified criteria. 423 * 424 * \warning This function takes write (unique) lock. 425 * 426 * \param ctx Process credentials. 427 * \param parent_inode Parent inode. 428 */ lockAndInvalidateParent(const LizardClient::Context & ctx,uint32_t parent_inode)429 void lockAndInvalidateParent(const LizardClient::Context &ctx, uint32_t parent_inode) { 430 std::unique_lock<SharedMutex> guard(rwlock_); 431 432 auto it = index_set_.lower_bound(std::make_tuple(parent_inode, ctx.uid, ctx.gid, 0), 433 IndexCompare()); 434 while (it != index_set_.end() && 435 std::make_tuple(parent_inode, ctx.uid, ctx.gid) == 436 std::make_tuple(it->parent_inode, it->uid, it->gid)) { 437 DirEntry *entry = std::addressof(*it); 438 ++it; 439 erase(entry); 440 } 441 } 442 lookup_begin()443 LookupSet::const_iterator lookup_begin() const { 444 return lookup_set_.begin(); 445 } 446 index_begin()447 IndexSet::const_iterator index_begin() const { 448 return index_set_.begin(); 449 } 450 inode_begin()451 InodeMultiset::const_iterator inode_begin() const { 452 return inode_multiset_.begin(); 453 } 454 lookup_end()455 LookupSet::const_iterator lookup_end() const { 456 return lookup_set_.end(); 457 } 458 index_end()459 IndexSet::const_iterator index_end() const { 460 return index_set_.end(); 461 } 462 inode_end()463 InodeMultiset::const_iterator inode_end() const { 464 return inode_multiset_.end(); 465 } 466 467 /*! \brief Get number of elements in cache. */ size()468 size_t size() const { 469 return lookup_set_.size(); 470 } 471 472 /*! \brief Get reference to reader-writer (shared) mutex. */ rwlock()473 SharedMutex &rwlock() { 474 return rwlock_; 475 } 476 477 /*! \brief Remove expired elements from cache. 478 * 479 * \param max_to_remove Limit on number of removed entries. 480 * \param timestamp Current time. 481 */ removeExpired(int max_to_remove,uint64_t timestamp)482 void removeExpired(int max_to_remove, uint64_t timestamp) { 483 int i = 0; 484 while (!fifo_list_.empty()) { 485 if (!expired(fifo_list_.front(), timestamp)) { 486 break; 487 } 488 if (i >= max_to_remove) { 489 break; 490 } 491 DirEntry *entry = std::addressof(fifo_list_.front()); 492 erase(entry); 493 ++i; 494 } 495 } 496 497 /*! \brief Remove expired elements from cache. 498 * 499 * \param max_to_remove Limit on number of removed entries. 500 */ removeExpired(int max_to_remove)501 void removeExpired(int max_to_remove) { 502 removeExpired(max_to_remove, current_time_); 503 } 504 505 /*! \brief Remove oldest elements from cache. 506 * 507 * \param count Number of entries to remove. 508 */ removeOldest(std::size_t count)509 void removeOldest(std::size_t count) { 510 for(std::size_t i = 0; i < count && !fifo_list_.empty(); ++i) { 511 DirEntry *entry = std::addressof(fifo_list_.front()); 512 erase(entry); 513 } 514 } 515 516 /*! \brief Update internal time to wall time. 517 * 518 * \return Current internal time. 519 */ updateTime()520 uint64_t updateTime() { 521 current_time_ = timer_.elapsed_us(); 522 return current_time_; 523 } 524 525 protected: erase(DirEntry * entry)526 void erase(DirEntry *entry) { 527 lookup_set_.erase(lookup_set_.iterator_to(*entry)); 528 index_set_.erase(index_set_.iterator_to(*entry)); 529 inode_multiset_.erase(inode_multiset_.iterator_to(*entry)); 530 fifo_list_.erase(fifo_list_.iterator_to(*entry)); 531 delete entry; 532 } 533 expired(const DirEntry & entry,uint64_t timestamp)534 bool expired(const DirEntry &entry, uint64_t timestamp) const { 535 return entry.timestamp + timeout_ <= timestamp; 536 } 537 overwriteEntry(DirEntry & entry,DirectoryEntry de,uint64_t timestamp)538 void overwriteEntry(DirEntry &entry, DirectoryEntry de, uint64_t timestamp) { 539 if (entry.inode != de.inode) { 540 inode_multiset_.erase(inode_multiset_.iterator_to(entry)); 541 entry.inode = de.inode; 542 inode_multiset_.insert(entry); 543 } 544 545 if (entry.name != de.name) { 546 lookup_set_.erase(lookup_set_.iterator_to(entry)); 547 entry.name = de.name; 548 lookup_set_.insert(entry); 549 } 550 551 fifo_list_.erase(fifo_list_.iterator_to(entry)); 552 fifo_list_.push_back(entry); 553 entry.timestamp = timestamp; 554 entry.attr = de.attributes; 555 } 556 addEntry(const LizardClient::Context & ctx,uint32_t parent_inode,uint32_t inode,size_t index,std::string name,Attributes attr,uint64_t timestamp)557 IndexSet::iterator addEntry(const LizardClient::Context &ctx, uint32_t parent_inode, 558 uint32_t inode, size_t index, std::string name, Attributes attr, 559 uint64_t timestamp) { 560 DirEntry *entry = 561 new DirEntry(ctx, parent_inode, inode, index, name, attr, timestamp); 562 lookup_set_.insert(*entry); 563 auto result = index_set_.insert(*entry); 564 inode_multiset_.insert(*entry); 565 fifo_list_.push_back(*entry); 566 return result.first; 567 } 568 569 Timer timer_; 570 std::atomic<uint64_t> current_time_; 571 uint64_t timeout_; 572 LookupSet lookup_set_; 573 IndexSet index_set_; 574 InodeMultiset inode_multiset_; 575 FifoList fifo_list_; 576 SharedMutex rwlock_; 577 578 static const int kDefaultTimeout_us = 500000; 579 }; 580