1 /*
2    Copyright 2017 Skytechnology sp. z o.o.
3 
4    This file is part of LizardFS.
5 
6    LizardFS is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation, version 3.
9 
10    LizardFS is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with LizardFS. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "common/platform.h"
20 
21 #include <atomic>
22 
23 #include "common/attributes.h"
24 #include "common/shared_mutex.h"
25 #include "common/time_utils.h"
26 #include "mount/lizard_client_context.h"
27 #include "protocol/directory_entry.h"
28 
29 #include <boost/intrusive/list.hpp>
30 #include <boost/intrusive/set.hpp>
31 
32 /*! \brief Cache for directory entries
33  *
34  * Implementation of directory cache with following properties
35  *   - fast lookup by parent inode + entry name
36  *   - fast lookup by parent inode + entry index
37  *   - fast lookup by inode
38  *   - fast removal of oldest entries
39  *
40  * \warning Only explicitly specified methods are thread safe.
41  */
42 class DirEntryCache {
43 public:
44 	struct DirEntry {
DirEntryDirEntry45 		DirEntry(const LizardClient::Context &ctx, uint32_t parent_inode, uint32_t inode,
46 		         std::size_t index, std::string name, Attributes attr, uint64_t ts)
47 		    : uid(ctx.uid),
48 		      gid(ctx.gid),
49 		      parent_inode(parent_inode),
50 		      inode(inode),
51 		      index(index),
52 		      timestamp(ts),
53 		      name(name),
54 		      attr(attr) {
55 		}
56 
toStringDirEntry57 		std::string toString() const {
58 			return "Entry " + std::to_string(inode) + ": ctx=(" + std::to_string(uid) +
59 			       "," + std::to_string(gid) + ") parent_inode=" +
60 			       std::to_string(parent_inode) + ", index=" + std::to_string(index) +
61 			       ", timestamp=" + std::to_string(timestamp) + ", name=" + name;
62 		}
63 
64 		uint32_t uid;
65 		uint32_t gid;
66 		uint32_t parent_inode;
67 		uint32_t inode;
68 		std::size_t index;
69 		uint64_t timestamp;
70 		std::string name;
71 		Attributes attr;
72 
73 		boost::intrusive::set_member_hook<>
74 		        lookup_hook; /*!< For lookups (parent inode, name) */
75 		boost::intrusive::set_member_hook<>
76 		        index_hook; /*!< For getdir (parent inode, index) */
77 		boost::intrusive::set_member_hook<>
78 		        inode_hook; /*!< For extracting inode's attributes */
79 		boost::intrusive::list_member_hook<> fifo_hook; /*!< For removing oldest entries */
80 	};
81 
82 protected:
83 	struct LookupCompare {
operatorLookupCompare84 		bool operator()(const DirEntry &e1, const DirEntry &e2) const {
85 			return std::make_tuple(e1.parent_inode, e1.uid, e1.gid, e1.name) <
86 			       std::make_tuple(e2.parent_inode, e2.uid, e2.gid, e2.name);
87 		}
88 
operatorLookupCompare89 		bool operator()(const DirEntry &e,
90 		                const std::tuple<uint32_t, uint32_t, uint32_t, std::string>
91 		                        &lookup_info) const {
92 			return std::make_tuple(e.parent_inode, e.uid, e.gid, e.name) < lookup_info;
93 		}
94 
operatorLookupCompare95 		bool operator()(
96 		        const std::tuple<uint32_t, uint32_t, uint32_t, std::string> &lookup_info,
97 		        const DirEntry &e) const {
98 			return lookup_info < std::make_tuple(e.parent_inode, e.uid, e.gid, e.name);
99 		}
100 	};
101 
102 	struct IndexCompare {
operatorIndexCompare103 		bool operator()(const DirEntry &e1, const DirEntry &e2) const {
104 			return std::make_tuple(e1.parent_inode, e1.uid, e1.gid, e1.index) <
105 			       std::make_tuple(e2.parent_inode, e2.uid, e2.gid, e2.index);
106 		}
107 
operatorIndexCompare108 		bool operator()(const DirEntry &e,
109 		                const std::tuple<uint32_t, uint32_t, uint32_t, std::size_t>
110 		                        &index_info) const {
111 			return std::make_tuple(e.parent_inode, e.uid, e.gid, e.index) < index_info;
112 		}
113 
operatorIndexCompare114 		bool operator()(
115 		        const std::tuple<uint32_t, uint32_t, uint32_t, std::size_t> &index_info,
116 		        const DirEntry &e) const {
117 			return index_info < std::make_tuple(e.parent_inode, e.uid, e.gid, e.index);
118 		}
119 	};
120 
121 	struct InodeCompare {
operatorInodeCompare122 		bool operator()(const DirEntry &e1, const DirEntry &e2) const {
123 			return e1.inode < e2.inode;
124 		}
125 
operatorInodeCompare126 		bool operator()(const DirEntry &e, const uint32_t &inode) const {
127 			return e.inode < inode;
128 		}
129 
operatorInodeCompare130 		bool operator()(const uint32_t &inode, const DirEntry &e) const {
131 			return inode < e.inode;
132 		}
133 	};
134 
135 public:
136 	typedef boost::intrusive::set<
137 	        DirEntry,
138 	        boost::intrusive::member_hook<DirEntry, boost::intrusive::set_member_hook<>,
139 	                                      &DirEntry::lookup_hook>,
140 	        boost::intrusive::compare<LookupCompare>,
141 	        boost::intrusive::constant_time_size<true>>
142 	        LookupSet;
143 
144 	typedef boost::intrusive::set<
145 	        DirEntry,
146 	        boost::intrusive::member_hook<DirEntry, boost::intrusive::set_member_hook<>,
147 	                                      &DirEntry::index_hook>,
148 	        boost::intrusive::compare<IndexCompare>, boost::intrusive::constant_time_size<true>>
149 	        IndexSet;
150 
151 	typedef boost::intrusive::multiset<
152 	        DirEntry,
153 	        boost::intrusive::member_hook<DirEntry, boost::intrusive::set_member_hook<>,
154 	                                      &DirEntry::inode_hook>,
155 	        boost::intrusive::compare<InodeCompare>, boost::intrusive::constant_time_size<true>>
156 	        InodeMultiset;
157 
158 	typedef boost::intrusive::list<
159 	        DirEntry,
160 	        boost::intrusive::member_hook<DirEntry, boost::intrusive::list_member_hook<>,
161 	                                      &DirEntry::fifo_hook>,
162 	        boost::intrusive::constant_time_size<true>, boost::intrusive::cache_last<true>>
163 	        FifoList;
164 
165 	typedef shared_mutex SharedMutex;
166 
167 	/*! \brief Constructor.
168 	 *
169 	 * \param timeout    cache entry expiration timeout (us).
170 	 */
171 	DirEntryCache(uint64_t timeout = kDefaultTimeout_us)
timer_()172 	    : timer_(), current_time_(0), timeout_(timeout) {
173 	}
174 
~DirEntryCache()175 	~DirEntryCache() {
176 		auto it = fifo_list_.begin();
177 		while (it != fifo_list_.end()) {
178 			auto next_it = std::next(it);
179 			erase(std::addressof(*it));
180 			it = next_it;
181 		}
182 	}
183 
184 	/*! \brief Set cache entry expiration timeout (us).
185 	 *
186 	 * \param timeout    entry expiration timeout (us).
187 	 */
setTimeout(uint64_t timeout)188 	void setTimeout(uint64_t timeout) {
189 		timeout_ = timeout;
190 	}
191 
192 	/*! \brief Check if entry is valid (not expired). */
isValid(const IndexSet::iterator & index_it)193 	bool isValid(const IndexSet::iterator &index_it) const {
194 		return index_it != index_set_.end() && !expired(*index_it, current_time_);
195 	}
196 
197 	/*! \brief Check if entry is valid (not expired). */
isValid(const IndexSet::const_iterator & index_it)198 	bool isValid(const IndexSet::const_iterator &index_it) const {
199 		return index_it != index_set_.end() && !expired(*index_it, current_time_);
200 	}
201 
202 	/*! \brief Find directory entry in cache.
203 	 *
204 	 * \param ctx Process credentials.
205 	 * \param parent_inode Parent inode.
206 	 * \param name Directory entry name.
207 	 *
208 	 * \return Iterator to found entry.
209 	 */
find(const LizardClient::Context & ctx,uint32_t parent_inode,const std::string & name)210 	LookupSet::iterator find(const LizardClient::Context &ctx, uint32_t parent_inode,
211 	                         const std::string &name) {
212 		return lookup_set_.find(std::make_tuple(parent_inode, ctx.uid, ctx.gid, name),
213 		                        LookupCompare());
214 	}
215 
216 	/*! \brief Find directory entry in cache.
217 	 *
218 	 * \param ctx Process credentials.
219 	 * \param parent_inode Parent inode.
220 	 * \param index Entry index in directory.
221 	 *
222 	 * \return Iterator to found entry.
223 	 */
find(const LizardClient::Context & ctx,uint32_t parent_inode,size_t index)224 	IndexSet::iterator find(const LizardClient::Context &ctx, uint32_t parent_inode,
225 	                        size_t index) {
226 		return index_set_.find(std::make_tuple(parent_inode, ctx.uid, ctx.gid, index),
227 		                       IndexCompare());
228 	}
229 
230 	/*! \brief Find directory entry in cache.
231 	 *
232 	 * \param ctx Process credentials.
233 	 * \param inode Node inode.
234 	 *
235 	 * \return Iterator to found entry.
236 	 */
find(const LizardClient::Context & ctx,uint32_t inode)237 	InodeMultiset::iterator find(const LizardClient::Context &ctx, uint32_t inode) {
238 		auto it = inode_multiset_.lower_bound(inode, InodeCompare());
239 
240 		while (it != inode_multiset_.end() && it->inode == inode) {
241 			if (it->uid == ctx.uid && it->gid == ctx.gid) {
242 				return it;
243 			}
244 			it++;
245 		}
246 		return inode_multiset_.end();
247 	}
248 
249 	/*! \brief Get attributes of an inode.
250 	 *
251 	 * \warning This function takes read (shared) lock.
252 	 *
253 	 * \param ctx Process credentials.
254 	 * \param inode Node index (inode).
255 	 * \param attr Output: attributes of found inode.
256 	 *
257 	 * \return True if inode has been found in cache, false otherwise.
258 	 */
lookup(const LizardClient::Context & ctx,uint32_t inode,Attributes & attr)259 	bool lookup(const LizardClient::Context &ctx, uint32_t inode, Attributes &attr) {
260 		shared_lock<SharedMutex> guard(rwlock_);
261 		updateTime();
262 		auto it = find(ctx, inode);
263 		if (it == inode_multiset_.end() || expired(*it, current_time_) || it->inode == 0) {
264 			return false;
265 		}
266 		attr = it->attr;
267 		return true;
268 	}
269 
270 	/*! \brief Get attributes of directory entry.
271 	 *
272 	 * \warning This function takes read (shared) lock.
273 	 *
274 	 * \param ctx Process credentials.
275 	 * \param parent_inode Parent node index (inode).
276 	 * \param name Name of directory entry to find.
277 	 * \param inode Output: inode of directory entry.
278 	 * \param attr Output: attributes of found directory entry.
279 	 *
280 	 * \return True if inode has been found in cache, false otherwise.
281 	 */
lookup(const LizardClient::Context & ctx,uint32_t parent_inode,const std::string & name,uint32_t & inode,Attributes & attr)282 	bool lookup(const LizardClient::Context &ctx, uint32_t parent_inode,
283 	            const std::string &name, uint32_t &inode, Attributes &attr) {
284 		shared_lock<SharedMutex> guard(rwlock_);
285 		updateTime();
286 		auto it = find(ctx, parent_inode, name);
287 		if (it == lookup_set_.end() || expired(*it, current_time_) || it->inode == 0) {
288 			return false;
289 		}
290 		inode = it->inode;
291 		attr = it->attr;
292 		return true;
293 	}
294 
295 	/*! \brief Add directory entry information to cache.
296 	 *
297 	 * \param ctx Process credentials.
298 	 * \param parent_inode Parent node index (inode).
299 	 * \param inode Inode of directory entry.
300 	 * \param index Position of entry in directory listing.
301 	 * \param name Name of directory entry.
302 	 * \param attr attributes of found directory entry.
303 	 * \param timestamp Time when data has been obtained (used for entry timeout).
304 	 */
insert(const LizardClient::Context & ctx,uint32_t parent_inode,uint32_t inode,size_t index,const std::string name,const Attributes & attr,uint64_t timestamp)305 	void insert(const LizardClient::Context &ctx, uint32_t parent_inode, uint32_t inode,
306 	            size_t index, const std::string name, const Attributes &attr,
307 	            uint64_t timestamp) {
308 		// Avoid inserting stale data
309 		if (timestamp + timeout_ <= current_time_) {
310 			return;
311 		}
312 		removeExpired(1, timestamp);
313 		auto lookup_it = find(ctx, parent_inode, name);
314 		if (lookup_it != lookup_set_.end()) {
315 			erase(std::addressof(*lookup_it));
316 		}
317 		auto index_it = find(ctx, parent_inode, index);
318 		if (index_it != index_set_.end()) {
319 			erase(std::addressof(*index_it));
320 		}
321 		addEntry(ctx, parent_inode, inode, index, name, attr, timestamp);
322 	}
323 
324 	/*! \brief Add data to cache from container.
325 	 *
326 	 * \param ctx Process credentials.
327 	 * \param parent_inode Parent node index (inode).
328 	 * \param first_index Position of entry in directory listing.
329 	 * \param container Container with data to add to cache.
330 	 * \param timestamp Time when data has been obtained (used for entry timeout).
331 	 */
332 	template <typename Container>
insertSubsequent(const LizardClient::Context & ctx,uint32_t parent_inode,std::size_t first_index,const Container & container,uint64_t timestamp)333 	void insertSubsequent(const LizardClient::Context &ctx, uint32_t parent_inode,
334 	                      std::size_t first_index, const Container &container,
335 	                      uint64_t timestamp) {
336 		// Avoid inserting stale data
337 		if (timestamp + timeout_ <= current_time_) {
338 			return;
339 		}
340 		removeExpired(container.size(), timestamp);
341 		auto it = index_set_.lower_bound(
342 		        std::make_tuple(parent_inode, ctx.uid, ctx.gid, first_index),
343 		        IndexCompare());
344 		std::size_t current_index = first_index;
345 		for (const DirectoryEntry &de : container) {
346 			auto lookup_it = find(ctx, parent_inode, de.name);
347 			if (it == index_set_.end() ||
348 			    std::make_tuple(parent_inode, ctx.uid, ctx.gid) !=
349 			            std::make_tuple(it->parent_inode, it->uid, it->gid) ||
350 			    it->index != current_index) {
351 				if (lookup_it != lookup_end()) {
352 					erase(std::addressof(*lookup_it));
353 				}
354 				it = addEntry(ctx, parent_inode, de.inode, current_index, de.name,
355 				              de.attributes, timestamp);
356 			} else {
357 				if (lookup_it != lookup_end() && it != index_set_.iterator_to(*lookup_it)) {
358 					erase(std::addressof(*lookup_it));
359 				}
360 				overwriteEntry(*it, de, timestamp);
361 			}
362 			++it;
363 			++current_index;
364 		}
365 	}
366 
367 	/*! \brief Remove data from cache matching specified criteria.
368 	 *
369 	 * \param ctx Process credentials.
370 	 * \param parent_inode Parent node inode.
371 	 * \param first_index Directory index of first entry to remove.
372 	 */
373 	void invalidate(const LizardClient::Context &ctx, uint32_t parent_inode,
374 	                size_t first_index = 0) {
375 		auto it = index_set_.lower_bound(
376 		        std::make_tuple(parent_inode, ctx.uid, ctx.gid, first_index),
377 		        IndexCompare());
378 		while (it != index_set_.end() &&
379 		       std::make_tuple(parent_inode, ctx.uid, ctx.gid) ==
380 		               std::make_tuple(it->parent_inode, it->uid, it->gid)) {
381 			assert(it->index >= first_index);
382 
383 			DirEntry *entry = std::addressof(*it);
384 			++it;
385 			erase(entry);
386 		}
387 	}
388 
389 	/*! \brief Remove data from cache matching specified criteria.
390 	 *
391 	 * \warning This function takes write (unique) lock.
392 	 *
393 	 * \param inode Inode.
394 	 */
lockAndInvalidateInode(uint32_t inode)395 	void lockAndInvalidateInode(uint32_t inode) {
396 		std::unique_lock<SharedMutex> guard(rwlock_);
397 		auto it = inode_multiset_.find(inode, InodeCompare());
398 		while (it != inode_multiset_.end() && it->inode == inode) {
399 			DirEntry *entry = std::addressof(*it);
400 			++it;
401 			erase(entry);
402 		}
403 	}
404 
405 	/*! \brief Remove data from cache matching specified criteria.
406 	 *
407 	 * \warning This function takes write (unique) lock.
408 	 *
409 	 * \param parent_inode Parent inode.
410 	 */
lockAndInvalidateParent(uint32_t parent_inode)411 	void lockAndInvalidateParent(uint32_t parent_inode) {
412 		std::unique_lock<SharedMutex> guard(rwlock_);
413 		auto it = index_set_.lower_bound(std::make_tuple(parent_inode, 0, 0, 0),
414 		                                 IndexCompare());
415 		while (it != index_set_.end() && it->parent_inode == parent_inode) {
416 			DirEntry *entry = std::addressof(*it);
417 			++it;
418 			erase(entry);
419 		}
420 	}
421 
422 	/*! \brief Remove data from cache matching specified criteria.
423 	 *
424 	 * \warning This function takes write (unique) lock.
425 	 *
426 	 * \param ctx Process credentials.
427 	 * \param parent_inode Parent inode.
428 	 */
lockAndInvalidateParent(const LizardClient::Context & ctx,uint32_t parent_inode)429 	void lockAndInvalidateParent(const LizardClient::Context &ctx, uint32_t parent_inode) {
430 		std::unique_lock<SharedMutex> guard(rwlock_);
431 
432 		auto it = index_set_.lower_bound(std::make_tuple(parent_inode, ctx.uid, ctx.gid, 0),
433 		                                 IndexCompare());
434 		while (it != index_set_.end() &&
435 		       std::make_tuple(parent_inode, ctx.uid, ctx.gid) ==
436 		               std::make_tuple(it->parent_inode, it->uid, it->gid)) {
437 			DirEntry *entry = std::addressof(*it);
438 			++it;
439 			erase(entry);
440 		}
441 	}
442 
lookup_begin()443 	LookupSet::const_iterator lookup_begin() const {
444 		return lookup_set_.begin();
445 	}
446 
index_begin()447 	IndexSet::const_iterator index_begin() const {
448 		return index_set_.begin();
449 	}
450 
inode_begin()451 	InodeMultiset::const_iterator inode_begin() const {
452 		return inode_multiset_.begin();
453 	}
454 
lookup_end()455 	LookupSet::const_iterator lookup_end() const {
456 		return lookup_set_.end();
457 	}
458 
index_end()459 	IndexSet::const_iterator index_end() const {
460 		return index_set_.end();
461 	}
462 
inode_end()463 	InodeMultiset::const_iterator inode_end() const {
464 		return inode_multiset_.end();
465 	}
466 
467 	/*! \brief Get number of elements in cache. */
size()468 	size_t size() const {
469 		return lookup_set_.size();
470 	}
471 
472 	/*! \brief Get reference to reader-writer (shared) mutex. */
rwlock()473 	SharedMutex &rwlock() {
474 		return rwlock_;
475 	}
476 
477 	/*! \brief Remove expired elements from cache.
478 	 *
479 	 * \param max_to_remove Limit on number of removed entries.
480 	 * \param timestamp Current time.
481 	 */
removeExpired(int max_to_remove,uint64_t timestamp)482 	void removeExpired(int max_to_remove, uint64_t timestamp) {
483 		int i = 0;
484 		while (!fifo_list_.empty()) {
485 			if (!expired(fifo_list_.front(), timestamp)) {
486 				break;
487 			}
488 			if (i >= max_to_remove) {
489 				break;
490 			}
491 			DirEntry *entry = std::addressof(fifo_list_.front());
492 			erase(entry);
493 			++i;
494 		}
495 	}
496 
497 	/*! \brief Remove expired elements from cache.
498 	 *
499 	 * \param max_to_remove Limit on number of removed entries.
500 	 */
removeExpired(int max_to_remove)501 	void removeExpired(int max_to_remove) {
502 		removeExpired(max_to_remove, current_time_);
503 	}
504 
505 	/*! \brief Remove oldest elements from cache.
506 	 *
507 	 * \param count Number of entries to remove.
508 	 */
removeOldest(std::size_t count)509 	void removeOldest(std::size_t count) {
510 		for(std::size_t i = 0; i < count && !fifo_list_.empty(); ++i) {
511 			DirEntry *entry = std::addressof(fifo_list_.front());
512 			erase(entry);
513 		}
514 	}
515 
516 	/*! \brief Update internal time to wall time.
517 	 *
518 	 * \return Current internal time.
519 	 */
updateTime()520 	uint64_t updateTime() {
521 		current_time_ = timer_.elapsed_us();
522 		return current_time_;
523 	}
524 
525 protected:
erase(DirEntry * entry)526 	void erase(DirEntry *entry) {
527 		lookup_set_.erase(lookup_set_.iterator_to(*entry));
528 		index_set_.erase(index_set_.iterator_to(*entry));
529 		inode_multiset_.erase(inode_multiset_.iterator_to(*entry));
530 		fifo_list_.erase(fifo_list_.iterator_to(*entry));
531 		delete entry;
532 	}
533 
expired(const DirEntry & entry,uint64_t timestamp)534 	bool expired(const DirEntry &entry, uint64_t timestamp) const {
535 		return entry.timestamp + timeout_ <= timestamp;
536 	}
537 
overwriteEntry(DirEntry & entry,DirectoryEntry de,uint64_t timestamp)538 	void overwriteEntry(DirEntry &entry, DirectoryEntry de, uint64_t timestamp) {
539 		if (entry.inode != de.inode) {
540 			inode_multiset_.erase(inode_multiset_.iterator_to(entry));
541 			entry.inode = de.inode;
542 			inode_multiset_.insert(entry);
543 		}
544 
545 		if (entry.name != de.name) {
546 			lookup_set_.erase(lookup_set_.iterator_to(entry));
547 			entry.name = de.name;
548 			lookup_set_.insert(entry);
549 		}
550 
551 		fifo_list_.erase(fifo_list_.iterator_to(entry));
552 		fifo_list_.push_back(entry);
553 		entry.timestamp = timestamp;
554 		entry.attr = de.attributes;
555 	}
556 
addEntry(const LizardClient::Context & ctx,uint32_t parent_inode,uint32_t inode,size_t index,std::string name,Attributes attr,uint64_t timestamp)557 	IndexSet::iterator addEntry(const LizardClient::Context &ctx, uint32_t parent_inode,
558 	                            uint32_t inode, size_t index, std::string name, Attributes attr,
559 	                            uint64_t timestamp) {
560 		DirEntry *entry =
561 		        new DirEntry(ctx, parent_inode, inode, index, name, attr, timestamp);
562 		lookup_set_.insert(*entry);
563 		auto result = index_set_.insert(*entry);
564 		inode_multiset_.insert(*entry);
565 		fifo_list_.push_back(*entry);
566 		return result.first;
567 	}
568 
569 	Timer timer_;
570 	std::atomic<uint64_t> current_time_;
571 	uint64_t timeout_;
572 	LookupSet lookup_set_;
573 	IndexSet index_set_;
574 	InodeMultiset inode_multiset_;
575 	FifoList fifo_list_;
576 	SharedMutex rwlock_;
577 
578 	static const int kDefaultTimeout_us = 500000;
579 };
580